From 3b157704372b57e5d6b83eae6ae3451461af2851 Mon Sep 17 00:00:00 2001 From: AI-anonymous Date: Wed, 20 May 2026 00:18:23 +0200 Subject: [PATCH] style: run clang-format and configure pre-commit hooks --- .pre-commit-config.yaml | 51 ++ benchmarks/bench_evolve.cpp | 54 +- benchmarks/bench_step.cpp | 31 +- examples/pytorch_integration.cpp | 74 +- examples/simple_optimization.cpp | 39 +- examples/telemetry_and_inference.cpp | 184 ++--- include/fces/config.hpp | 101 +-- include/fces/controller.hpp | 150 ++-- include/fces/evolution.hpp | 39 +- include/fces/fitness.hpp | 155 ++-- include/fces/optimizer.hpp | 112 +-- include/fces/oscillation.hpp | 27 +- include/fces/population.hpp | 194 +++-- include/fces/spectral.hpp | 28 +- include/fces/telemetry.hpp | 14 +- python/fces_native.cpp | 55 +- src/controller.cpp | 573 +++++++------- src/evolution.cpp | 71 +- src/fitness.cpp | 90 +-- src/optimizer.cpp | 769 ++++++++++--------- src/oscillation.cpp | 144 ++-- src/population.cpp | 1059 ++++++++++++++------------ src/spectral.cpp | 53 +- src/telemetry.cpp | 39 +- tests/test_controller.cpp | 67 +- tests/test_fitness.cpp | 37 +- tests/test_optimizer.cpp | 51 +- tests/test_population.cpp | 26 +- 28 files changed, 2226 insertions(+), 2061 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..899df2d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +# Pre-commit configuration for FCES-native +repos: + # 1. Standard hooks for general file cleanliness + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + # 2. C++ Formatting using clang-format (fetched dynamically) + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.5 + hooks: + - id: clang-format + types_or: [c++, c] + + # 3. C++ Static Analysis using local cppcheck + - repo: local + hooks: + - id: cppcheck + name: cppcheck + entry: cppcheck + language: system + types_or: [c++, c] + args: [ + "--enable=warning,portability", + "--suppress=missingIncludeSystem", + "--suppress=unusedFunction", + "--suppress=normalCheckLevelMaxBranches", + "--inline-suppr", + "--error-exitcode=1", + "-Iinclude" + ] + + # 4. Python Linter and Formatter (ruff) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format + + # 5. Python Type Checking (mypy) + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.0 + hooks: + - id: mypy + args: [ "--ignore-missing-imports", "--strict" ] + additional_dependencies: [ "types-requests", "pydantic" ] diff --git a/benchmarks/bench_evolve.cpp b/benchmarks/bench_evolve.cpp index b69b71b..dff0f0b 100644 --- a/benchmarks/bench_evolve.cpp +++ b/benchmarks/bench_evolve.cpp @@ -1,45 +1,47 @@ -#include -#include "fces/population.hpp" #include "fces/controller.hpp" +#include "fces/population.hpp" +#include using namespace fces; -static void BM_ControllerDecideUpdate(benchmark::State& state) { - FuzzyController ctrl; - std::vector> stats(state.range(0), {0.1f, 0.2f, 0.3f, 0.4f, 0.5f}); +static void BM_ControllerDecideUpdate(benchmark::State &state) { + FuzzyController ctrl; + std::vector> stats(state.range(0), + {0.1f, 0.2f, 0.3f, 0.4f, 0.5f}); - for (auto _ : state) { - auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f); - benchmark::DoNotOptimize(actions); - } + for (auto _ : state) { + auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, + 1.0f, 0.0f); + benchmark::DoNotOptimize(actions); + } } BENCHMARK(BM_ControllerDecideUpdate)->Arg(10)->Arg(50)->Arg(200); -static void BM_Evolve(benchmark::State& state) { - Population pop(state.range(0)); +static void BM_Evolve(benchmark::State &state) { + Population pop(state.range(0)); - for (auto _ : state) { - pop.evolve(2.0f, -0.01f, 0.5f); - } + for (auto _ : state) { + pop.evolve(2.0f, -0.01f, 0.5f); + } } BENCHMARK(BM_Evolve)->Arg(50)->Arg(100)->Arg(200); -static void BM_Mutation(benchmark::State& state) { - FuzzyController ctrl; +static void BM_Mutation(benchmark::State &state) { + FuzzyController ctrl; - for (auto _ : state) { - auto child = ctrl.mutate(2.0f, 1.0f); - benchmark::DoNotOptimize(child); - } + for (auto _ : state) { + auto child = ctrl.mutate(2.0f, 1.0f); + benchmark::DoNotOptimize(child); + } } BENCHMARK(BM_Mutation); -static void BM_Crossover(benchmark::State& state) { - FuzzyController a, b; +static void BM_Crossover(benchmark::State &state) { + FuzzyController a, b; - for (auto _ : state) { - auto child = a.crossover(b); - benchmark::DoNotOptimize(child); - } + for (auto _ : state) { + auto child = a.crossover(b); + benchmark::DoNotOptimize(child); + } } BENCHMARK(BM_Crossover); diff --git a/benchmarks/bench_step.cpp b/benchmarks/bench_step.cpp index 546b9c1..2e4c817 100644 --- a/benchmarks/bench_step.cpp +++ b/benchmarks/bench_step.cpp @@ -1,25 +1,26 @@ +#include "fces/optimizer.hpp" #include #include -#include "fces/optimizer.hpp" using namespace fces; -static void BM_OptimizerStep(benchmark::State& state) { - auto model = torch::nn::Linear(state.range(0), state.range(0) / 2); - std::vector params; - for (auto& p : model->parameters()) params.push_back(p); +static void BM_OptimizerStep(benchmark::State &state) { + auto model = torch::nn::Linear(state.range(0), state.range(0) / 2); + std::vector params; + for (auto &p : model->parameters()) + params.push_back(p); - FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); + FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); - auto x = torch::randn({8, state.range(0)}); + auto x = torch::randn({8, state.range(0)}); - for (auto _ : state) { - auto y = model->forward(x); - auto loss = y.sum(); - loss.backward(); - opt.step(); - opt.zero_grad(); - benchmark::DoNotOptimize(loss); - } + for (auto _ : state) { + auto y = model->forward(x); + auto loss = y.sum(); + loss.backward(); + opt.step(); + opt.zero_grad(); + benchmark::DoNotOptimize(loss); + } } BENCHMARK(BM_OptimizerStep)->Arg(64)->Arg(256)->Arg(1024); diff --git a/examples/pytorch_integration.cpp b/examples/pytorch_integration.cpp index ea99f3d..b474121 100644 --- a/examples/pytorch_integration.cpp +++ b/examples/pytorch_integration.cpp @@ -3,59 +3,57 @@ * @brief Example: train a small neural network with FCES via libtorch. */ +#include "fces/optimizer.hpp" #include #include -#include "fces/optimizer.hpp" struct TinyNet : torch::nn::Module { - torch::nn::Linear fc1{nullptr}, fc2{nullptr}; + torch::nn::Linear fc1{nullptr}, fc2{nullptr}; - TinyNet() { - fc1 = register_module("fc1", torch::nn::Linear(10, 32)); - fc2 = register_module("fc2", torch::nn::Linear(32, 1)); - } + TinyNet() { + fc1 = register_module("fc1", torch::nn::Linear(10, 32)); + fc2 = register_module("fc2", torch::nn::Linear(32, 1)); + } - torch::Tensor forward(torch::Tensor x) { - x = torch::relu(fc1->forward(x)); - return fc2->forward(x); - } + torch::Tensor forward(torch::Tensor x) { + x = torch::relu(fc1->forward(x)); + return fc2->forward(x); + } }; int main() { - auto model = std::make_shared(); + auto model = std::make_shared(); - std::vector params; - for (auto& p : model->parameters()) params.push_back(p); + std::vector params; + for (auto &p : model->parameters()) + params.push_back(p); - fces::FCESOptimizer optimizer( - params, - fces::FCESConfig{} - .set_lr(1.6e-3f) - .set_population_size(200) - .set_total_steps(1000) - ); + fces::FCESOptimizer optimizer(params, fces::FCESConfig{} + .set_lr(1.6e-3f) + .set_population_size(200) + .set_total_steps(1000)); - // Generate synthetic regression data - auto x_train = torch::randn({100, 10}); - auto y_train = torch::sin(x_train.sum(1, true)); + // Generate synthetic regression data + auto x_train = torch::randn({100, 10}); + auto y_train = torch::sin(x_train.sum(1, true)); - for (int epoch = 0; epoch < 100; ++epoch) { - optimizer.zero_grad(); - auto pred = model->forward(x_train); - auto loss = torch::mse_loss(pred, y_train); - loss.backward(); - optimizer.step(); - optimizer.update_fitness(loss.item()); + for (int epoch = 0; epoch < 100; ++epoch) { + optimizer.zero_grad(); + auto pred = model->forward(x_train); + auto loss = torch::mse_loss(pred, y_train); + loss.backward(); + optimizer.step(); + optimizer.update_fitness(loss.item()); - if (epoch % 10 == 0) { - std::cout << "Epoch " << epoch - << " | Loss: " << loss.item() << std::endl; - } + if (epoch % 10 == 0) { + std::cout << "Epoch " << epoch << " | Loss: " << loss.item() + << std::endl; } + } - std::cout << "\nTraining complete. Final loss: " - << torch::mse_loss(model->forward(x_train), y_train).item() - << std::endl; + std::cout << "\nTraining complete. Final loss: " + << torch::mse_loss(model->forward(x_train), y_train).item() + << std::endl; - return 0; + return 0; } diff --git a/examples/simple_optimization.cpp b/examples/simple_optimization.cpp index 986bae4..cafe66d 100644 --- a/examples/simple_optimization.cpp +++ b/examples/simple_optimization.cpp @@ -3,34 +3,33 @@ * @brief Minimal example: optimize a quadratic function with FCES. */ +#include "fces/optimizer.hpp" #include #include -#include "fces/optimizer.hpp" int main() { - // Target: minimize f(x) = ||x - target||^2 - auto target = torch::tensor({1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); - auto x = torch::randn({5}, torch::requires_grad()); + // Target: minimize f(x) = ||x - target||^2 + auto target = torch::tensor({1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); + auto x = torch::randn({5}, torch::requires_grad()); - std::vector params = {x}; - fces::FCESOptimizer optimizer(params, fces::FCESConfig{}.set_lr(1e-2f)); + std::vector params = {x}; + fces::FCESOptimizer optimizer(params, fces::FCESConfig{}.set_lr(1e-2f)); - for (int step = 0; step < 500; ++step) { - optimizer.zero_grad(); - auto loss = (x - target).pow(2).sum(); - loss.backward(); - optimizer.step(); - optimizer.update_fitness(loss.item()); + for (int step = 0; step < 500; ++step) { + optimizer.zero_grad(); + auto loss = (x - target).pow(2).sum(); + loss.backward(); + optimizer.step(); + optimizer.update_fitness(loss.item()); - if (step % 50 == 0) { - std::cout << "Step " << step - << " | Loss: " << loss.item() - << " | x: " << x << std::endl; - } + if (step % 50 == 0) { + std::cout << "Step " << step << " | Loss: " << loss.item() + << " | x: " << x << std::endl; } + } - std::cout << "\nFinal x: " << x << std::endl; - std::cout << "Target: " << target << std::endl; + std::cout << "\nFinal x: " << x << std::endl; + std::cout << "Target: " << target << std::endl; - return 0; + return 0; } diff --git a/examples/telemetry_and_inference.cpp b/examples/telemetry_and_inference.cpp index d7d6c0c..4357ec7 100644 --- a/examples/telemetry_and_inference.cpp +++ b/examples/telemetry_and_inference.cpp @@ -3,112 +3,124 @@ * @brief Example showcasing telemetry instrumentation and model inference. */ -#include -#include -#include #include "fces/optimizer.hpp" #include "fces/telemetry.hpp" +#include +#include +#include // Define a simple neural network for nonlinear regression: y = x^2 struct RegressionNet : torch::nn::Module { - torch::nn::Linear fc1{nullptr}, fc2{nullptr}; + torch::nn::Linear fc1{nullptr}, fc2{nullptr}; - RegressionNet() { - fc1 = register_module("fc1", torch::nn::Linear(1, 16)); - fc2 = register_module("fc2", torch::nn::Linear(16, 1)); - } + RegressionNet() { + fc1 = register_module("fc1", torch::nn::Linear(1, 16)); + fc2 = register_module("fc2", torch::nn::Linear(16, 1)); + } - torch::Tensor forward(torch::Tensor x) { - x = torch::tanh(fc1->forward(x)); - return fc2->forward(x); - } + torch::Tensor forward(torch::Tensor x) { + x = torch::tanh(fc1->forward(x)); + return fc2->forward(x); + } }; int main() { - fces::Telemetry::get().info("app_start", "Telemetry and Inference demo initialized."); + fces::Telemetry::get().info("app_start", + "Telemetry and Inference demo initialized."); - // 1. Create Model and Data - auto model = std::make_shared(); - - // Generate training data: x in [-2, 2], y = x^2 + noise - auto x_train = torch::linspace(-2.0, 2.0, 100).unsqueeze(1); - auto y_train = x_train.pow(2) + 0.1 * torch::randn({100, 1}); + // 1. Create Model and Data + auto model = std::make_shared(); - // 2. Configure Optimizer - std::vector params; - for (auto& p : model->parameters()) { - params.push_back(p); + // Generate training data: x in [-2, 2], y = x^2 + noise + auto x_train = torch::linspace(-2.0, 2.0, 100).unsqueeze(1); + auto y_train = x_train.pow(2) + 0.1 * torch::randn({100, 1}); + + // 2. Configure Optimizer + std::vector params; + for (auto &p : model->parameters()) { + params.push_back(p); + } + + fces::FCESOptimizer optimizer( + params, + fces::FCESConfig{}.set_lr(2e-3f).set_population_size(150).set_total_steps( + 100)); + + fces::Telemetry::get().info("training_start", + "Beginning neural net optimization with FCES."); + + auto start_train = std::chrono::high_resolution_clock::now(); + + // 3. Optimization Loop + for (int epoch = 0; epoch <= 100; ++epoch) { + optimizer.zero_grad(); + auto pred = model->forward(x_train); + auto loss = torch::mse_loss(pred, y_train); + loss.backward(); + optimizer.step(); + optimizer.update_fitness(loss.item()); + + if (epoch % 20 == 0) { + fces::Telemetry::get().info( + "epoch_checkpoint", "Epoch " + std::to_string(epoch) + " | Loss: " + + std::to_string(loss.item())); } + } - fces::FCESOptimizer optimizer( - params, - fces::FCESConfig{} - .set_lr(2e-3f) - .set_population_size(150) - .set_total_steps(100) - ); + auto end_train = std::chrono::high_resolution_clock::now(); + double train_duration = + std::chrono::duration(end_train - start_train) + .count(); - fces::Telemetry::get().info("training_start", "Beginning neural net optimization with FCES."); + fces::Telemetry::get().info("training_complete", + "Duration: " + std::to_string(train_duration) + + " ms"); - auto start_train = std::chrono::high_resolution_clock::now(); + // 4. Inference Phase + fces::Telemetry::get().info("inference_phase_start", + "Evaluating model on new test inputs."); - // 3. Optimization Loop - for (int epoch = 0; epoch <= 100; ++epoch) { - optimizer.zero_grad(); - auto pred = model->forward(x_train); - auto loss = torch::mse_loss(pred, y_train); - loss.backward(); - optimizer.step(); - optimizer.update_fitness(loss.item()); + // Generate test inputs + auto x_test = torch::tensor({-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}).unsqueeze(1); + auto y_expected = x_test.pow(2); - if (epoch % 20 == 0) { - fces::Telemetry::get().info("epoch_checkpoint", - "Epoch " + std::to_string(epoch) + " | Loss: " + std::to_string(loss.item())); - } - } + // Switch model to evaluation mode + model->eval(); - auto end_train = std::chrono::high_resolution_clock::now(); - double train_duration = std::chrono::duration(end_train - start_train).count(); + // Run inference and measure latency + auto start_inf = std::chrono::high_resolution_clock::now(); + torch::Tensor y_pred; + { + torch::NoGradGuard no_grad; + y_pred = model->forward(x_test); + } + auto end_inf = std::chrono::high_resolution_clock::now(); + double inf_duration = + std::chrono::duration(end_inf - start_inf).count(); - fces::Telemetry::get().info("training_complete", - "Duration: " + std::to_string(train_duration) + " ms"); + // Log telemetry for inference performance + fces::Telemetry::get().info( + "inference_perf", "Inputs: " + std::to_string(x_test.size(0)) + + " | Latency: " + std::to_string(inf_duration) + + " ms"); - // 4. Inference Phase - fces::Telemetry::get().info("inference_phase_start", "Evaluating model on new test inputs."); + // Print predictions and expected values side-by-side + std::cout << "\n================ INFERENCE RESULTS ================" + << std::endl; + std::cout << "Input (x) | Predicted (y_pred) | Expected (y_expected)" + << std::endl; + std::cout << "----------------------------------------------------" + << std::endl; + for (int i = 0; i < x_test.size(0); ++i) { + float x_val = x_test[i][0].item(); + float pred_val = y_pred[i][0].item(); + float exp_val = y_expected[i][0].item(); + std::printf(" %7.2f | %7.4f | %7.4f\n", x_val, + pred_val, exp_val); + } + std::cout << "====================================================\n" + << std::endl; - // Generate test inputs - auto x_test = torch::tensor({-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}).unsqueeze(1); - auto y_expected = x_test.pow(2); - - // Switch model to evaluation mode - model->eval(); - - // Run inference and measure latency - auto start_inf = std::chrono::high_resolution_clock::now(); - torch::Tensor y_pred; - { - torch::NoGradGuard no_grad; - y_pred = model->forward(x_test); - } - auto end_inf = std::chrono::high_resolution_clock::now(); - double inf_duration = std::chrono::duration(end_inf - start_inf).count(); - - // Log telemetry for inference performance - fces::Telemetry::get().info("inference_perf", - "Inputs: " + std::to_string(x_test.size(0)) + " | Latency: " + std::to_string(inf_duration) + " ms"); - - // Print predictions and expected values side-by-side - std::cout << "\n================ INFERENCE RESULTS ================" << std::endl; - std::cout << "Input (x) | Predicted (y_pred) | Expected (y_expected)" << std::endl; - std::cout << "----------------------------------------------------" << std::endl; - for (int i = 0; i < x_test.size(0); ++i) { - float x_val = x_test[i][0].item(); - float pred_val = y_pred[i][0].item(); - float exp_val = y_expected[i][0].item(); - std::printf(" %7.2f | %7.4f | %7.4f\n", x_val, pred_val, exp_val); - } - std::cout << "====================================================\n" << std::endl; - - fces::Telemetry::get().info("app_finish", "Exiting demo successfully."); - return 0; + fces::Telemetry::get().info("app_finish", "Exiting demo successfully."); + return 0; } diff --git a/include/fces/config.hpp b/include/fces/config.hpp index a460a92..ecfe45b 100644 --- a/include/fces/config.hpp +++ b/include/fces/config.hpp @@ -18,65 +18,80 @@ namespace fces { * All fields have sensible defaults matching the Python V49.0 implementation. */ struct FCESConfig { - // Learning rate (V49 optimal default) - float lr = 1.6e-3f; + // Learning rate (V49 optimal default) + float lr = 1.6e-3f; - // Weight decay coefficient - float weight_decay = 0.0f; + // Weight decay coefficient + float weight_decay = 0.0f; - // Population size for evolutionary search - int population_size = 200; + // Population size for evolutionary search + int population_size = 200; - // Total training steps (for progress-aware scheduling) - int total_steps = 5000; + // Total training steps (for progress-aware scheduling) + int total_steps = 5000; - // Signal mode for loss velocity calculation - std::string signal_mode = "relative"; + // Signal mode for loss velocity calculation + std::string signal_mode = "relative"; - // Grokking awareness coefficient (0.0 = disabled) - float grokking_coefficient = 0.1f; + // Grokking awareness coefficient (0.0 = disabled) + float grokking_coefficient = 0.1f; - // Spectral sensing frequency (every N steps) - int spectral_frequency = 10; + // Spectral sensing frequency (every N steps) + int spectral_frequency = 10; - // Curriculum Spectral Regularization - bool csr_enabled = false; - int csr_warmup_steps = 500; - int csr_ramp_steps = 1000; + // Curriculum Spectral Regularization + bool csr_enabled = false; + int csr_warmup_steps = 500; + int csr_ramp_steps = 1000; - // Trust region clipping - float trust_region_clip = 0.01f; + // Trust region clipping + float trust_region_clip = 0.01f; - // Rollback threshold - float rollback_threshold = 1.5f; + // Rollback threshold + float rollback_threshold = 1.5f; - // Adaptive weight decay - bool adaptive_wd = false; + // Adaptive weight decay + bool adaptive_wd = false; - // Parasitic mode (gradient alignment reward) - bool parasitic_mode = false; + // Parasitic mode (gradient alignment reward) + bool parasitic_mode = false; - // Ablation mode: "", "force_sign", "force_grad" - std::string ablation_mode = ""; + // Ablation mode: "", "force_sign", "force_grad" + std::string ablation_mode = ""; - // Fractional factorial scoring (CRO trick) - bool use_fractional_scoring = false; + // Fractional factorial scoring (CRO trick) + bool use_fractional_scoring = false; - // Direct construction mode (pop_size=1) - bool direct_construction = false; + // Direct construction mode (pop_size=1) + bool direct_construction = false; - // Banach-Tarski fission - bool use_banach_fission = false; + // Banach-Tarski fission + bool use_banach_fission = false; - // Auto-population (stabilize on divergence) - bool auto_population = false; + // Auto-population (stabilize on divergence) + bool auto_population = false; - // Builder pattern - FCESConfig& set_lr(float v) { lr = v; return *this; } - FCESConfig& set_population_size(int v) { population_size = v; return *this; } - FCESConfig& set_total_steps(int v) { total_steps = v; return *this; } - FCESConfig& set_grokking_coefficient(float v) { grokking_coefficient = v; return *this; } - FCESConfig& set_direct_construction(bool v) { direct_construction = v; return *this; } + // Builder pattern + FCESConfig &set_lr(float v) { + lr = v; + return *this; + } + FCESConfig &set_population_size(int v) { + population_size = v; + return *this; + } + FCESConfig &set_total_steps(int v) { + total_steps = v; + return *this; + } + FCESConfig &set_grokking_coefficient(float v) { + grokking_coefficient = v; + return *this; + } + FCESConfig &set_direct_construction(bool v) { + direct_construction = v; + return *this; + } }; -} // namespace fces +} // namespace fces diff --git a/include/fces/controller.hpp b/include/fces/controller.hpp index e696d3c..57da5e6 100644 --- a/include/fces/controller.hpp +++ b/include/fces/controller.hpp @@ -27,28 +27,29 @@ constexpr int GENOME_INPUT_DIM = 14; constexpr int GENOME_HIDDEN_DIM = 8; // Controller output dimension: [multiplier, sign_gate, wd_mult] constexpr int GENOME_OUTPUT_DIM = 3; -// Total genome size: input->hidden weights + hidden biases + hidden->output weights + output biases +// Total genome size: input->hidden weights + hidden biases + hidden->output +// weights + output biases constexpr int GENOME_SIZE = - (GENOME_INPUT_DIM * GENOME_HIDDEN_DIM) + // input -> hidden weights - GENOME_HIDDEN_DIM + // hidden biases - (GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM) + // hidden -> output weights - GENOME_OUTPUT_DIM; // output biases + (GENOME_INPUT_DIM * GENOME_HIDDEN_DIM) + // input -> hidden weights + GENOME_HIDDEN_DIM + // hidden biases + (GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM) + // hidden -> output weights + GENOME_OUTPUT_DIM; // output biases /** * Genome — the "DNA" of a fuzzy controller. * A flat array of floats encoding a micro-MLP. */ struct Genome { - std::array weights{}; - std::array gene_success{}; - float sigma_gene = 0.1f; - float plasticity = 1.0f; + std::array weights{}; + std::array gene_success{}; + float sigma_gene = 0.1f; + float plasticity = 1.0f; - /// Initialize with random weights from a normal distribution - void randomize(std::mt19937& rng); + /// Initialize with random weights from a normal distribution + void randomize(std::mt19937 &rng); - /// Deep copy - Genome clone() const; + /// Deep copy + Genome clone() const; }; /** @@ -62,84 +63,81 @@ struct Genome { */ class FuzzyController { public: - /// Unique identifier - uint64_t id; + /// Unique identifier + uint64_t id; - /// The neural genome - Genome genome; + /// The neural genome + Genome genome; - /// Fitness scores - float fitness = 0.0f; - float lifetime_fitness = 0.0f; - float ema_fitness = 0.0f; - int evaluation_count = 0; - int age = 0; + /// Fitness scores + float fitness = 0.0f; + float lifetime_fitness = 0.0f; + float ema_fitness = 0.0f; + int evaluation_count = 0; + int age = 0; - /// Origin tracking - std::string origin = "random"; + /// Origin tracking + std::string origin = "random"; - /// Trust region violation counter - int trust_violations = 0; + /// Trust region violation counter + int trust_violations = 0; - /// Rolling fitness history (for Phase 23 strategies) - std::vector fitness_history; + /// Rolling fitness history (for Phase 23 strategies) + std::vector fitness_history; - // --------------------------------------------------------------- - // Construction - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Construction + // --------------------------------------------------------------- - FuzzyController(); - explicit FuzzyController(Genome genome); + FuzzyController(); + explicit FuzzyController(Genome genome); - // --------------------------------------------------------------- - // Core Operations - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Core Operations + // --------------------------------------------------------------- - /** - * Forward pass through the micro-MLP to produce update decisions. - * - * @param layer_stats Vector of per-layer feature maps - * @param loss_trend Current loss velocity - * @param step_pct Training progress [0, 1] - * @param rollback_rate Rolling average rollback frequency - * @param grad_stability Gradient coefficient of variation - * @param spectral_alpha Log spectral rank - * @param stagnation_intensity Stagnation counter / 500 - * @param kzm_damping Kibble-Zurek damping factor - * @param projected_drift Projected loss drift - * @return Tensor of shape [num_groups, 3] — (mult, sign_gate, wd_mult) - */ - torch::Tensor decide_update( - const std::vector>& layer_stats, - float loss_trend, - float step_pct, - float rollback_rate, - float grad_stability, - float spectral_alpha, - float stagnation_intensity, - float kzm_damping, - float projected_drift - ); + /** + * Forward pass through the micro-MLP to produce update decisions. + * + * @param layer_stats Vector of per-layer feature maps + * @param loss_trend Current loss velocity + * @param step_pct Training progress [0, 1] + * @param rollback_rate Rolling average rollback frequency + * @param grad_stability Gradient coefficient of variation + * @param spectral_alpha Log spectral rank + * @param stagnation_intensity Stagnation counter / 500 + * @param kzm_damping Kibble-Zurek damping factor + * @param projected_drift Projected loss drift + * @return Tensor of shape [num_groups, 3] — (mult, sign_gate, wd_mult) + */ + torch::Tensor + decide_update(const std::vector> &layer_stats, + float loss_trend, float step_pct, float rollback_rate, + float grad_stability, float spectral_alpha, + float stagnation_intensity, float kzm_damping, + float projected_drift); - // --------------------------------------------------------------- - // Evolutionary Operators - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Evolutionary Operators + // --------------------------------------------------------------- - /// Create a mutated child - FuzzyController mutate(float current_loss, float sigma_scale = 1.0f) const; + /// Create a mutated child + FuzzyController mutate(float current_loss, float sigma_scale = 1.0f) const; - /// Crossover with another controller - FuzzyController crossover(const FuzzyController& partner, bool use_alignment = true) const; + /// Crossover with another controller + FuzzyController crossover(const FuzzyController &partner, + bool use_alignment = true) const; - /// Create an orthogonal counter-strategy (Phoenix Rebirth) - FuzzyController create_orthogonal_child(float intensity = 1.0f) const; + /// Create an orthogonal counter-strategy (Phoenix Rebirth) + FuzzyController create_orthogonal_child(float intensity = 1.0f) const; - /// Banach-Tarski fission: split into two complementary children - std::pair banach_tarski_fission(float intensity = 1.0f) const; + /// Banach-Tarski fission: split into two complementary children + std::pair + banach_tarski_fission(float intensity = 1.0f) const; private: - static std::atomic next_id_; - static thread_local std::mt19937 rng_; + static std::atomic next_id_; + static thread_local std::mt19937 rng_; }; -} // namespace fces +} // namespace fces diff --git a/include/fces/evolution.hpp b/include/fces/evolution.hpp index 5cb2b4e..ae05546 100644 --- a/include/fces/evolution.hpp +++ b/include/fces/evolution.hpp @@ -21,34 +21,27 @@ namespace fces { */ class EvolutionManager { public: - explicit EvolutionManager( - Population& population, - int selection_interval = 50, - bool auto_population = false, - bool direct_construction = false - ); + explicit EvolutionManager(Population &population, int selection_interval = 50, + bool auto_population = false, + bool direct_construction = false); - /// Get the currently active controller - FuzzyController& get_active_controller(); + /// Get the currently active controller + FuzzyController &get_active_controller(); - /// Update population dynamics based on current training state - void update_population_dynamics( - float loss_velocity, - float ema_loss, - int step_counter, - int total_steps - ); + /// Update population dynamics based on current training state + void update_population_dynamics(float loss_velocity, float ema_loss, + int step_counter, int total_steps); - /// Steps the active controller has been in control - int steps_active = 0; + /// Steps the active controller has been in control + int steps_active = 0; - /// Selection interval (how long a controller stays active) - int selection_interval; + /// Selection interval (how long a controller stays active) + int selection_interval; private: - Population& population_; - bool auto_population_; - bool direct_construction_; + Population &population_; + bool auto_population_; + bool direct_construction_; }; -} // namespace fces +} // namespace fces diff --git a/include/fces/fitness.hpp b/include/fces/fitness.hpp index f529c6a..7d8d62a 100644 --- a/include/fces/fitness.hpp +++ b/include/fces/fitness.hpp @@ -2,14 +2,15 @@ /** * @file fitness.hpp - * @brief Fitness evaluation — loss signal processing and multi-objective evaluation. + * @brief Fitness evaluation — loss signal processing and multi-objective + * evaluation. * * Port of: packages/fces/core/fitness_engine.py + fitness.py */ #include -#include #include +#include namespace fces { @@ -19,18 +20,18 @@ namespace fces { */ class RunningStats { public: - void update(float value); - float z_score(float value) const; - float get_mean() const { return mean_; } - float get_std() const; - int get_count() const { return count_; } + void update(float value); + float z_score(float value) const; + float get_mean() const { return mean_; } + float get_std() const; + int get_count() const { return count_; } - void reset(); + void reset(); private: - int count_ = 0; - float mean_ = 0.0f; - float m2_ = 0.0f; + int count_ = 0; + float mean_ = 0.0f; + float m2_ = 0.0f; }; /** @@ -38,26 +39,27 @@ private: */ class FitnessEngine { public: - explicit FitnessEngine(float grokking_coefficient = 0.1f); + explicit FitnessEngine(float grokking_coefficient = 0.1f); - /** - * Calculate loss velocity signal. - * - * @param current_loss Current step loss - * @param ema_loss Exponential moving average loss - * @param mode "relative" or "absolute" - * @return Velocity signal (negative = improving) - */ - float calculate_loss_signal(float current_loss, float ema_loss, const std::string& mode = "relative") const; + /** + * Calculate loss velocity signal. + * + * @param current_loss Current step loss + * @param ema_loss Exponential moving average loss + * @param mode "relative" or "absolute" + * @return Velocity signal (negative = improving) + */ + float calculate_loss_signal(float current_loss, float ema_loss, + const std::string &mode = "relative") const; - /** - * Compute Kibble-Zurek Mechanism damping factor. - * Prevents topological defects during phase transitions. - */ - float compute_kzm_damping(float spectral_alpha) const; + /** + * Compute Kibble-Zurek Mechanism damping factor. + * Prevents topological defects during phase transitions. + */ + float compute_kzm_damping(float spectral_alpha) const; private: - float grokking_coefficient_; + float grokking_coefficient_; }; /** @@ -65,75 +67,76 @@ private: */ class FuzzySet { public: - FuzzySet(std::string name, float a, float b, float c, float d) noexcept - : name_(std::move(name)), a_(a), b_(b), c_(c), d_(d) {} + FuzzySet(std::string name, float a, float b, float c, float d) noexcept + : name_(std::move(name)), a_(a), b_(b), c_(c), d_(d) {} - float membership(float x) const noexcept { - if (!std::isfinite(x)) { - return 0.0f; - } - if (x <= a_ || x >= d_) { - return 0.0f; - } - if (x >= b_ && x <= c_) { - return 1.0f; - } - if (x > a_ && x < b_) { - float range = b_ - a_; - return (x - a_) / (range > 0.0f ? range : 1e-9f); - } - if (x > c_ && x < d_) { - float range = d_ - c_; - return (d_ - x) / (range > 0.0f ? range : 1e-9f); - } - return 0.0f; + float membership(float x) const noexcept { + if (!std::isfinite(x)) { + return 0.0f; } + if (x <= a_ || x >= d_) { + return 0.0f; + } + if (x >= b_ && x <= c_) { + return 1.0f; + } + if (x > a_ && x < b_) { + float range = b_ - a_; + return (x - a_) / (range > 0.0f ? range : 1e-9f); + } + if (x > c_ && x < d_) { + float range = d_ - c_; + return (d_ - x) / (range > 0.0f ? range : 1e-9f); + } + return 0.0f; + } - const std::string& name() const noexcept { return name_; } + const std::string &name() const noexcept { return name_; } private: - std::string name_; - float a_; - float b_; - float c_; - float d_; + std::string name_; + float a_; + float b_; + float c_; + float d_; }; /** * Fitness metrics for multi-objective evaluation. */ struct FitnessMetrics { - float training_advantage = 0.0f; - float validation_advantage = 0.0f; - float grad_cv = 0.0f; - float sparsity_delta = 0.0f; - float consistency_gap = 0.0f; - float stable_rank = 0.0f; + float training_advantage = 0.0f; + float validation_advantage = 0.0f; + float grad_cv = 0.0f; + float sparsity_delta = 0.0f; + float consistency_gap = 0.0f; + float stable_rank = 0.0f; }; /** - * FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy weighting. + * FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy + * weighting. */ class FuzzyFitnessEvaluator { public: - FuzzyFitnessEvaluator() noexcept; + FuzzyFitnessEvaluator() noexcept; - float evaluate(const FitnessMetrics& metrics) const noexcept; + float evaluate(const FitnessMetrics &metrics) const noexcept; private: - FuzzySet stability_set_; - FuzzySet train_set_; - FuzzySet val_set_; - FuzzySet sparsity_set_; - FuzzySet consistency_set_; - FuzzySet rank_set_; + FuzzySet stability_set_; + FuzzySet train_set_; + FuzzySet val_set_; + FuzzySet sparsity_set_; + FuzzySet consistency_set_; + FuzzySet rank_set_; - float w_stability_ = 0.2f; - float w_train_ = 0.2f; - float w_val_ = 0.3f; - float w_sparsity_ = 0.1f; - float w_consistency_ = 0.2f; - float w_rank_ = 0.1f; + float w_stability_ = 0.2f; + float w_train_ = 0.2f; + float w_val_ = 0.3f; + float w_sparsity_ = 0.1f; + float w_consistency_ = 0.2f; + float w_rank_ = 0.1f; }; -} // namespace fces +} // namespace fces diff --git a/include/fces/optimizer.hpp b/include/fces/optimizer.hpp index 6d3e818..341cb6f 100644 --- a/include/fces/optimizer.hpp +++ b/include/fces/optimizer.hpp @@ -5,17 +5,17 @@ * @brief FCESOptimizer — the main entry point. libtorch-compatible optimizer. */ -#include #include -#include #include +#include +#include #include "config.hpp" -#include "population.hpp" -#include "fitness.hpp" #include "evolution.hpp" -#include "spectral.hpp" +#include "fitness.hpp" #include "oscillation.hpp" +#include "population.hpp" +#include "spectral.hpp" #include "telemetry.hpp" namespace fces { @@ -24,7 +24,8 @@ namespace fces { * FCESOptimizer — Fuzzy Controlled Evolutionary Search V49.0 (C++ Port). * * Usage: - * auto optimizer = FCESOptimizer(model->parameters(), FCESConfig{}.set_lr(1.6e-3)); + * auto optimizer = FCESOptimizer(model->parameters(), + * FCESConfig{}.set_lr(1.6e-3)); * // In training loop: * optimizer.zero_grad(); * auto loss = model->forward(input); @@ -32,73 +33,72 @@ namespace fces { * optimizer.step(); * optimizer.update_fitness(loss.item()); */ -struct FCESOptimizerOptions : public torch::optim::OptimizerCloneableOptions { - explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {} +struct FCESOptimizerOptions + : public torch::optim::OptimizerCloneableOptions { + explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {} - double get_lr() const override { return lr_; } - void set_lr(const double lr) override { lr_ = lr; } + double get_lr() const override { return lr_; } + void set_lr(const double lr) override { lr_ = lr; } - double lr_; + double lr_; }; class FCESOptimizer : public torch::optim::Optimizer { public: - explicit FCESOptimizer( - std::vector params, - FCESConfig config = FCESConfig{} - ); + explicit FCESOptimizer(std::vector params, + FCESConfig config = FCESConfig{}); - /// Perform a single optimization step - torch::Tensor step(LossClosure closure = nullptr) override; + /// Perform a single optimization step + torch::Tensor step(LossClosure closure = nullptr) override; - /// Update evolutionary fitness with current loss - void update_fitness(float loss); + /// Update evolutionary fitness with current loss + void update_fitness(float loss); - /// Backup model weights to CPU RAM - void backup_to_ram(); + /// Backup model weights to CPU RAM + void backup_to_ram(); - /// Restore model weights from CPU RAM backup - void restore_from_ram(); + /// Restore model weights from CPU RAM backup + void restore_from_ram(); - /// Get current step count - int step_count() const { return step_counter_; } + /// Get current step count + int step_count() const { return step_counter_; } - /// Calculate model sparsity - float calculate_sparsity() const; + /// Calculate model sparsity + float calculate_sparsity() const; private: - FCESConfig config_; - Population population_; - FitnessEngine fitness_engine_; - FuzzyFitnessEvaluator fitness_evaluator_; - std::unique_ptr evolution_manager_; - OscillationDetector oscillation_detector_; - RunningStats grad_norm_tracker_; + FCESConfig config_; + Population population_; + FitnessEngine fitness_engine_; + FuzzyFitnessEvaluator fitness_evaluator_; + std::unique_ptr evolution_manager_; + OscillationDetector oscillation_detector_; + RunningStats grad_norm_tracker_; - // State - int step_counter_ = 0; - float ema_loss_ = 0.0f; - float last_step_loss_ = 0.0f; - float best_loss_window_ = std::numeric_limits::infinity(); - float rollback_ema_ = 0.0f; - int stagnation_counter_ = 0; - float last_loss_velocity_ = 0.0f; - float last_sparsity_ = 0.0f; + // State + int step_counter_ = 0; + float ema_loss_ = 0.0f; + float last_step_loss_ = 0.0f; + float best_loss_window_ = std::numeric_limits::infinity(); + float rollback_ema_ = 0.0f; + int stagnation_counter_ = 0; + float last_loss_velocity_ = 0.0f; + float last_sparsity_ = 0.0f; - // RAM backup - std::vector ram_backup_; + // RAM backup + std::vector ram_backup_; - // Layer stats and group mappings - std::vector> layer_stats_; - std::vector param_group_mapping_; - std::unique_ptr spectral_sensor_; - SpectralController spectral_controller_; - float last_spectral_rank_ = 0.0f; + // Layer stats and group mappings + std::vector> layer_stats_; + std::vector param_group_mapping_; + std::unique_ptr spectral_sensor_; + SpectralController spectral_controller_; + float last_spectral_rank_ = 0.0f; - // Internal methods - void gather_stats(); - void apply_parameter_updates(const torch::Tensor& actions); - void handle_rollback(); + // Internal methods + void gather_stats(); + void apply_parameter_updates(const torch::Tensor &actions); + void handle_rollback(); }; -} // namespace fces +} // namespace fces diff --git a/include/fces/oscillation.hpp b/include/fces/oscillation.hpp index 49dc532..2c73215 100644 --- a/include/fces/oscillation.hpp +++ b/include/fces/oscillation.hpp @@ -11,21 +11,22 @@ namespace fces { class OscillationDetector { public: - static constexpr int WINDOW_SIZE = 64; - static constexpr float POWER_THRESHOLD = 0.5f; - static constexpr int MIN_PERIOD = 4; - static constexpr int MAX_PERIOD = 16; + static constexpr int WINDOW_SIZE = 64; + static constexpr float POWER_THRESHOLD = 0.5f; + static constexpr int MIN_PERIOD = 4; + static constexpr int MAX_PERIOD = 16; - void update(float loss); - bool detect() const; - float get_score() const; - float get_variance_50() const; - void reset(); + void update(float loss); + bool detect() const; + float get_score() const; + float get_variance_50() const; + void reset(); private: - std::vector loss_history_; - static std::vector detrend(const std::vector& signal); - static std::vector compute_power_spectrum(const std::vector& signal); + std::vector loss_history_; + static std::vector detrend(const std::vector &signal); + static std::vector + compute_power_spectrum(const std::vector &signal); }; -} // namespace fces +} // namespace fces diff --git a/include/fces/population.hpp b/include/fces/population.hpp index 2fd44a2..46e2a1f 100644 --- a/include/fces/population.hpp +++ b/include/fces/population.hpp @@ -16,9 +16,9 @@ * Port of: packages/fces/core/population.py (~1260 LOC) */ +#include #include #include -#include #include "controller.hpp" @@ -28,11 +28,11 @@ namespace fces { * Elite selection strategy for stale elite mitigation (Phase 23). */ enum class EliteStrategy { - Cumulative, // Raw cumulative fitness - EMA, // Exponential moving average - Rolling, // Rolling window average - Reset, // Periodic reset every 500 steps - AgePenalty // fitness / log(age + 2) + Cumulative, // Raw cumulative fitness + EMA, // Exponential moving average + Rolling, // Rolling window average + Reset, // Periodic reset every 500 steps + AgePenalty // fitness / log(age + 2) }; /** @@ -40,126 +40,124 @@ enum class EliteStrategy { */ class Population { public: - // Configuration constants - static constexpr int ELITE_COUNT = 2; - static constexpr float NOVELTY_WEIGHT = 0.1f; - static constexpr float ISLAND_MIGRATION_RATE = 0.05f; - static constexpr int BEHAVIORAL_ARCHIVE_SIZE = 100; + // Configuration constants + static constexpr int ELITE_COUNT = 2; + static constexpr float NOVELTY_WEIGHT = 0.1f; + static constexpr float ISLAND_MIGRATION_RATE = 0.05f; + static constexpr int BEHAVIORAL_ARCHIVE_SIZE = 100; - // --------------------------------------------------------------- - // Construction - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Construction + // --------------------------------------------------------------- - explicit Population( - int active_size = 75, - int repo_size = 10000, - EliteStrategy elite_strategy = EliteStrategy::Cumulative, - bool link_mutation = false, - bool link_elite = false, - bool link_violator = false, - bool use_fuzzy_pacer = false, - bool use_fuzzy_importance = false, - bool direct_construction = false, - bool use_banach_fission = false - ); + explicit Population(int active_size = 75, int repo_size = 10000, + EliteStrategy elite_strategy = EliteStrategy::Cumulative, + bool link_mutation = false, bool link_elite = false, + bool link_violator = false, bool use_fuzzy_pacer = false, + bool use_fuzzy_importance = false, + bool direct_construction = false, + bool use_banach_fission = false); - // --------------------------------------------------------------- - // Core API - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Core API + // --------------------------------------------------------------- - /// Get the currently active controller (sticky selection) - FuzzyController& get_active_controller(); + /// Get the currently active controller (sticky selection) + FuzzyController &get_active_controller(); - /// Select a controller via fitness-weighted tournament - FuzzyController& select_weighted(); + /// Select a controller via fitness-weighted tournament + FuzzyController &select_weighted(); - /// Get the best controller in the active population - FuzzyController& get_best_active(); + /// Get the best controller in the active population + FuzzyController &get_best_active(); - /// Get the worst non-elite controller - FuzzyController& get_worst_active(); + /// Get the worst non-elite controller + FuzzyController &get_worst_active(); - /// Remove a specific controller (unless elite) - void kill(FuzzyController& controller); + /// Remove a specific controller (unless elite) + void kill(FuzzyController &controller); - /// Update a controller's fitness - void update_controller_fitness(FuzzyController& controller, float reward, bool increment_eval = true); + /// Update a controller's fitness + void update_controller_fitness(FuzzyController &controller, float reward, + bool increment_eval = true); - /// Mark a controller as a violator (rollback) - void mark_violated(FuzzyController& controller); + /// Mark a controller as a violator (rollback) + void mark_violated(FuzzyController &controller); - /// Get the effective fitness considering elite strategy and training progress - float get_effective_fitness(const FuzzyController& controller, float training_progress) const; + /// Get the effective fitness considering elite strategy and training progress + float get_effective_fitness(const FuzzyController &controller, + float training_progress) const; - // --------------------------------------------------------------- - // Evolution - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Evolution + // --------------------------------------------------------------- - /** - * Evolve the population: select parents, crossover/mutate, replace worst. - * - * @param current_loss Current training loss - * @param velocity Loss velocity - * @param training_progress Training progress [0, 1] - */ - void evolve(float current_loss, float velocity = 0.0f, float training_progress = 0.0f); + /** + * Evolve the population: select parents, crossover/mutate, replace worst. + * + * @param current_loss Current training loss + * @param velocity Loss velocity + * @param training_progress Training progress [0, 1] + */ + void evolve(float current_loss, float velocity = 0.0f, + float training_progress = 0.0f); - /// Resize the population (dynamic expansion/contraction) - void resize(int target_size, float training_progress = 0.5f); + /// Resize the population (dynamic expansion/contraction) + void resize(int target_size, float training_progress = 0.5f); - /// Reduce mutation variance after rollback - void calm_down(); + /// Reduce mutation variance after rollback + void calm_down(); - // --------------------------------------------------------------- - // Accessors - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Accessors + // --------------------------------------------------------------- - int size() const { return static_cast(gladiators_.size()); } - float global_sigma_modifier() const { return global_sigma_modifier_; } + int size() const { return static_cast(gladiators_.size()); } + float global_sigma_modifier() const { return global_sigma_modifier_; } - /// Compute diversity index (behavioral spread) - float get_diversity_index() const; + /// Compute diversity index (behavioral spread) + float get_diversity_index() const; - /// Serialization - // TODO: state_dict / load_state_dict + /// Serialization + // TODO: state_dict / load_state_dict private: - std::vector gladiators_; - std::vector repository_; - std::vector violated_controllers_; + std::vector gladiators_; + std::vector repository_; + std::vector violated_controllers_; - float global_sigma_modifier_ = 1.0f; + float global_sigma_modifier_ = 1.0f; - // Sticky controller selection - FuzzyController* active_controller_ = nullptr; - int steps_active_ = 0; - int selection_interval_ = 20; + // Sticky controller selection + FuzzyController *active_controller_ = nullptr; + int steps_active_ = 0; + int selection_interval_ = 20; - // Configuration - EliteStrategy elite_strategy_; - bool link_mutation_; - bool link_elite_; - bool link_violator_; - bool use_fuzzy_pacer_; - bool use_fuzzy_importance_; - bool direct_construction_; - bool use_banach_fission_; + // Configuration + EliteStrategy elite_strategy_; + bool link_mutation_; + bool link_elite_; + bool link_violator_; + bool use_fuzzy_pacer_; + bool use_fuzzy_importance_; + bool direct_construction_; + bool use_banach_fission_; - // Novelty search - std::vector> behavioral_archive_; + // Novelty search + std::vector> behavioral_archive_; - // Fitness history for fuzzy pacer - std::vector fitness_history_; + // Fitness history for fuzzy pacer + std::vector fitness_history_; - // Phase 23: periodic reset counter - int reset_step_counter_ = 0; + // Phase 23: periodic reset counter + int reset_step_counter_ = 0; - // --------------------------------------------------------------- - // Internal - // --------------------------------------------------------------- + // --------------------------------------------------------------- + // Internal + // --------------------------------------------------------------- - std::vector get_elites(); - void add_to_repository(const FuzzyController& controller); + std::vector get_elites(); + void add_to_repository(const FuzzyController &controller); }; -} // namespace fces +} // namespace fces diff --git a/include/fces/spectral.hpp b/include/fces/spectral.hpp index 8f26c48..969c46f 100644 --- a/include/fces/spectral.hpp +++ b/include/fces/spectral.hpp @@ -23,23 +23,23 @@ namespace fces { */ class SpectralSensor { public: - SpectralSensor() = default; - explicit SpectralSensor(torch::nn::Module& model); + SpectralSensor() = default; + explicit SpectralSensor(torch::nn::Module &model); - /// Track a layer's weight tensor - void track_layer(const std::string& name, const torch::Tensor& weight); + /// Track a layer's weight tensor + void track_layer(const std::string &name, const torch::Tensor &weight); - /// Get the global (average) effective rank - float get_global_rank() const; + /// Get the global (average) effective rank + float get_global_rank() const; - /// Reset all tracked layers - void reset(); + /// Reset all tracked layers + void reset(); private: - std::unordered_map layer_ranks_; + std::unordered_map layer_ranks_; - /// Compute effective rank via SVD - static float compute_effective_rank(const torch::Tensor& weight); + /// Compute effective rank via SVD + static float compute_effective_rank(const torch::Tensor &weight); }; /** @@ -47,8 +47,8 @@ private: */ class SpectralController { public: - /// Compute the spectral alpha (gating factor for rank-aware updates) - float compute_alpha(float global_rank, float grokking_coefficient) const; + /// Compute the spectral alpha (gating factor for rank-aware updates) + float compute_alpha(float global_rank, float grokking_coefficient) const; }; -} // namespace fces +} // namespace fces diff --git a/include/fces/telemetry.hpp b/include/fces/telemetry.hpp index ef922d9..974e884 100644 --- a/include/fces/telemetry.hpp +++ b/include/fces/telemetry.hpp @@ -11,16 +11,16 @@ namespace fces { class Telemetry { public: - static Telemetry& get(); + static Telemetry &get(); - void info(const std::string& event, const std::string& detail = ""); - void warning(const std::string& event, const std::string& detail = ""); - void error(const std::string& event, const std::string& detail = ""); + void info(const std::string &event, const std::string &detail = ""); + void warning(const std::string &event, const std::string &detail = ""); + void error(const std::string &event, const std::string &detail = ""); - void push_to_remote(); + void push_to_remote(); private: - Telemetry() = default; + Telemetry() = default; }; -} // namespace fces +} // namespace fces diff --git a/python/fces_native.cpp b/python/fces_native.cpp index 9a010e1..b081996 100644 --- a/python/fces_native.cpp +++ b/python/fces_native.cpp @@ -13,39 +13,40 @@ #include #include -#include "fces/optimizer.hpp" #include "fces/config.hpp" +#include "fces/optimizer.hpp" namespace py = pybind11; PYBIND11_MODULE(fces_native, m) { - m.doc() = "FCES-native: High-performance C++ FCES optimizer"; + m.doc() = "FCES-native: High-performance C++ FCES optimizer"; - py::class_(m, "FCESConfig") - .def(py::init<>()) - .def_readwrite("lr", &fces::FCESConfig::lr) - .def_readwrite("population_size", &fces::FCESConfig::population_size) - .def_readwrite("total_steps", &fces::FCESConfig::total_steps) - .def_readwrite("grokking_coefficient", &fces::FCESConfig::grokking_coefficient) - .def_readwrite("direct_construction", &fces::FCESConfig::direct_construction); + py::class_(m, "FCESConfig") + .def(py::init<>()) + .def_readwrite("lr", &fces::FCESConfig::lr) + .def_readwrite("population_size", &fces::FCESConfig::population_size) + .def_readwrite("total_steps", &fces::FCESConfig::total_steps) + .def_readwrite("grokking_coefficient", + &fces::FCESConfig::grokking_coefficient) + .def_readwrite("direct_construction", + &fces::FCESConfig::direct_construction); - py::class_(m, "FCESOptimizer") - .def(py::init, fces::FCESConfig>(), - py::arg("params"), - py::arg("config") = fces::FCESConfig{}) - .def("step", &fces::FCESOptimizer::step) - .def("update_fitness", &fces::FCESOptimizer::update_fitness) - .def("backup_to_ram", &fces::FCESOptimizer::backup_to_ram) - .def("restore_from_ram", &fces::FCESOptimizer::restore_from_ram) - .def("step_count", &fces::FCESOptimizer::step_count) - .def("calculate_sparsity", &fces::FCESOptimizer::calculate_sparsity) - .def("zero_grad", [](fces::FCESOptimizer& self) { - for (auto& group : self.param_groups()) { - for (auto& p : group.params()) { - if (p.grad().defined()) { - p.grad().zero_(); - } - } + py::class_(m, "FCESOptimizer") + .def(py::init, fces::FCESConfig>(), + py::arg("params"), py::arg("config") = fces::FCESConfig{}) + .def("step", &fces::FCESOptimizer::step) + .def("update_fitness", &fces::FCESOptimizer::update_fitness) + .def("backup_to_ram", &fces::FCESOptimizer::backup_to_ram) + .def("restore_from_ram", &fces::FCESOptimizer::restore_from_ram) + .def("step_count", &fces::FCESOptimizer::step_count) + .def("calculate_sparsity", &fces::FCESOptimizer::calculate_sparsity) + .def("zero_grad", [](fces::FCESOptimizer &self) { + for (auto &group : self.param_groups()) { + for (auto &p : group.params()) { + if (p.grad().defined()) { + p.grad().zero_(); } - }); + } + } + }); } diff --git a/src/controller.cpp b/src/controller.cpp index 60108b1..45d3d82 100644 --- a/src/controller.cpp +++ b/src/controller.cpp @@ -13,341 +13,354 @@ thread_local std::mt19937 FuzzyController::rng_{std::random_device{}()}; // Genome // --------------------------------------------------------------- -void Genome::randomize(std::mt19937& rng) { - std::normal_distribution dist(0.0f, 0.5f); - for (auto& w : weights) { - w = dist(rng); - } - gene_success.fill(0.0f); +void Genome::randomize(std::mt19937 &rng) { + std::normal_distribution dist(0.0f, 0.5f); + for (auto &w : weights) { + w = dist(rng); + } + gene_success.fill(0.0f); } Genome Genome::clone() const { - return *this; // Copy all fields + return *this; // Copy all fields } // --------------------------------------------------------------- // FuzzyController // --------------------------------------------------------------- -FuzzyController::FuzzyController() - : id(next_id_++), origin("random") { - genome.randomize(rng_); - // Bias output toward acceleration (V2.1 insight) - // Set output biases (last GENOME_OUTPUT_DIM elements) to +2.0, -1.0, 0.0 with noise - constexpr int bias_start = GENOME_SIZE - GENOME_OUTPUT_DIM; - std::normal_distribution bias_noise(0.0f, 0.5f); - genome.weights[bias_start] = 2.0f + bias_noise(rng_); - genome.weights[bias_start + 1] = -1.0f + bias_noise(rng_); - genome.weights[bias_start + 2] = 0.0f + bias_noise(rng_); - - // Initialize plasticity with variance to seed evolution - std::normal_distribution plast_noise(0.0f, 0.05f); - genome.plasticity = std::max(0.01f, 0.1f + plast_noise(rng_)); - genome.sigma_gene = 0.1f; - genome.gene_success.fill(1.0f); +FuzzyController::FuzzyController() : id(next_id_++), origin("random") { + genome.randomize(rng_); + // Bias output toward acceleration (V2.1 insight) + // Set output biases (last GENOME_OUTPUT_DIM elements) to +2.0, -1.0, 0.0 with + // noise + constexpr int bias_start = GENOME_SIZE - GENOME_OUTPUT_DIM; + std::normal_distribution bias_noise(0.0f, 0.5f); + genome.weights[bias_start] = 2.0f + bias_noise(rng_); + genome.weights[bias_start + 1] = -1.0f + bias_noise(rng_); + genome.weights[bias_start + 2] = 0.0f + bias_noise(rng_); + + // Initialize plasticity with variance to seed evolution + std::normal_distribution plast_noise(0.0f, 0.05f); + genome.plasticity = std::max(0.01f, 0.1f + plast_noise(rng_)); + genome.sigma_gene = 0.1f; + genome.gene_success.fill(1.0f); } FuzzyController::FuzzyController(Genome genome) : id(next_id_++), genome(std::move(genome)), origin("constructed") {} torch::Tensor FuzzyController::decide_update( - const std::vector>& layer_stats, - float loss_trend, - float step_pct, - float rollback_rate, - float grad_stability, - float spectral_alpha, - float stagnation_intensity, - float kzm_damping, - float projected_drift -) { - const int num_groups = static_cast(layer_stats.size()); - auto actions = torch::zeros({num_groups, GENOME_OUTPUT_DIM}); + const std::vector> &layer_stats, float loss_trend, + float step_pct, float rollback_rate, float grad_stability, + float spectral_alpha, float stagnation_intensity, float kzm_damping, + float projected_drift) { + const int num_groups = static_cast(layer_stats.size()); + auto actions = torch::zeros({num_groups, GENOME_OUTPUT_DIM}); - // Extract weight views for the micro-MLP - const float* w = genome.weights.data(); + // Extract weight views for the micro-MLP + const float *w = genome.weights.data(); - // Layer 1: input -> hidden - const float* W1 = w; // [(GENOME_INPUT_DIM + 1) x GENOME_HIDDEN_DIM] - // Layer 2: hidden -> output - const float* W2 = w + ((GENOME_INPUT_DIM + 1) * GENOME_HIDDEN_DIM); // [(GENOME_HIDDEN_DIM + 1) x GENOME_OUTPUT_DIM] + // Layer 1: input -> hidden + const float *W1 = w; // [(GENOME_INPUT_DIM + 1) x GENOME_HIDDEN_DIM] + // Layer 2: hidden -> output + const float *W2 = + w + ((GENOME_INPUT_DIM + 1) * + GENOME_HIDDEN_DIM); // [(GENOME_HIDDEN_DIM + 1) x GENOME_OUTPUT_DIM] - for (int g = 0; g < num_groups; ++g) { - // One-Hot Layer Type: Clamp to 5 to avoid overflow for new categories - float layer_type_val = (layer_stats[g].size() >= 3) ? layer_stats[g][2] : 5.0f; - int type_idx = std::min(5, static_cast(layer_type_val)); - std::array type_onehot{0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - if (type_idx >= 0 && type_idx < 5) { - type_onehot[type_idx] = 1.0f; - } - - // Build input vector - std::array input{}; - float gn = (layer_stats[g].size() >= 1) ? layer_stats[g][0] : 0.0f; - float sp = (layer_stats[g].size() >= 2) ? layer_stats[g][1] : 0.0f; - - // Sanitization matching nan_to_num - if (!std::isfinite(gn) || std::isnan(gn)) gn = 0.0f; - if (gn > 10.0f) gn = 10.0f; - if (gn < 0.0f) gn = 0.0f; - - if (!std::isfinite(sp) || std::isnan(sp)) sp = 0.0f; - if (sp > 1.0f) sp = 1.0f; - if (sp < 0.0f) sp = 0.0f; - - input[0] = gn; - input[1] = sp; - input[2] = loss_trend; - input[3] = step_pct; - input[4] = (num_groups > 1) ? static_cast(g) / (num_groups - 1.0f) : 0.0f; - input[5] = rollback_rate; - input[6] = grad_stability; - input[7] = spectral_alpha; - input[8] = type_onehot[0]; - input[9] = type_onehot[1]; - input[10] = type_onehot[2]; - input[11] = type_onehot[3]; - input[12] = type_onehot[4]; - input[13] = projected_drift; - - // Forward pass: hidden = tanh(W1 * [input, 1]) - std::array hidden{}; - for (int h = 0; h < GENOME_HIDDEN_DIM; ++h) { - float sum = W1[GENOME_INPUT_DIM * GENOME_HIDDEN_DIM + h]; // Bias weight - for (int i = 0; i < GENOME_INPUT_DIM; ++i) { - sum += input[i] * W1[i * GENOME_HIDDEN_DIM + h]; - } - hidden[h] = std::tanh(sum); - } - - // Output = W2 * [hidden, 1] - std::array out_layer{}; - for (int o = 0; o < GENOME_OUTPUT_DIM; ++o) { - float sum = W2[GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM + o]; // Bias weight - for (int h = 0; h < GENOME_HIDDEN_DIM; ++h) { - sum += hidden[h] * W2[h * GENOME_OUTPUT_DIM + o]; - } - out_layer[o] = sum; - } - - // Parse Output - float log_mult_raw = out_layer[0]; - float log_wd_raw = out_layer[2]; - float sign_logit = out_layer[1]; - - if (!std::isfinite(sign_logit) || std::isnan(sign_logit)) { - sign_logit = 0.0f; - } - - float noise_std = 0.0f; - if (genome.plasticity > 0.01f) { - noise_std += genome.plasticity; - } - if (stagnation_intensity > 0.0f) { - noise_std += stagnation_intensity * 0.5f; - } - - if (kzm_damping > 0.0f) { - noise_std *= (1.0f - kzm_damping); - log_mult_raw *= (1.0f - kzm_damping); - } - - if (noise_std > 0.0f) { - std::normal_distribution noise_dist(0.0f, noise_std); - log_mult_raw += noise_dist(rng_); - log_wd_raw += noise_dist(rng_); - } - - if (!std::isfinite(log_mult_raw) || std::isnan(log_mult_raw)) { - log_mult_raw = 0.0f; - } - log_mult_raw = std::max(-6.0f, std::min(6.0f, log_mult_raw)); - float mult = std::pow(2.0f, log_mult_raw); - - if (!std::isfinite(log_wd_raw) || std::isnan(log_wd_raw)) { - log_wd_raw = 0.0f; - } - log_wd_raw = std::max(-6.0f, std::min(6.0f, log_wd_raw)); - float wd_mult = std::pow(2.0f, log_wd_raw); - - actions[g][0] = mult; - actions[g][1] = sign_logit; - actions[g][2] = wd_mult; + for (int g = 0; g < num_groups; ++g) { + // One-Hot Layer Type: Clamp to 5 to avoid overflow for new categories + float layer_type_val = + (layer_stats[g].size() >= 3) ? layer_stats[g][2] : 5.0f; + int type_idx = std::min(5, static_cast(layer_type_val)); + std::array type_onehot{0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + if (type_idx >= 0 && type_idx < 5) { + type_onehot[type_idx] = 1.0f; } - return actions; + // Build input vector + std::array input{}; + float gn = (layer_stats[g].size() >= 1) ? layer_stats[g][0] : 0.0f; + float sp = (layer_stats[g].size() >= 2) ? layer_stats[g][1] : 0.0f; + + // Sanitization matching nan_to_num + if (!std::isfinite(gn) || std::isnan(gn)) + gn = 0.0f; + if (gn > 10.0f) + gn = 10.0f; + if (gn < 0.0f) + gn = 0.0f; + + if (!std::isfinite(sp) || std::isnan(sp)) + sp = 0.0f; + if (sp > 1.0f) + sp = 1.0f; + if (sp < 0.0f) + sp = 0.0f; + + input[0] = gn; + input[1] = sp; + input[2] = loss_trend; + input[3] = step_pct; + input[4] = + (num_groups > 1) ? static_cast(g) / (num_groups - 1.0f) : 0.0f; + input[5] = rollback_rate; + input[6] = grad_stability; + input[7] = spectral_alpha; + input[8] = type_onehot[0]; + input[9] = type_onehot[1]; + input[10] = type_onehot[2]; + input[11] = type_onehot[3]; + input[12] = type_onehot[4]; + input[13] = projected_drift; + + // Forward pass: hidden = tanh(W1 * [input, 1]) + std::array hidden{}; + for (int h = 0; h < GENOME_HIDDEN_DIM; ++h) { + float sum = W1[GENOME_INPUT_DIM * GENOME_HIDDEN_DIM + h]; // Bias weight + for (int i = 0; i < GENOME_INPUT_DIM; ++i) { + sum += input[i] * W1[i * GENOME_HIDDEN_DIM + h]; + } + hidden[h] = std::tanh(sum); + } + + // Output = W2 * [hidden, 1] + std::array out_layer{}; + for (int o = 0; o < GENOME_OUTPUT_DIM; ++o) { + float sum = W2[GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM + o]; // Bias weight + for (int h = 0; h < GENOME_HIDDEN_DIM; ++h) { + sum += hidden[h] * W2[h * GENOME_OUTPUT_DIM + o]; + } + out_layer[o] = sum; + } + + // Parse Output + float log_mult_raw = out_layer[0]; + float log_wd_raw = out_layer[2]; + float sign_logit = out_layer[1]; + + if (!std::isfinite(sign_logit) || std::isnan(sign_logit)) { + sign_logit = 0.0f; + } + + float noise_std = 0.0f; + if (genome.plasticity > 0.01f) { + noise_std += genome.plasticity; + } + if (stagnation_intensity > 0.0f) { + noise_std += stagnation_intensity * 0.5f; + } + + if (kzm_damping > 0.0f) { + noise_std *= (1.0f - kzm_damping); + log_mult_raw *= (1.0f - kzm_damping); + } + + if (noise_std > 0.0f) { + std::normal_distribution noise_dist(0.0f, noise_std); + log_mult_raw += noise_dist(rng_); + log_wd_raw += noise_dist(rng_); + } + + if (!std::isfinite(log_mult_raw) || std::isnan(log_mult_raw)) { + log_mult_raw = 0.0f; + } + log_mult_raw = std::max(-6.0f, std::min(6.0f, log_mult_raw)); + float mult = std::pow(2.0f, log_mult_raw); + + if (!std::isfinite(log_wd_raw) || std::isnan(log_wd_raw)) { + log_wd_raw = 0.0f; + } + log_wd_raw = std::max(-6.0f, std::min(6.0f, log_wd_raw)); + float wd_mult = std::pow(2.0f, log_wd_raw); + + actions[g][0] = mult; + actions[g][1] = sign_logit; + actions[g][2] = wd_mult; + } + + return actions; } -FuzzyController FuzzyController::mutate(float current_loss, float sigma_scale) const { - Genome child_genome = genome.clone(); - std::normal_distribution std_normal(0.0f, 1.0f); +FuzzyController FuzzyController::mutate(float current_loss, + float sigma_scale) const { + Genome child_genome = genome.clone(); + std::normal_distribution std_normal(0.0f, 1.0f); - float tau = 0.2f; - float new_sigma = genome.sigma_gene * std::exp(tau * std_normal(rng_)); - new_sigma = std::max(0.001f, std::min(0.8f, new_sigma)); + float tau = 0.2f; + float new_sigma = genome.sigma_gene * std::exp(tau * std_normal(rng_)); + new_sigma = std::max(0.001f, std::min(0.8f, new_sigma)); - float new_plast = genome.plasticity * std::exp(tau * std_normal(rng_)); - new_plast = std::max(0.0f, std::min(0.5f, new_plast)); + float new_plast = genome.plasticity * std::exp(tau * std_normal(rng_)); + new_plast = std::max(0.0f, std::min(0.5f, new_plast)); - float loss_val = std::max(0.0f, current_loss); - float annealing_factor = std::sqrt(loss_val + 0.1f); - float effective_sigma = new_sigma * sigma_scale * annealing_factor; + float loss_val = std::max(0.0f, current_loss); + float annealing_factor = std::sqrt(loss_val + 0.1f); + float effective_sigma = new_sigma * sigma_scale * annealing_factor; - std::normal_distribution noise(0.0f, effective_sigma); - for (size_t i = 0; i < child_genome.weights.size(); ++i) { - child_genome.weights[i] += noise(rng_); - child_genome.gene_success[i] = genome.gene_success[i] * 0.95f; - } + std::normal_distribution noise(0.0f, effective_sigma); + for (size_t i = 0; i < child_genome.weights.size(); ++i) { + child_genome.weights[i] += noise(rng_); + child_genome.gene_success[i] = genome.gene_success[i] * 0.95f; + } - child_genome.sigma_gene = new_sigma; - child_genome.plasticity = new_plast; + child_genome.sigma_gene = new_sigma; + child_genome.plasticity = new_plast; - FuzzyController child(child_genome); - child.origin = "mutation"; - return child; + FuzzyController child(child_genome); + child.origin = "mutation"; + return child; } -FuzzyController FuzzyController::crossover(const FuzzyController& partner, bool /*use_alignment*/) const { - Genome child_genome; - std::uniform_real_distribution u_dist(0.0f, 1.0f); +FuzzyController FuzzyController::crossover(const FuzzyController &partner, + bool /*use_alignment*/) const { + Genome child_genome; + std::uniform_real_distribution u_dist(0.0f, 1.0f); - for (size_t i = 0; i < child_genome.weights.size(); ++i) { - float success_self = genome.gene_success[i]; - float success_partner = partner.genome.gene_success[i]; - float prob_a = success_self / (success_self + success_partner + 1e-9f); + for (size_t i = 0; i < child_genome.weights.size(); ++i) { + float success_self = genome.gene_success[i]; + float success_partner = partner.genome.gene_success[i]; + float prob_a = success_self / (success_self + success_partner + 1e-9f); - bool choose_self = false; - if (u_dist(rng_) < 0.1f) { - // 10% Random injection - choose_self = (u_dist(rng_) < 0.5f); - } else { - // 90% Meritocratic - choose_self = (u_dist(rng_) < prob_a); - } - - if (choose_self) { - child_genome.weights[i] = genome.weights[i]; - child_genome.gene_success[i] = success_self; - } else { - child_genome.weights[i] = partner.genome.weights[i]; - child_genome.gene_success[i] = success_partner; - } + bool choose_self = false; + if (u_dist(rng_) < 0.1f) { + // 10% Random injection + choose_self = (u_dist(rng_) < 0.5f); + } else { + // 90% Meritocratic + choose_self = (u_dist(rng_) < prob_a); } - child_genome.sigma_gene = (genome.sigma_gene + partner.genome.sigma_gene) * 0.5f; - child_genome.plasticity = (genome.plasticity + partner.genome.plasticity) * 0.5f; + if (choose_self) { + child_genome.weights[i] = genome.weights[i]; + child_genome.gene_success[i] = success_self; + } else { + child_genome.weights[i] = partner.genome.weights[i]; + child_genome.gene_success[i] = success_partner; + } + } - FuzzyController child(child_genome); - child.origin = "crossover"; - return child; + child_genome.sigma_gene = + (genome.sigma_gene + partner.genome.sigma_gene) * 0.5f; + child_genome.plasticity = + (genome.plasticity + partner.genome.plasticity) * 0.5f; + + FuzzyController child(child_genome); + child.origin = "crossover"; + return child; } -FuzzyController FuzzyController::create_orthogonal_child(float intensity) const { - Genome child_genome; - std::normal_distribution norm_dist(0.0f, 1.0f); +FuzzyController +FuzzyController::create_orthogonal_child(float intensity) const { + Genome child_genome; + std::normal_distribution norm_dist(0.0f, 1.0f); - float norm_elite = 0.0f; - for (float w : genome.weights) { - norm_elite += w * w; - } - norm_elite = std::sqrt(norm_elite) + 1e-9f; + float norm_elite = 0.0f; + for (float w : genome.weights) { + norm_elite += w * w; + } + norm_elite = std::sqrt(norm_elite) + 1e-9f; - std::array random_vec{}; - float dot_product = 0.0f; + std::array random_vec{}; + float dot_product = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + random_vec[i] = norm_dist(rng_); + dot_product += random_vec[i] * genome.weights[i]; + } + + std::array orthogonal_vec{}; + float norm_ortho = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + float projection = + (dot_product / (norm_elite * norm_elite)) * genome.weights[i]; + orthogonal_vec[i] = random_vec[i] - projection; + norm_ortho += orthogonal_vec[i] * orthogonal_vec[i]; + } + norm_ortho = std::sqrt(norm_ortho) + 1e-9f; + + std::array scaled_vec{}; + float final_norm = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + scaled_vec[i] = orthogonal_vec[i] * (norm_elite / norm_ortho) * intensity; + final_norm += scaled_vec[i] * scaled_vec[i]; + } + final_norm = std::sqrt(final_norm); + + float max_allowed = std::max(norm_elite, 10.0f); + if (final_norm > max_allowed) { + float scale = max_allowed / (final_norm + 1e-9f); for (size_t i = 0; i < GENOME_SIZE; ++i) { - random_vec[i] = norm_dist(rng_); - dot_product += random_vec[i] * genome.weights[i]; + scaled_vec[i] *= scale; } + } - std::array orthogonal_vec{}; - float norm_ortho = 0.0f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - float projection = (dot_product / (norm_elite * norm_elite)) * genome.weights[i]; - orthogonal_vec[i] = random_vec[i] - projection; - norm_ortho += orthogonal_vec[i] * orthogonal_vec[i]; - } - norm_ortho = std::sqrt(norm_ortho) + 1e-9f; + child_genome.weights = scaled_vec; + child_genome.gene_success.fill(1.0f); + child_genome.sigma_gene = 0.2f; + child_genome.plasticity = 0.2f; - std::array scaled_vec{}; - float final_norm = 0.0f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - scaled_vec[i] = orthogonal_vec[i] * (norm_elite / norm_ortho) * intensity; - final_norm += scaled_vec[i] * scaled_vec[i]; - } - final_norm = std::sqrt(final_norm); - - float max_allowed = std::max(norm_elite, 10.0f); - if (final_norm > max_allowed) { - float scale = max_allowed / (final_norm + 1e-9f); - for (size_t i = 0; i < GENOME_SIZE; ++i) { - scaled_vec[i] *= scale; - } - } - - child_genome.weights = scaled_vec; - child_genome.gene_success.fill(1.0f); - child_genome.sigma_gene = 0.2f; - child_genome.plasticity = 0.2f; - - FuzzyController child(child_genome); - child.origin = "phoenix_rebirth"; - return child; + FuzzyController child(child_genome); + child.origin = "phoenix_rebirth"; + return child; } -std::pair FuzzyController::banach_tarski_fission(float intensity) const { - Genome plus_genome; - Genome minus_genome; +std::pair +FuzzyController::banach_tarski_fission(float intensity) const { + Genome plus_genome; + Genome minus_genome; - float norm_parent = 0.0f; - float max_success = 0.0f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - norm_parent += genome.weights[i] * genome.weights[i]; - if (genome.gene_success[i] > max_success) { - max_success = genome.gene_success[i]; - } + float norm_parent = 0.0f; + float max_success = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + norm_parent += genome.weights[i] * genome.weights[i]; + if (genome.gene_success[i] > max_success) { + max_success = genome.gene_success[i]; } - norm_parent = std::sqrt(norm_parent) + 1e-9f; - max_success += 1e-9f; + } + norm_parent = std::sqrt(norm_parent) + 1e-9f; + max_success += 1e-9f; - std::normal_distribution norm_dist(0.0f, 1.0f); - std::array noise{}; - float dot_product = 0.0f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - float saliency = genome.gene_success[i] / max_success; - noise[i] = norm_dist(rng_) * saliency; - dot_product += noise[i] * genome.weights[i]; - } + std::normal_distribution norm_dist(0.0f, 1.0f); + std::array noise{}; + float dot_product = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + float saliency = genome.gene_success[i] / max_success; + noise[i] = norm_dist(rng_) * saliency; + dot_product += noise[i] * genome.weights[i]; + } - std::array fission_vec{}; - float norm_fission = 0.0f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - fission_vec[i] = noise[i] - (dot_product / (norm_parent * norm_parent)) * genome.weights[i]; - norm_fission += fission_vec[i] * fission_vec[i]; - } - norm_fission = std::sqrt(norm_fission) + 1e-9f; + std::array fission_vec{}; + float norm_fission = 0.0f; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + fission_vec[i] = noise[i] - (dot_product / (norm_parent * norm_parent)) * + genome.weights[i]; + norm_fission += fission_vec[i] * fission_vec[i]; + } + norm_fission = std::sqrt(norm_fission) + 1e-9f; - for (size_t i = 0; i < GENOME_SIZE; ++i) { - float scaled_fission = fission_vec[i] * (norm_parent / norm_fission) * 0.1f * intensity; - plus_genome.weights[i] = genome.weights[i] + scaled_fission; - minus_genome.weights[i] = genome.weights[i] - scaled_fission; + for (size_t i = 0; i < GENOME_SIZE; ++i) { + float scaled_fission = + fission_vec[i] * (norm_parent / norm_fission) * 0.1f * intensity; + plus_genome.weights[i] = genome.weights[i] + scaled_fission; + minus_genome.weights[i] = genome.weights[i] - scaled_fission; - plus_genome.gene_success[i] = 1.0f; - minus_genome.gene_success[i] = 1.0f; - } + plus_genome.gene_success[i] = 1.0f; + minus_genome.gene_success[i] = 1.0f; + } - plus_genome.sigma_gene = genome.sigma_gene * 0.9f; - minus_genome.sigma_gene = genome.sigma_gene * 0.9f; + plus_genome.sigma_gene = genome.sigma_gene * 0.9f; + minus_genome.sigma_gene = genome.sigma_gene * 0.9f; - plus_genome.plasticity = genome.plasticity; - minus_genome.plasticity = genome.plasticity; + plus_genome.plasticity = genome.plasticity; + minus_genome.plasticity = genome.plasticity; - FuzzyController child_plus(plus_genome); - child_plus.origin = "fission_plus"; + FuzzyController child_plus(plus_genome); + child_plus.origin = "fission_plus"; - FuzzyController child_minus(minus_genome); - child_minus.origin = "fission_minus"; + FuzzyController child_minus(minus_genome); + child_minus.origin = "fission_minus"; - return {child_plus, child_minus}; + return {child_plus, child_minus}; } -} // namespace fces +} // namespace fces diff --git a/src/evolution.cpp b/src/evolution.cpp index d5d67bc..ddcf19d 100644 --- a/src/evolution.cpp +++ b/src/evolution.cpp @@ -2,51 +2,46 @@ namespace fces { -EvolutionManager::EvolutionManager( - Population& population, int selection_interval, - bool auto_population, bool direct_construction -) - : population_(population), - selection_interval(selection_interval), +EvolutionManager::EvolutionManager(Population &population, + int selection_interval, bool auto_population, + bool direct_construction) + : population_(population), selection_interval(selection_interval), auto_population_(auto_population), direct_construction_(direct_construction) {} -FuzzyController& EvolutionManager::get_active_controller() { - return population_.get_active_controller(); +FuzzyController &EvolutionManager::get_active_controller() { + return population_.get_active_controller(); } -void EvolutionManager::update_population_dynamics( - float loss_velocity, float ema_loss, int step_counter, int total_steps -) { - float progress = static_cast(step_counter) / std::max(1, total_steps); +void EvolutionManager::update_population_dynamics(float loss_velocity, + float ema_loss, + int step_counter, + int total_steps) { + float progress = static_cast(step_counter) / std::max(1, total_steps); - if (step_counter % 20 == 0) { - population_.evolve( - std::abs(loss_velocity), - loss_velocity, - progress - ); + if (step_counter % 20 == 0) { + population_.evolve(std::abs(loss_velocity), loss_velocity, progress); + } + + if (!auto_population_ || step_counter % 50 != 0) { + return; + } + + int current_pop = population_.size(); + float adaptive_threshold = 0.05f * (1.0f + ema_loss); + adaptive_threshold = std::min(0.5f, adaptive_threshold); + + if (std::abs(loss_velocity) < adaptive_threshold) { + int target_pop = 200; + if (target_pop > current_pop) { + population_.resize(target_pop, progress); } - - if (!auto_population_ || step_counter % 50 != 0) { - return; - } - - int current_pop = population_.size(); - float adaptive_threshold = 0.05f * (1.0f + ema_loss); - adaptive_threshold = std::min(0.5f, adaptive_threshold); - - if (std::abs(loss_velocity) < adaptive_threshold) { - int target_pop = 200; - if (target_pop > current_pop) { - population_.resize(target_pop, progress); - } - } else { - int target_pop = 40; - if (target_pop < current_pop) { - population_.resize(target_pop, progress); - } + } else { + int target_pop = 40; + if (target_pop < current_pop) { + population_.resize(target_pop, progress); } + } } -} // namespace fces +} // namespace fces diff --git a/src/fitness.cpp b/src/fitness.cpp index e7635eb..c6a46b3 100644 --- a/src/fitness.cpp +++ b/src/fitness.cpp @@ -1,7 +1,7 @@ #include "fces/fitness.hpp" +#include #include #include -#include #include namespace fces { @@ -11,28 +11,30 @@ namespace fces { // --------------------------------------------------------------- void RunningStats::update(float value) { - count_++; - float delta = value - mean_; - mean_ += delta / static_cast(count_); - float delta2 = value - mean_; - m2_ += delta * delta2; + count_++; + float delta = value - mean_; + mean_ += delta / static_cast(count_); + float delta2 = value - mean_; + m2_ += delta * delta2; } float RunningStats::z_score(float value) const { - float s = get_std(); - if (s < 1e-8f) return 0.0f; - return (value - mean_) / s; + float s = get_std(); + if (s < 1e-8f) + return 0.0f; + return (value - mean_) / s; } float RunningStats::get_std() const { - if (count_ < 2) return 1.0f; - return std::sqrt(m2_ / static_cast(count_ - 1)); + if (count_ < 2) + return 1.0f; + return std::sqrt(m2_ / static_cast(count_ - 1)); } void RunningStats::reset() { - count_ = 0; - mean_ = 0.0f; - m2_ = 0.0f; + count_ = 0; + mean_ = 0.0f; + m2_ = 0.0f; } // --------------------------------------------------------------- @@ -42,18 +44,20 @@ void RunningStats::reset() { FitnessEngine::FitnessEngine(float grokking_coefficient) : grokking_coefficient_(grokking_coefficient) {} -float FitnessEngine::calculate_loss_signal(float current_loss, float ema_loss, const std::string& mode) const { - if (ema_loss < 1e-8f) return 0.0f; +float FitnessEngine::calculate_loss_signal(float current_loss, float ema_loss, + const std::string &mode) const { + if (ema_loss < 1e-8f) + return 0.0f; - if (mode == "relative") { - return (current_loss - ema_loss) / (ema_loss + 1e-8f); - } - return current_loss - ema_loss; + if (mode == "relative") { + return (current_loss - ema_loss) / (ema_loss + 1e-8f); + } + return current_loss - ema_loss; } float FitnessEngine::compute_kzm_damping(float spectral_alpha) const { - // Kibble-Zurek damping: high spectral rank = more damping - return 1.0f / (1.0f + grokking_coefficient_ * spectral_alpha); + // Kibble-Zurek damping: high spectral rank = more damping + return 1.0f / (1.0f + grokking_coefficient_ * spectral_alpha); } // --------------------------------------------------------------- @@ -68,30 +72,28 @@ FuzzyFitnessEvaluator::FuzzyFitnessEvaluator() noexcept consistency_set_("Consistent", -1.0f, 0.0f, 0.02f, 0.1f), rank_set_("LowRank", -1.0f, 0.0f, 5.0f, 20.0f) {} -float FuzzyFitnessEvaluator::evaluate(const FitnessMetrics& metrics) const noexcept { - float m_stability = stability_set_.membership(metrics.grad_cv); - float m_train = train_set_.membership(metrics.training_advantage); - float m_val = val_set_.membership(metrics.validation_advantage); - float m_sparsity = sparsity_set_.membership(metrics.sparsity_delta); - float m_consistency = consistency_set_.membership(metrics.consistency_gap); - float m_rank = rank_set_.membership(metrics.stable_rank); +float FuzzyFitnessEvaluator::evaluate( + const FitnessMetrics &metrics) const noexcept { + float m_stability = stability_set_.membership(metrics.grad_cv); + float m_train = train_set_.membership(metrics.training_advantage); + float m_val = val_set_.membership(metrics.validation_advantage); + float m_sparsity = sparsity_set_.membership(metrics.sparsity_delta); + float m_consistency = consistency_set_.membership(metrics.consistency_gap); + float m_rank = rank_set_.membership(metrics.stable_rank); - float weighted_score = - m_stability * w_stability_ + - m_train * w_train_ + - m_val * w_val_ + - m_sparsity * w_sparsity_ + - m_consistency * w_consistency_ + - m_rank * w_rank_; + float weighted_score = m_stability * w_stability_ + m_train * w_train_ + + m_val * w_val_ + m_sparsity * w_sparsity_ + + m_consistency * w_consistency_ + m_rank * w_rank_; - float total_weight = w_stability_ + w_train_ + w_val_ + w_sparsity_ + w_consistency_ + w_rank_; - if (total_weight > 0.0f) { - weighted_score /= total_weight; - } + float total_weight = + w_stability_ + w_train_ + w_val_ + w_sparsity_ + w_consistency_ + w_rank_; + if (total_weight > 0.0f) { + weighted_score /= total_weight; + } - // V153: Generalization-Aware Gate (Non-Linear) - float gate_efficiency = 0.5f + 0.5f * m_consistency; - return weighted_score * gate_efficiency; + // V153: Generalization-Aware Gate (Non-Linear) + float gate_efficiency = 0.5f + 0.5f * m_consistency; + return weighted_score * gate_efficiency; } -} // namespace fces +} // namespace fces diff --git a/src/optimizer.cpp b/src/optimizer.cpp index c8643f8..33cea2d 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -1,475 +1,496 @@ #include "fces/optimizer.hpp" +#include #include #include -#include namespace fces { namespace { -int classify_layer_by_shape(const torch::Tensor& p) { - auto dims = p.sizes(); - if (dims.size() == 2) { - int64_t d0 = dims[0]; - int64_t d1 = dims[1]; - if (d0 > 10000 || d1 > 10000) { - return 0; // Embedding - } else if (d0 * 3 == d1 || d0 == d1 * 3) { - return 1; // Attention QKV - } else if (d0 == d1) { - return 3; // MLP/FFN - } else { - return 2; // Attention Proj - } - } else if (dims.size() == 1) { - if (dims[0] < 128) { - return 4; // LayerNorm - } else { - return 5; // Other / bias - } +int classify_layer_by_shape(const torch::Tensor &p) { + auto dims = p.sizes(); + if (dims.size() == 2) { + int64_t d0 = dims[0]; + int64_t d1 = dims[1]; + if (d0 > 10000 || d1 > 10000) { + return 0; // Embedding + } else if (d0 * 3 == d1 || d0 == d1 * 3) { + return 1; // Attention QKV + } else if (d0 == d1) { + return 3; // MLP/FFN + } else { + return 2; // Attention Proj } - return 5; // Other + } else if (dims.size() == 1) { + if (dims[0] < 128) { + return 4; // LayerNorm + } else { + return 5; // Other / bias + } + } + return 5; // Other } -torch::Tensor apply_trust_clipping(const torch::Tensor& p, torch::Tensor update, float trust_region_clip) { - if (torch::isnan(update).any().item() || torch::isinf(update).any().item()) { - return torch::zeros_like(update); +torch::Tensor apply_trust_clipping(const torch::Tensor &p, torch::Tensor update, + float trust_region_clip) { + if (torch::isnan(update).any().item() || + torch::isinf(update).any().item()) { + return torch::zeros_like(update); + } + + float p_norm = p.norm().item(); + if (p_norm > 1e-6f) { + float update_mag = update.norm().item(); + if (!std::isfinite(update_mag)) { + return torch::zeros_like(update); } - float p_norm = p.norm().item(); - if (p_norm > 1e-6f) { - float update_mag = update.norm().item(); - if (!std::isfinite(update_mag)) { - return torch::zeros_like(update); - } - - float max_update = trust_region_clip * p_norm; - if (update_mag > max_update) { - float correction = max_update / (update_mag + 1e-8f); - update.mul_(correction); - } + float max_update = trust_region_clip * p_norm; + if (update_mag > max_update) { + float correction = max_update / (update_mag + 1e-8f); + update.mul_(correction); } + } - if (torch::isnan(update).any().item() || torch::isinf(update).any().item()) { - return torch::zeros_like(update); - } + if (torch::isnan(update).any().item() || + torch::isinf(update).any().item()) { + return torch::zeros_like(update); + } - return update; + return update; } -float calculate_parasitic_reward(const torch::Tensor& p, float mult, const RunningStats& grad_norm_tracker) { - if (!p.grad().defined()) { - return 0.0f; - } - float g_norm = p.grad().abs().mean().item(); - float z_g = grad_norm_tracker.z_score(g_norm); - return z_g * (mult - 1.0f); +float calculate_parasitic_reward(const torch::Tensor &p, float mult, + const RunningStats &grad_norm_tracker) { + if (!p.grad().defined()) { + return 0.0f; + } + float g_norm = p.grad().abs().mean().item(); + float z_g = grad_norm_tracker.z_score(g_norm); + return z_g * (mult - 1.0f); } -std::unique_ptr make_optimizer_options(double lr) { - return std::make_unique(lr); +std::unique_ptr +make_optimizer_options(double lr) { + return std::make_unique(lr); } } // namespace -FCESOptimizer::FCESOptimizer( - std::vector params, - FCESConfig config -) +FCESOptimizer::FCESOptimizer(std::vector params, + FCESConfig config) : torch::optim::Optimizer( {torch::optim::OptimizerParamGroup(std::move(params))}, - make_optimizer_options(config.lr) - ), + make_optimizer_options(config.lr)), config_(std::move(config)), - population_(config_.population_size, 10000, - EliteStrategy::Cumulative, + population_(config_.population_size, 10000, EliteStrategy::Cumulative, false, false, false, false, false, - config_.direct_construction, - config_.use_banach_fission), + config_.direct_construction, config_.use_banach_fission), fitness_engine_(config_.grokking_coefficient) { - evolution_manager_ = std::make_unique( - population_, 50, config_.auto_population, config_.direct_construction - ); + evolution_manager_ = std::make_unique( + population_, 50, config_.auto_population, config_.direct_construction); - spectral_sensor_ = std::make_unique(); + spectral_sensor_ = std::make_unique(); - // Initial RAM backup - backup_to_ram(); + // Initial RAM backup + backup_to_ram(); - Telemetry::get().info("optimizer_initialized", - "version=0.1.0 pop_size=" + std::to_string(config_.population_size)); + Telemetry::get().info("optimizer_initialized", + "version=0.1.0 pop_size=" + + std::to_string(config_.population_size)); } torch::Tensor FCESOptimizer::step(LossClosure closure) { - torch::NoGradGuard no_grad; - step_counter_++; + torch::NoGradGuard no_grad; + step_counter_++; - torch::Tensor loss = {}; - if (closure) { - torch::AutoGradMode grad_mode(true); - loss = closure(); - } + torch::Tensor loss = {}; + if (closure) { + torch::AutoGradMode grad_mode(true); + loss = closure(); + } - // 1. Gather Statistics - gather_stats(); + // 1. Gather Statistics + gather_stats(); - // 2. Strategy: Population Selection & Dynamics - auto& active_controller = evolution_manager_->get_active_controller(); + // 2. Strategy: Population Selection & Dynamics + auto &active_controller = evolution_manager_->get_active_controller(); - // 3. Decision: Neural Decisions from Controllers - float current_loss_val = (loss.defined()) ? loss.item() : last_step_loss_; - - // Emergency Brake - NaN/Inf Detection - if (std::isnan(current_loss_val) || !std::isfinite(current_loss_val)) { - Telemetry::get().error("emergency_brake_nan", "NaN/Inf loss detected in step " + std::to_string(step_counter_)); - handle_rollback(); - return loss; - } - - float loss_velocity = fitness_engine_.calculate_loss_signal(current_loss_val, ema_loss_, config_.signal_mode); - last_loss_velocity_ = loss_velocity; - - float progress = std::min(1.0f, static_cast(step_counter_) / std::max(1, config_.total_steps)); - float grad_cv = grad_norm_tracker_.get_std() / (grad_norm_tracker_.get_mean() + 1e-8f); - - float csr_factor = 1.0f; - if (config_.csr_enabled) { - if (step_counter_ < config_.csr_warmup_steps) { - csr_factor = 0.0f; - } else { - float steps_since_warmup = static_cast(step_counter_ - config_.csr_warmup_steps); - csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast(config_.csr_ramp_steps))); - } - } - - // Update spectral sensing rank - float spectral_alpha = 0.0f; - if (config_.grokking_coefficient > 0.0f && spectral_sensor_) { - if (step_counter_ % config_.spectral_frequency == 0 || last_spectral_rank_ == 0.0f) { - int param_idx = 0; - for (auto& group : param_groups()) { - for (auto& p : group.params()) { - if (p.dim() >= 2) { - std::string name = "layer_" + std::to_string(param_idx); - spectral_sensor_->track_layer(name, p); - } - param_idx++; - } - } - last_spectral_rank_ = spectral_sensor_->get_global_rank(); - } - spectral_alpha = last_spectral_rank_; - } - - float effective_alpha = spectral_alpha * csr_factor; - float kzm_damping = fitness_engine_.compute_kzm_damping(effective_alpha); - float stagnation_intensity = std::min(1.0f, static_cast(stagnation_counter_) / 500.0f); - float log_spectral_alpha = std::log(effective_alpha + 1e-6f); - - // Call decide_update - auto actions = active_controller.decide_update( - layer_stats_, - loss_velocity, - progress, - rollback_ema_, - grad_cv, - log_spectral_alpha, - stagnation_intensity, - kzm_damping, - loss_velocity - ); - - // Bandit-style Early Stopping - if (step_counter_ % 5 == 0 && loss_velocity > 0.05f) { - Telemetry::get().warning("early_stopping_poor_controller", - "controller_id=" + std::to_string(active_controller.id) + " velocity=" + std::to_string(loss_velocity)); - evolution_manager_->steps_active = evolution_manager_->selection_interval; - } - - if (torch::isnan(actions).any().item()) { - Telemetry::get().error("controller_nan_actions", "NaN actions returned by controller ID " + std::to_string(active_controller.id)); - population_.kill(active_controller); - auto& new_controller = evolution_manager_->get_active_controller(); - actions = torch::zeros_like(actions); - for (int i = 0; i < actions.size(0); ++i) { - actions[i][0] = 0.5f; // log_mult default - } - } - - // 4. Action: Apply Updates - apply_parameter_updates(actions); - - // 5. Evolution & Maintenance - if (current_loss_val > 0.0f) { - evolution_manager_->update_population_dynamics( - loss_velocity, - ema_loss_, - step_counter_, - config_.total_steps - ); - } - - if (step_counter_ % 50 == 0) { - backup_to_ram(); - } + // 3. Decision: Neural Decisions from Controllers + float current_loss_val = + (loss.defined()) ? loss.item() : last_step_loss_; + // Emergency Brake - NaN/Inf Detection + if (std::isnan(current_loss_val) || !std::isfinite(current_loss_val)) { + Telemetry::get().error("emergency_brake_nan", + "NaN/Inf loss detected in step " + + std::to_string(step_counter_)); + handle_rollback(); return loss; + } + + float loss_velocity = fitness_engine_.calculate_loss_signal( + current_loss_val, ema_loss_, config_.signal_mode); + last_loss_velocity_ = loss_velocity; + + float progress = std::min(1.0f, static_cast(step_counter_) / + std::max(1, config_.total_steps)); + float grad_cv = + grad_norm_tracker_.get_std() / (grad_norm_tracker_.get_mean() + 1e-8f); + + float csr_factor = 1.0f; + if (config_.csr_enabled) { + if (step_counter_ < config_.csr_warmup_steps) { + csr_factor = 0.0f; + } else { + float steps_since_warmup = + static_cast(step_counter_ - config_.csr_warmup_steps); + csr_factor = std::min( + 1.0f, steps_since_warmup / + std::max(1.0f, static_cast(config_.csr_ramp_steps))); + } + } + + // Update spectral sensing rank + float spectral_alpha = 0.0f; + if (config_.grokking_coefficient > 0.0f && spectral_sensor_) { + if (step_counter_ % config_.spectral_frequency == 0 || + last_spectral_rank_ == 0.0f) { + int param_idx = 0; + for (auto &group : param_groups()) { + for (auto &p : group.params()) { + if (p.dim() >= 2) { + std::string name = "layer_" + std::to_string(param_idx); + spectral_sensor_->track_layer(name, p); + } + param_idx++; + } + } + last_spectral_rank_ = spectral_sensor_->get_global_rank(); + } + spectral_alpha = last_spectral_rank_; + } + + float effective_alpha = spectral_alpha * csr_factor; + float kzm_damping = fitness_engine_.compute_kzm_damping(effective_alpha); + float stagnation_intensity = + std::min(1.0f, static_cast(stagnation_counter_) / 500.0f); + float log_spectral_alpha = std::log(effective_alpha + 1e-6f); + + // Call decide_update + auto actions = active_controller.decide_update( + layer_stats_, loss_velocity, progress, rollback_ema_, grad_cv, + log_spectral_alpha, stagnation_intensity, kzm_damping, loss_velocity); + + // Bandit-style Early Stopping + if (step_counter_ % 5 == 0 && loss_velocity > 0.05f) { + Telemetry::get().warning( + "early_stopping_poor_controller", + "controller_id=" + std::to_string(active_controller.id) + + " velocity=" + std::to_string(loss_velocity)); + evolution_manager_->steps_active = evolution_manager_->selection_interval; + } + + if (torch::isnan(actions).any().item()) { + Telemetry::get().error("controller_nan_actions", + "NaN actions returned by controller ID " + + std::to_string(active_controller.id)); + population_.kill(active_controller); + auto &new_controller = evolution_manager_->get_active_controller(); + actions = torch::zeros_like(actions); + for (int i = 0; i < actions.size(0); ++i) { + actions[i][0] = 0.5f; // log_mult default + } + } + + // 4. Action: Apply Updates + apply_parameter_updates(actions); + + // 5. Evolution & Maintenance + if (current_loss_val > 0.0f) { + evolution_manager_->update_population_dynamics( + loss_velocity, ema_loss_, step_counter_, config_.total_steps); + } + + if (step_counter_ % 50 == 0) { + backup_to_ram(); + } + + return loss; } void FCESOptimizer::update_fitness(float loss) { - // 1. Divergence Safety - bool is_nan = std::isnan(loss) || !std::isfinite(loss); - bool is_spike = (step_counter_ > 1) && (ema_loss_ > 0.0f) && (loss > config_.rollback_threshold * ema_loss_) && (ema_loss_ > 0.1f); - if (is_nan || is_spike) { - Telemetry::get().warning("divergence_detected", "loss=" + std::to_string(loss) + " ema=" + std::to_string(ema_loss_)); - handle_rollback(); - return; - } + // 1. Divergence Safety + bool is_nan = std::isnan(loss) || !std::isfinite(loss); + bool is_spike = (step_counter_ > 1) && (ema_loss_ > 0.0f) && + (loss > config_.rollback_threshold * ema_loss_) && + (ema_loss_ > 0.1f); + if (is_nan || is_spike) { + Telemetry::get().warning("divergence_detected", + "loss=" + std::to_string(loss) + + " ema=" + std::to_string(ema_loss_)); + handle_rollback(); + return; + } - if (step_counter_ == 1 || ema_loss_ == 0.0f) { - ema_loss_ = loss; - last_step_loss_ = loss; - last_sparsity_ = calculate_sparsity(); - return; - } - - // 2. Metric Calculation - float train_adv = ema_loss_ - loss; - float val_adv = 0.0f; - float current_sparsity = calculate_sparsity(); - float sparsity_delta = current_sparsity - last_sparsity_; - float consistency_gap = std::max(0.0f, train_adv - val_adv); - - float grad_std = grad_norm_tracker_.get_std(); - float grad_mean = grad_norm_tracker_.get_mean(); - float grad_cv = grad_std / (grad_mean + 1e-8f); - - float raw_rank = (spectral_sensor_) ? spectral_sensor_->get_global_rank() : 0.0f; - float csr_factor = 1.0f; - if (config_.csr_enabled) { - if (step_counter_ < config_.csr_warmup_steps) { - csr_factor = 0.0f; - } else { - float steps_since_warmup = static_cast(step_counter_ - config_.csr_warmup_steps); - csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast(config_.csr_ramp_steps))); - } - } - float effective_rank = config_.csr_enabled ? raw_rank * csr_factor : raw_rank; - - FitnessMetrics metrics; - metrics.training_advantage = train_adv; - metrics.validation_advantage = val_adv; - metrics.grad_cv = grad_cv; - metrics.sparsity_delta = sparsity_delta; - metrics.consistency_gap = consistency_gap; - metrics.stable_rank = effective_rank; - - // 3. Fuzzy Evaluation - float final_fitness = fitness_evaluator_.evaluate(metrics); - - // 4. State Update - ema_loss_ = 0.95f * ema_loss_ + 0.05f * loss; + if (step_counter_ == 1 || ema_loss_ == 0.0f) { + ema_loss_ = loss; last_step_loss_ = loss; - last_sparsity_ = current_sparsity; + last_sparsity_ = calculate_sparsity(); + return; + } - // Stagnation logic - if (loss < best_loss_window_ * 0.995f) { - best_loss_window_ = loss; - stagnation_counter_ = 0; + // 2. Metric Calculation + float train_adv = ema_loss_ - loss; + float val_adv = 0.0f; + float current_sparsity = calculate_sparsity(); + float sparsity_delta = current_sparsity - last_sparsity_; + float consistency_gap = std::max(0.0f, train_adv - val_adv); + + float grad_std = grad_norm_tracker_.get_std(); + float grad_mean = grad_norm_tracker_.get_mean(); + float grad_cv = grad_std / (grad_mean + 1e-8f); + + float raw_rank = + (spectral_sensor_) ? spectral_sensor_->get_global_rank() : 0.0f; + float csr_factor = 1.0f; + if (config_.csr_enabled) { + if (step_counter_ < config_.csr_warmup_steps) { + csr_factor = 0.0f; } else { - stagnation_counter_++; + float steps_since_warmup = + static_cast(step_counter_ - config_.csr_warmup_steps); + csr_factor = std::min( + 1.0f, steps_since_warmup / + std::max(1.0f, static_cast(config_.csr_ramp_steps))); } + } + float effective_rank = config_.csr_enabled ? raw_rank * csr_factor : raw_rank; - // 5. Apply to Population - auto& active_controller = evolution_manager_->get_active_controller(); - population_.update_controller_fitness(active_controller, final_fitness); + FitnessMetrics metrics; + metrics.training_advantage = train_adv; + metrics.validation_advantage = val_adv; + metrics.grad_cv = grad_cv; + metrics.sparsity_delta = sparsity_delta; + metrics.consistency_gap = consistency_gap; + metrics.stable_rank = effective_rank; - Telemetry::get().info("fitness_calculated", - "loss=" + std::to_string(loss) + - " ema_loss=" + std::to_string(ema_loss_) + - " fitness=" + std::to_string(final_fitness)); + // 3. Fuzzy Evaluation + float final_fitness = fitness_evaluator_.evaluate(metrics); + + // 4. State Update + ema_loss_ = 0.95f * ema_loss_ + 0.05f * loss; + last_step_loss_ = loss; + last_sparsity_ = current_sparsity; + + // Stagnation logic + if (loss < best_loss_window_ * 0.995f) { + best_loss_window_ = loss; + stagnation_counter_ = 0; + } else { + stagnation_counter_++; + } + + // 5. Apply to Population + auto &active_controller = evolution_manager_->get_active_controller(); + population_.update_controller_fitness(active_controller, final_fitness); + + Telemetry::get().info("fitness_calculated", + "loss=" + std::to_string(loss) + + " ema_loss=" + std::to_string(ema_loss_) + + " fitness=" + std::to_string(final_fitness)); } void FCESOptimizer::backup_to_ram() { - ram_backup_.clear(); - for (auto& group : param_groups()) { - for (auto& p : group.params()) { - ram_backup_.push_back(p.data().clone().cpu()); - } + ram_backup_.clear(); + for (auto &group : param_groups()) { + for (auto &p : group.params()) { + ram_backup_.push_back(p.data().clone().cpu()); } + } } void FCESOptimizer::restore_from_ram() { - int idx = 0; - for (auto& group : param_groups()) { - for (auto& p : group.params()) { - if (idx < static_cast(ram_backup_.size())) { - p.data().copy_(ram_backup_[idx].to(p.device())); - idx++; - } - } + int idx = 0; + for (auto &group : param_groups()) { + for (auto &p : group.params()) { + if (idx < static_cast(ram_backup_.size())) { + p.data().copy_(ram_backup_[idx].to(p.device())); + idx++; + } } + } } float FCESOptimizer::calculate_sparsity() const { - int64_t total = 0, zeros = 0; - for (const auto& group : param_groups()) { - for (const auto& p : group.params()) { - total += p.numel(); - zeros += (p.data().abs() < 1e-5f).sum().item(); - } + int64_t total = 0, zeros = 0; + for (const auto &group : param_groups()) { + for (const auto &p : group.params()) { + total += p.numel(); + zeros += (p.data().abs() < 1e-5f).sum().item(); } - return (total > 0) ? static_cast(zeros) / total : 0.0f; + } + return (total > 0) ? static_cast(zeros) / total : 0.0f; } void FCESOptimizer::gather_stats() { - layer_stats_.clear(); - param_group_mapping_.clear(); + layer_stats_.clear(); + param_group_mapping_.clear(); - int param_idx = 0; - bool has_nan_or_inf = false; - float max_grad_norm = 0.0f; + int param_idx = 0; + bool has_nan_or_inf = false; + float max_grad_norm = 0.0f; - for (auto& group : param_groups()) { - for (auto& p : group.params()) { - if (!p.grad().defined()) { - param_group_mapping_.push_back(-1); - continue; - } + for (auto &group : param_groups()) { + for (auto &p : group.params()) { + if (!p.grad().defined()) { + param_group_mapping_.push_back(-1); + continue; + } - auto grad = p.grad(); - if (torch::isnan(grad).any().item() || torch::isinf(grad).any().item()) { - has_nan_or_inf = true; - } + auto grad = p.grad(); + if (torch::isnan(grad).any().item() || + torch::isinf(grad).any().item()) { + has_nan_or_inf = true; + } - float grad_norm = grad.norm().item(); - if (std::isnan(grad_norm) || !std::isfinite(grad_norm)) { - has_nan_or_inf = true; - grad_norm = 0.0f; - } + float grad_norm = grad.norm().item(); + if (std::isnan(grad_norm) || !std::isfinite(grad_norm)) { + has_nan_or_inf = true; + grad_norm = 0.0f; + } - if (grad_norm > max_grad_norm) { - max_grad_norm = grad_norm; - } + if (grad_norm > max_grad_norm) { + max_grad_norm = grad_norm; + } - int64_t total_elements = grad.numel(); - int64_t zeros = (grad.abs() < 1e-5f).sum().item(); - float sparsity = (total_elements > 0) ? static_cast(zeros) / total_elements : 0.0f; + int64_t total_elements = grad.numel(); + int64_t zeros = (grad.abs() < 1e-5f).sum().item(); + float sparsity = (total_elements > 0) + ? static_cast(zeros) / total_elements + : 0.0f; - int layer_type = classify_layer_by_shape(p); - int group_idx = static_cast(layer_stats_.size()); - layer_stats_.push_back({grad_norm, sparsity, static_cast(layer_type)}); - param_group_mapping_.push_back(group_idx); + int layer_type = classify_layer_by_shape(p); + int group_idx = static_cast(layer_stats_.size()); + layer_stats_.push_back( + {grad_norm, sparsity, static_cast(layer_type)}); + param_group_mapping_.push_back(group_idx); - if (spectral_sensor_ && p.dim() >= 2) { - std::string name = "layer_" + std::to_string(param_idx); - spectral_sensor_->track_layer(name, p); - } + if (spectral_sensor_ && p.dim() >= 2) { + std::string name = "layer_" + std::to_string(param_idx); + spectral_sensor_->track_layer(name, p); + } - param_idx++; - } + param_idx++; } + } - if (has_nan_or_inf) { - Telemetry::get().error("poisoned_gradients_detected", - "NaN/Inf detected in gradients during step " + std::to_string(step_counter_)); - handle_rollback(); - return; - } + if (has_nan_or_inf) { + Telemetry::get().error("poisoned_gradients_detected", + "NaN/Inf detected in gradients during step " + + std::to_string(step_counter_)); + handle_rollback(); + return; + } - if (step_counter_ == 1 && max_grad_norm > 1.0f) { - float safe_lr = 0.01f / (max_grad_norm + 1e-8f); - for (auto& group : param_groups()) { - if (group.options().get_lr() > safe_lr) { - Telemetry::get().info("auto_calibration_throttled_lr", - "old=" + std::to_string(group.options().get_lr()) + " new=" + std::to_string(safe_lr)); - group.options().set_lr(safe_lr); - config_.lr = safe_lr; - } - } + if (step_counter_ == 1 && max_grad_norm > 1.0f) { + float safe_lr = 0.01f / (max_grad_norm + 1e-8f); + for (auto &group : param_groups()) { + if (group.options().get_lr() > safe_lr) { + Telemetry::get().info( + "auto_calibration_throttled_lr", + "old=" + std::to_string(group.options().get_lr()) + + " new=" + std::to_string(safe_lr)); + group.options().set_lr(safe_lr); + config_.lr = safe_lr; + } } + } - if (!layer_stats_.empty()) { - float first_grad_norm = layer_stats_[0][0]; - grad_norm_tracker_.update(first_grad_norm); - } + if (!layer_stats_.empty()) { + float first_grad_norm = layer_stats_[0][0]; + grad_norm_tracker_.update(first_grad_norm); + } } -void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) { - int param_idx = 0; - float parasitic_accum = 0.0f; - int count_updated = 0; +void FCESOptimizer::apply_parameter_updates(const torch::Tensor &actions) { + int param_idx = 0; + float parasitic_accum = 0.0f; + int count_updated = 0; - auto& active_controller = evolution_manager_->get_active_controller(); + auto &active_controller = evolution_manager_->get_active_controller(); - for (auto& group : param_groups()) { - float lr = static_cast(group.options().get_lr()); - float wd = config_.weight_decay; + for (auto &group : param_groups()) { + float lr = static_cast(group.options().get_lr()); + float wd = config_.weight_decay; - for (auto& p : group.params()) { - if (!p.grad().defined()) { - param_idx++; - continue; - } + for (auto &p : group.params()) { + if (!p.grad().defined()) { + param_idx++; + continue; + } - int g_idx = param_group_mapping_[param_idx]; - if (g_idx < 0 || g_idx >= actions.size(0)) { - param_idx++; - continue; - } + int g_idx = param_group_mapping_[param_idx]; + if (g_idx < 0 || g_idx >= actions.size(0)) { + param_idx++; + continue; + } - float mult = actions[g_idx][0].item(); - float sign_gate = actions[g_idx][1].item(); - float wd_mult = (actions.size(1) > 2) ? actions[g_idx][2].item() : 1.0f; + float mult = actions[g_idx][0].item(); + float sign_gate = actions[g_idx][1].item(); + float wd_mult = + (actions.size(1) > 2) ? actions[g_idx][2].item() : 1.0f; - bool use_sign = sign_gate > 0.0f; - if (config_.ablation_mode == "force_sign") { - use_sign = true; - } else if (config_.ablation_mode == "force_grad") { - use_sign = false; - } + bool use_sign = sign_gate > 0.0f; + if (config_.ablation_mode == "force_sign") { + use_sign = true; + } else if (config_.ablation_mode == "force_grad") { + use_sign = false; + } - if (wd > 0.0f) { - float effective_wd = wd; - if (config_.adaptive_wd) { - effective_wd *= wd_mult; - } - p.data().mul_(1.0f - lr * effective_wd); - } - - torch::Tensor update_vec = use_sign ? torch::sign(p.grad()) : p.grad(); - torch::Tensor update = -lr * mult * update_vec; - - update = apply_trust_clipping(p, update, config_.trust_region_clip); - p.data().add_(update); - - if (config_.parasitic_mode) { - parasitic_accum += calculate_parasitic_reward(p, mult, grad_norm_tracker_); - } - - param_idx++; - count_updated++; + if (wd > 0.0f) { + float effective_wd = wd; + if (config_.adaptive_wd) { + effective_wd *= wd_mult; } - } + p.data().mul_(1.0f - lr * effective_wd); + } - if (config_.parasitic_mode && count_updated > 0) { - float reward = parasitic_accum / static_cast(count_updated); - population_.update_controller_fitness(active_controller, reward * 10.0f, false); + torch::Tensor update_vec = use_sign ? torch::sign(p.grad()) : p.grad(); + torch::Tensor update = -lr * mult * update_vec; + + update = apply_trust_clipping(p, update, config_.trust_region_clip); + p.data().add_(update); + + if (config_.parasitic_mode) { + parasitic_accum += + calculate_parasitic_reward(p, mult, grad_norm_tracker_); + } + + param_idx++; + count_updated++; } + } + + if (config_.parasitic_mode && count_updated > 0) { + float reward = parasitic_accum / static_cast(count_updated); + population_.update_controller_fitness(active_controller, reward * 10.0f, + false); + } } void FCESOptimizer::handle_rollback() { - restore_from_ram(); - population_.calm_down(); - rollback_ema_ = 0.9f * rollback_ema_ + 0.1f; + restore_from_ram(); + population_.calm_down(); + rollback_ema_ = 0.9f * rollback_ema_ + 0.1f; - ema_loss_ = 0.0f; - last_step_loss_ = 0.0f; - grad_norm_tracker_.reset(); - zero_grad(); + ema_loss_ = 0.0f; + last_step_loss_ = 0.0f; + grad_norm_tracker_.reset(); + zero_grad(); - Telemetry::get().warning("hard_reset_executed", "rollback_sanitization"); + Telemetry::get().warning("hard_reset_executed", "rollback_sanitization"); } } // namespace fces diff --git a/src/oscillation.cpp b/src/oscillation.cpp index 0296a84..b077458 100644 --- a/src/oscillation.cpp +++ b/src/oscillation.cpp @@ -1,97 +1,103 @@ #include "fces/oscillation.hpp" +#include #include #include -#include namespace fces { void OscillationDetector::update(float loss) { - loss_history_.push_back(loss); - if (static_cast(loss_history_.size()) > WINDOW_SIZE) { - loss_history_.erase(loss_history_.begin()); - } + loss_history_.push_back(loss); + if (static_cast(loss_history_.size()) > WINDOW_SIZE) { + loss_history_.erase(loss_history_.begin()); + } } bool OscillationDetector::detect() const { - return get_score() > POWER_THRESHOLD; + return get_score() > POWER_THRESHOLD; } float OscillationDetector::get_score() const { - if (static_cast(loss_history_.size()) < WINDOW_SIZE) return 0.0f; + if (static_cast(loss_history_.size()) < WINDOW_SIZE) + return 0.0f; - auto detrended = detrend(loss_history_); - auto power = compute_power_spectrum(detrended); + auto detrended = detrend(loss_history_); + auto power = compute_power_spectrum(detrended); - // Sum power in oscillation bands (periods 4-16) - float osc_power = 0.0f; - float total_power = 0.0f; - int n = static_cast(power.size()); - for (int i = 1; i < n; ++i) { - total_power += power[i]; - int period = n / i; - if (period >= MIN_PERIOD && period <= MAX_PERIOD) { - osc_power += power[i]; - } + // Sum power in oscillation bands (periods 4-16) + float osc_power = 0.0f; + float total_power = 0.0f; + int n = static_cast(power.size()); + for (int i = 1; i < n; ++i) { + total_power += power[i]; + int period = n / i; + if (period >= MIN_PERIOD && period <= MAX_PERIOD) { + osc_power += power[i]; } + } - if (total_power < 1e-8f) return 0.0f; - return osc_power / total_power; + if (total_power < 1e-8f) + return 0.0f; + return osc_power / total_power; } float OscillationDetector::get_variance_50() const { - if (loss_history_.size() < 50) return 0.0f; - auto start = loss_history_.end() - 50; - float mean = std::accumulate(start, loss_history_.end(), 0.0f) / 50.0f; - float var = 0.0f; - for (auto it = start; it != loss_history_.end(); ++it) { - float d = *it - mean; - var += d * d; - } - return var / 50.0f; + if (loss_history_.size() < 50) + return 0.0f; + auto start = loss_history_.end() - 50; + float mean = std::accumulate(start, loss_history_.end(), 0.0f) / 50.0f; + float var = 0.0f; + for (auto it = start; it != loss_history_.end(); ++it) { + float d = *it - mean; + var += d * d; + } + return var / 50.0f; } -void OscillationDetector::reset() { - loss_history_.clear(); +void OscillationDetector::reset() { loss_history_.clear(); } + +std::vector +OscillationDetector::detrend(const std::vector &signal) { + int n = static_cast(signal.size()); + if (n < 2) + return signal; + + // Remove linear trend via least squares + float sum_x = 0, sum_y = 0, sum_xy = 0, sum_xx = 0; + for (int i = 0; i < n; ++i) { + sum_x += i; + sum_y += signal[i]; + sum_xy += i * signal[i]; + sum_xx += i * i; + } + float slope = + (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x + 1e-8f); + float intercept = (sum_y - slope * sum_x) / n; + + std::vector result(n); + for (int i = 0; i < n; ++i) { + result[i] = signal[i] - (slope * i + intercept); + } + return result; } -std::vector OscillationDetector::detrend(const std::vector& signal) { - int n = static_cast(signal.size()); - if (n < 2) return signal; +std::vector +OscillationDetector::compute_power_spectrum(const std::vector &signal) { + // Simple DFT (for WINDOW_SIZE=64, this is fast enough; upgrade to FFT if + // needed) + int n = static_cast(signal.size()); + int half = n / 2; + std::vector power(half); - // Remove linear trend via least squares - float sum_x = 0, sum_y = 0, sum_xy = 0, sum_xx = 0; - for (int i = 0; i < n; ++i) { - sum_x += i; - sum_y += signal[i]; - sum_xy += i * signal[i]; - sum_xx += i * i; + for (int k = 0; k < half; ++k) { + float re = 0.0f, im = 0.0f; + for (int t = 0; t < n; ++t) { + float angle = 2.0f * 3.14159265358979f * k * t / n; + re += signal[t] * std::cos(angle); + im -= signal[t] * std::sin(angle); } - float slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x + 1e-8f); - float intercept = (sum_y - slope * sum_x) / n; - - std::vector result(n); - for (int i = 0; i < n; ++i) { - result[i] = signal[i] - (slope * i + intercept); - } - return result; + power[k] = re * re + im * im; + } + return power; } -std::vector OscillationDetector::compute_power_spectrum(const std::vector& signal) { - // Simple DFT (for WINDOW_SIZE=64, this is fast enough; upgrade to FFT if needed) - int n = static_cast(signal.size()); - int half = n / 2; - std::vector power(half); - - for (int k = 0; k < half; ++k) { - float re = 0.0f, im = 0.0f; - for (int t = 0; t < n; ++t) { - float angle = 2.0f * 3.14159265358979f * k * t / n; - re += signal[t] * std::cos(angle); - im -= signal[t] * std::sin(angle); - } - power[k] = re * re + im * im; - } - return power; -} - -} // namespace fces +} // namespace fces diff --git a/src/population.cpp b/src/population.cpp index f84f547..f7dff2c 100644 --- a/src/population.cpp +++ b/src/population.cpp @@ -4,563 +4,610 @@ namespace fces { -Population::Population( - int active_size, int repo_size, EliteStrategy elite_strategy, - bool link_mutation, bool link_elite, bool link_violator, - bool use_fuzzy_pacer, bool use_fuzzy_importance, - bool direct_construction, bool use_banach_fission -) - : elite_strategy_(elite_strategy), - link_mutation_(link_mutation), - link_elite_(link_elite), - link_violator_(link_violator), +Population::Population(int active_size, int repo_size, + EliteStrategy elite_strategy, bool link_mutation, + bool link_elite, bool link_violator, + bool use_fuzzy_pacer, bool use_fuzzy_importance, + bool direct_construction, bool use_banach_fission) + : elite_strategy_(elite_strategy), link_mutation_(link_mutation), + link_elite_(link_elite), link_violator_(link_violator), use_fuzzy_pacer_(use_fuzzy_pacer), use_fuzzy_importance_(use_fuzzy_importance), direct_construction_(direct_construction), use_banach_fission_(use_banach_fission) { - if (direct_construction) active_size = 1; + if (direct_construction) + active_size = 1; - gladiators_.reserve(active_size); - for (int i = 0; i < active_size; ++i) { - gladiators_.emplace_back(); - } - repository_.reserve(repo_size); + gladiators_.reserve(active_size); + for (int i = 0; i < active_size; ++i) { + gladiators_.emplace_back(); + } + repository_.reserve(repo_size); } -FuzzyController& Population::get_active_controller() { - if (active_controller_ == nullptr || steps_active_ >= selection_interval_) { - active_controller_ = &select_weighted(); - steps_active_ = 0; - } - steps_active_++; - return *active_controller_; +FuzzyController &Population::get_active_controller() { + if (active_controller_ == nullptr || steps_active_ >= selection_interval_) { + active_controller_ = &select_weighted(); + steps_active_ = 0; + } + steps_active_++; + return *active_controller_; } -FuzzyController& Population::select_weighted() { - static thread_local std::mt19937 rng{std::random_device{}()}; +FuzzyController &Population::select_weighted() { + static thread_local std::mt19937 rng{std::random_device{}()}; + if (gladiators_.empty()) { + throw std::runtime_error("Empty gladiators population"); + } + + float sum_fit = 0.0f; + for (const auto &g : gladiators_) { + sum_fit += std::max(0.0f, g.fitness); + } + if (sum_fit == 0.0f) { + std::uniform_int_distribution dist( + 0, static_cast(gladiators_.size()) - 1); + return gladiators_[dist(rng)]; + } + + // Select 3 random candidates for tournament + std::uniform_int_distribution dist( + 0, static_cast(gladiators_.size()) - 1); + int idx1 = dist(rng); + int idx2 = dist(rng); + int idx3 = dist(rng); + + auto get_score = [this](const FuzzyController &c) { + float base_score = c.fitness + (0.01f * static_cast(c.age)); + // Add novelty score if archive has enough entries + if (behavioral_archive_.size() >= 5) { + float novelty = 0.0f; + // Get behavioral vector: first 20 weights + std::vector behavior( + c.genome.weights.begin(), + c.genome.weights.begin() + + std::min(20, static_cast(c.genome.weights.size()))); + std::vector distances; + distances.reserve(behavioral_archive_.size()); + for (const auto &archived : behavioral_archive_) { + float dist_sum = 0.0f; + for (size_t i = 0; i < behavior.size() && i < archived.size(); ++i) { + float diff = behavior[i] - archived[i]; + dist_sum += diff * diff; + } + distances.push_back(std::sqrt(dist_sum)); + } + std::sort(distances.begin(), distances.end()); + int k = std::min(5, static_cast(distances.size())); + float avg_dist = 0.0f; + for (int i = 0; i < k; ++i) { + avg_dist += distances[i]; + } + if (k > 0) + avg_dist /= static_cast(k); + base_score += NOVELTY_WEIGHT * avg_dist; + } + return base_score; + }; + + FuzzyController *best = &gladiators_[idx1]; + float best_score = get_score(*best); + + FuzzyController *cand2 = &gladiators_[idx2]; + float score2 = get_score(*cand2); + if (score2 > best_score) { + best = cand2; + best_score = score2; + } + + FuzzyController *cand3 = &gladiators_[idx3]; + float score3 = get_score(*cand3); + if (score3 > best_score) { + best = cand3; + best_score = score3; + } + + return *best; +} + +FuzzyController &Population::get_best_active() { + return *std::max_element( + gladiators_.begin(), gladiators_.end(), + [](const FuzzyController &a, const FuzzyController &b) { + return a.fitness < b.fitness; + }); +} + +FuzzyController &Population::get_worst_active() { + auto elites = get_elites(); + std::vector non_elites; + for (auto &g : gladiators_) { + bool is_elite = false; + for (auto *e : elites) { + if (e->id == g.id) { + is_elite = true; + break; + } + } + if (!is_elite) { + non_elites.push_back(&g); + } + } + + if (non_elites.empty()) { + return *std::min_element( + gladiators_.begin(), gladiators_.end(), + [](const FuzzyController &a, const FuzzyController &b) { + return a.fitness < b.fitness; + }); + } + + // cppcheck-suppress returnReference; False Positive: elements of non_elites + // point to members of gladiators_ + return **std::min_element( + non_elites.begin(), non_elites.end(), + [](const FuzzyController *a, const FuzzyController *b) { + return a->fitness < b->fitness; + }); +} + +void Population::kill(FuzzyController &controller) { + auto elites = get_elites(); + for (auto *e : elites) { + if (e->id == controller.id) { + return; // Elite protection + } + } + + auto it = std::find_if( + gladiators_.begin(), gladiators_.end(), + [&](const FuzzyController &c) { return c.id == controller.id; }); + if (it != gladiators_.end()) { + gladiators_.erase(it); if (gladiators_.empty()) { - throw std::runtime_error("Empty gladiators population"); + gladiators_.emplace_back(); } - - float sum_fit = 0.0f; - for (const auto& g : gladiators_) { - sum_fit += std::max(0.0f, g.fitness); - } - if (sum_fit == 0.0f) { - std::uniform_int_distribution dist(0, static_cast(gladiators_.size()) - 1); - return gladiators_[dist(rng)]; - } - - // Select 3 random candidates for tournament - std::uniform_int_distribution dist(0, static_cast(gladiators_.size()) - 1); - int idx1 = dist(rng); - int idx2 = dist(rng); - int idx3 = dist(rng); - - auto get_score = [this](const FuzzyController& c) { - float base_score = c.fitness + (0.01f * static_cast(c.age)); - // Add novelty score if archive has enough entries - if (behavioral_archive_.size() >= 5) { - float novelty = 0.0f; - // Get behavioral vector: first 20 weights - std::vector behavior(c.genome.weights.begin(), c.genome.weights.begin() + std::min(20, static_cast(c.genome.weights.size()))); - std::vector distances; - distances.reserve(behavioral_archive_.size()); - for (const auto& archived : behavioral_archive_) { - float dist_sum = 0.0f; - for (size_t i = 0; i < behavior.size() && i < archived.size(); ++i) { - float diff = behavior[i] - archived[i]; - dist_sum += diff * diff; - } - distances.push_back(std::sqrt(dist_sum)); - } - std::sort(distances.begin(), distances.end()); - int k = std::min(5, static_cast(distances.size())); - float avg_dist = 0.0f; - for (int i = 0; i < k; ++i) { - avg_dist += distances[i]; - } - if (k > 0) avg_dist /= static_cast(k); - base_score += NOVELTY_WEIGHT * avg_dist; - } - return base_score; - }; - - FuzzyController* best = &gladiators_[idx1]; - float best_score = get_score(*best); - - FuzzyController* cand2 = &gladiators_[idx2]; - float score2 = get_score(*cand2); - if (score2 > best_score) { - best = cand2; - best_score = score2; - } - - FuzzyController* cand3 = &gladiators_[idx3]; - float score3 = get_score(*cand3); - if (score3 > best_score) { - best = cand3; - best_score = score3; - } - - return *best; + } } -FuzzyController& Population::get_best_active() { - return *std::max_element(gladiators_.begin(), gladiators_.end(), - [](const FuzzyController& a, const FuzzyController& b) { - return a.fitness < b.fitness; - }); +void Population::update_controller_fitness(FuzzyController &controller, + float reward, bool increment_eval) { + if (increment_eval) { + controller.age++; + controller.evaluation_count++; + } + controller.lifetime_fitness += reward; + + // Track in rolling history + constexpr size_t RECENT_WINDOW = 20; + controller.fitness_history.push_back(reward); + if (controller.fitness_history.size() > RECENT_WINDOW) { + controller.fitness_history.erase(controller.fitness_history.begin()); + } + + if (elite_strategy_ == EliteStrategy::EMA) { + constexpr float EMA_ALPHA = 0.1f; + controller.ema_fitness = + (1.0f - EMA_ALPHA) * controller.ema_fitness + EMA_ALPHA * reward; + controller.fitness = reward; + } else if (elite_strategy_ == EliteStrategy::Rolling) { + controller.fitness = reward; + } else { + controller.fitness = reward; + } } -FuzzyController& Population::get_worst_active() { - auto elites = get_elites(); - std::vector non_elites; - for (auto& g : gladiators_) { - bool is_elite = false; - for (auto* e : elites) { - if (e->id == g.id) { - is_elite = true; - break; - } - } - if (!is_elite) { - non_elites.push_back(&g); - } - } - - if (non_elites.empty()) { - return *std::min_element(gladiators_.begin(), gladiators_.end(), - [](const FuzzyController& a, const FuzzyController& b) { - return a.fitness < b.fitness; - }); - } - - return **std::min_element(non_elites.begin(), non_elites.end(), - [](const FuzzyController* a, const FuzzyController* b) { - return a->fitness < b->fitness; - }); +void Population::mark_violated(FuzzyController &controller) { + auto it = std::find_if( + violated_controllers_.begin(), violated_controllers_.end(), + [&](const FuzzyController &c) { return c.id == controller.id; }); + if (it == violated_controllers_.end()) { + violated_controllers_.push_back(controller); + } } -void Population::kill(FuzzyController& controller) { - auto elites = get_elites(); - for (auto* e : elites) { - if (e->id == controller.id) { - return; // Elite protection +float Population::get_effective_fitness(const FuzzyController &controller, + float training_progress) const { + float recent_avg = 0.0f; + if (!controller.fitness_history.empty()) { + float sum = 0.0f; + for (float f : controller.fitness_history) + sum += f; + recent_avg = sum / controller.fitness_history.size(); + } + + float lifetime_avg = 0.0f; + if (controller.evaluation_count > 0) { + lifetime_avg = controller.lifetime_fitness / + static_cast(controller.evaluation_count); + } + + float alpha = 0.2f + 0.6f * training_progress; + return alpha * recent_avg + (1.0f - alpha) * lifetime_avg; +} + +void Population::evolve(float current_loss, float velocity, + float training_progress) { + static thread_local std::mt19937 rng{std::random_device{}()}; + std::uniform_real_distribution coin(0.0f, 1.0f); + + if (gladiators_.empty()) + return; + + FuzzyController &worst = get_worst_active(); + FuzzyController &best_active = get_best_active(); + auto elites = get_elites(); + + // Update behavioral archive for novelty search + if (best_active.fitness > -999.0f) { + std::vector behavior( + best_active.genome.weights.begin(), + best_active.genome.weights.begin() + + std::min(20, static_cast(best_active.genome.weights.size()))); + behavioral_archive_.push_back(behavior); + if (behavioral_archive_.size() > BEHAVIORAL_ARCHIVE_SIZE) { + behavioral_archive_.erase(behavioral_archive_.begin()); + } + } + + // Phase-dependent scheduling + float phase_sigma_mult = 1.0f; + float phase_phoenix_intensity = 1.0f; + if (training_progress < 0.1f) { + phase_sigma_mult = 2.0f; + phase_phoenix_intensity = 1.5f; + } else if (training_progress > 0.7f) { + phase_sigma_mult = 0.5f; + phase_phoenix_intensity = 0.5f; + } + + // Loss-linked mutation rate + float mutation_rate = 0.5f; + if (link_mutation_) { + mutation_rate = std::max(0.05f, std::min(1.0f, current_loss / 5.0f)); + } + mutation_rate *= phase_sigma_mult; + mutation_rate = std::min(1.0f, mutation_rate); + + // Pairing probabilities + float elite_prob = 0.3f; + if (link_elite_) { + elite_prob = std::max(0.2f, std::min(0.8f, 1.0f - current_loss / 5.0f)); + } + float violator_prob = 0.1f; + if (link_violator_) { + violator_prob = + std::max(0.0f, std::min(0.5f, (current_loss - 1.0f) / 4.0f)); + } + + // Select parent + FuzzyController *parent = &best_active; + std::vector partner_pool; + + float roll = coin(rng); + if (roll < elite_prob && !elites.empty()) { + std::uniform_int_distribution elite_dist( + 0, static_cast(elites.size()) - 1); + parent = elites[elite_dist(rng)]; + partner_pool = elites; + } else if (roll < elite_prob + violator_prob && + !violated_controllers_.empty()) { + parent = &best_active; + // Filter living violators + for (auto &v : violated_controllers_) { + for (auto &g : gladiators_) { + if (g.id == v.id) { + partner_pool.push_back(&g); + break; } + } + } + if (partner_pool.empty()) { + // Fallback + for (size_t i = 0; + i < std::min(static_cast(10), gladiators_.size()); ++i) { + partner_pool.push_back(&gladiators_[i]); + } + } + } else { + parent = &best_active; + for (size_t i = 0; + i < std::min(static_cast(10), gladiators_.size()); ++i) { + partner_pool.push_back(&gladiators_[i]); + } + } + + // Crossover or mutation + FuzzyController child; + if (coin(rng) < 0.7f && partner_pool.size() > 1) { + std::uniform_int_distribution pool_dist( + 0, static_cast(partner_pool.size()) - 1); + FuzzyController *partner = partner_pool[pool_dist(rng)]; + if (partner->id == parent->id) { + // Pick another if possible + for (auto *p : partner_pool) { + if (p->id != parent->id) { + partner = p; + break; + } + } + } + child = parent->crossover(*partner, false); + } else { + float sigma_mod = global_sigma_modifier_ * mutation_rate; + if (velocity < -0.05f) { + sigma_mod *= 1.0f / (1.0f + std::abs(velocity) * 10.0f); + } + // Epigenetic lock + if (parent->fitness > 0.5f) { + sigma_mod *= 0.1f; + } + child = parent->mutate(current_loss, sigma_mod); + } + + // Recover temperature + global_sigma_modifier_ = std::min(1.0f, global_sigma_modifier_ * 1.01f); + + // Fuzzy Pacer + if (use_fuzzy_pacer_) { + fitness_history_.push_back(best_active.fitness); + if (fitness_history_.size() > 20) { + fitness_history_.erase(fitness_history_.begin()); + } + if (fitness_history_.size() >= 10) { + float trend = 0.0f; + for (size_t i = 1; i < fitness_history_.size(); ++i) { + trend += (fitness_history_[i] - fitness_history_[i - 1]); + } + trend /= (fitness_history_.size() - 1); + float diversity = get_diversity_index(); + if (trend < 0.001f && diversity < 0.2f) { + global_sigma_modifier_ = std::min(5.0f, global_sigma_modifier_ * 1.2f); + } else if (trend > 0.01f) { + global_sigma_modifier_ = std::max(0.1f, global_sigma_modifier_ * 0.95f); + } + } + } + + // Banach-Tarski Fission + if (use_banach_fission_ && coin(rng) < 0.2f && !elites.empty()) { + auto *prime_elite = elites[0]; + auto fission_pair = + prime_elite->banach_tarski_fission(phase_phoenix_intensity); + + // Find second worst + FuzzyController *second_worst = nullptr; + for (auto &g : gladiators_) { + if (g.id != worst.id) { + if (second_worst == nullptr || g.fitness < second_worst->fitness) { + second_worst = &g; + } + } } - auto it = std::find_if(gladiators_.begin(), gladiators_.end(), - [&](const FuzzyController& c) { return c.id == controller.id; }); - if (it != gladiators_.end()) { + // Replace worst and second_worst with plus and minus child + if (second_worst) { + uint64_t sw_id = second_worst->id; + auto it = + std::find_if(gladiators_.begin(), gladiators_.end(), + [&](const FuzzyController &c) { return c.id == sw_id; }); + if (it != gladiators_.end()) { gladiators_.erase(it); - if (gladiators_.empty()) { - gladiators_.emplace_back(); - } - } -} - -void Population::update_controller_fitness(FuzzyController& controller, float reward, bool increment_eval) { - if (increment_eval) { - controller.age++; - controller.evaluation_count++; - } - controller.lifetime_fitness += reward; - - // Track in rolling history - constexpr size_t RECENT_WINDOW = 20; - controller.fitness_history.push_back(reward); - if (controller.fitness_history.size() > RECENT_WINDOW) { - controller.fitness_history.erase(controller.fitness_history.begin()); + } + gladiators_.push_back(fission_pair.first); } - if (elite_strategy_ == EliteStrategy::EMA) { - constexpr float EMA_ALPHA = 0.1f; - controller.ema_fitness = (1.0f - EMA_ALPHA) * controller.ema_fitness + EMA_ALPHA * reward; - controller.fitness = reward; - } else if (elite_strategy_ == EliteStrategy::Rolling) { - controller.fitness = reward; + uint64_t w_id = worst.id; + auto it = + std::find_if(gladiators_.begin(), gladiators_.end(), + [&](const FuzzyController &c) { return c.id == w_id; }); + if (it != gladiators_.end()) { + gladiators_.erase(it); + } + gladiators_.push_back(fission_pair.second); + } else { + // Phoenix Rebirth or Standard replacement + uint64_t w_id = worst.id; + auto it = + std::find_if(gladiators_.begin(), gladiators_.end(), + [&](const FuzzyController &c) { return c.id == w_id; }); + if (it != gladiators_.end()) { + gladiators_.erase(it); + } + + if (coin(rng) < 0.1f && !elites.empty()) { + auto *prime_elite = elites[0]; + gladiators_.push_back( + prime_elite->create_orthogonal_child(phase_phoenix_intensity)); } else { - controller.fitness = reward; + gladiators_.push_back(child); } -} + } -void Population::mark_violated(FuzzyController& controller) { - auto it = std::find_if(violated_controllers_.begin(), violated_controllers_.end(), - [&](const FuzzyController& c) { return c.id == controller.id; }); - if (it == violated_controllers_.end()) { - violated_controllers_.push_back(controller); + // Periodic Reset + if (elite_strategy_ == EliteStrategy::Reset) { + reset_step_counter_++; + if (reset_step_counter_ >= 500) { + reset_step_counter_ = 0; + for (auto &g : gladiators_) { + g.fitness = 0.0f; + g.ema_fitness = 0.0f; + g.fitness_history.clear(); + } } -} + } -float Population::get_effective_fitness(const FuzzyController& controller, float training_progress) const { - float recent_avg = 0.0f; - if (!controller.fitness_history.empty()) { - float sum = 0.0f; - for (float f : controller.fitness_history) sum += f; - recent_avg = sum / controller.fitness_history.size(); - } - - float lifetime_avg = 0.0f; - if (controller.evaluation_count > 0) { - lifetime_avg = controller.lifetime_fitness / static_cast(controller.evaluation_count); - } - - float alpha = 0.2f + 0.6f * training_progress; - return alpha * recent_avg + (1.0f - alpha) * lifetime_avg; -} - -void Population::evolve(float current_loss, float velocity, float training_progress) { - static thread_local std::mt19937 rng{std::random_device{}()}; - std::uniform_real_distribution coin(0.0f, 1.0f); - - if (gladiators_.empty()) return; - - FuzzyController& worst = get_worst_active(); - FuzzyController& best_active = get_best_active(); - auto elites = get_elites(); - - // Update behavioral archive for novelty search - if (best_active.fitness > -999.0f) { - std::vector behavior(best_active.genome.weights.begin(), best_active.genome.weights.begin() + std::min(20, static_cast(best_active.genome.weights.size()))); - behavioral_archive_.push_back(behavior); - if (behavioral_archive_.size() > BEHAVIORAL_ARCHIVE_SIZE) { - behavioral_archive_.erase(behavioral_archive_.begin()); - } - } - - // Phase-dependent scheduling - float phase_sigma_mult = 1.0f; - float phase_phoenix_intensity = 1.0f; - if (training_progress < 0.1f) { - phase_sigma_mult = 2.0f; - phase_phoenix_intensity = 1.5f; - } else if (training_progress > 0.7f) { - phase_sigma_mult = 0.5f; - phase_phoenix_intensity = 0.5f; - } - - // Loss-linked mutation rate - float mutation_rate = 0.5f; - if (link_mutation_) { - mutation_rate = std::max(0.05f, std::min(1.0f, current_loss / 5.0f)); - } - mutation_rate *= phase_sigma_mult; - mutation_rate = std::min(1.0f, mutation_rate); - - // Pairing probabilities - float elite_prob = 0.3f; - if (link_elite_) { - elite_prob = std::max(0.2f, std::min(0.8f, 1.0f - current_loss / 5.0f)); - } - float violator_prob = 0.1f; - if (link_violator_) { - violator_prob = std::max(0.0f, std::min(0.5f, (current_loss - 1.0f) / 4.0f)); - } - - // Select parent - FuzzyController* parent = &best_active; - std::vector partner_pool; - - float roll = coin(rng); - if (roll < elite_prob && !elites.empty()) { - std::uniform_int_distribution elite_dist(0, static_cast(elites.size()) - 1); - parent = elites[elite_dist(rng)]; - partner_pool = elites; - } else if (roll < elite_prob + violator_prob && !violated_controllers_.empty()) { - parent = &best_active; - // Filter living violators - for (auto& v : violated_controllers_) { - for (auto& g : gladiators_) { - if (g.id == v.id) { - partner_pool.push_back(&g); - break; - } - } - } - if (partner_pool.empty()) { - // Fallback - for (size_t i = 0; i < std::min(static_cast(10), gladiators_.size()); ++i) { - partner_pool.push_back(&gladiators_[i]); - } - } - } else { - parent = &best_active; - for (size_t i = 0; i < std::min(static_cast(10), gladiators_.size()); ++i) { - partner_pool.push_back(&gladiators_[i]); - } - } - - // Crossover or mutation - FuzzyController child; - if (coin(rng) < 0.7f && partner_pool.size() > 1) { - std::uniform_int_distribution pool_dist(0, static_cast(partner_pool.size()) - 1); - FuzzyController* partner = partner_pool[pool_dist(rng)]; - if (partner->id == parent->id) { - // Pick another if possible - for (auto* p : partner_pool) { - if (p->id != parent->id) { - partner = p; - break; - } - } - } - child = parent->crossover(*partner, false); - } else { - float sigma_mod = global_sigma_modifier_ * mutation_rate; - if (velocity < -0.05f) { - sigma_mod *= 1.0f / (1.0f + std::abs(velocity) * 10.0f); - } - // Epigenetic lock - if (parent->fitness > 0.5f) { - sigma_mod *= 0.1f; - } - child = parent->mutate(current_loss, sigma_mod); - } - - // Recover temperature - global_sigma_modifier_ = std::min(1.0f, global_sigma_modifier_ * 1.01f); - - // Fuzzy Pacer - if (use_fuzzy_pacer_) { - fitness_history_.push_back(best_active.fitness); - if (fitness_history_.size() > 20) { - fitness_history_.erase(fitness_history_.begin()); - } - if (fitness_history_.size() >= 10) { - float trend = 0.0f; - for (size_t i = 1; i < fitness_history_.size(); ++i) { - trend += (fitness_history_[i] - fitness_history_[i - 1]); - } - trend /= (fitness_history_.size() - 1); - float diversity = get_diversity_index(); - if (trend < 0.001f && diversity < 0.2f) { - global_sigma_modifier_ = std::min(5.0f, global_sigma_modifier_ * 1.2f); - } else if (trend > 0.01f) { - global_sigma_modifier_ = std::max(0.1f, global_sigma_modifier_ * 0.95f); - } - } - } - - // Banach-Tarski Fission - if (use_banach_fission_ && coin(rng) < 0.2f && !elites.empty()) { - auto* prime_elite = elites[0]; - auto fission_pair = prime_elite->banach_tarski_fission(phase_phoenix_intensity); - - // Find second worst - FuzzyController* second_worst = nullptr; - for (auto& g : gladiators_) { - if (g.id != worst.id) { - if (second_worst == nullptr || g.fitness < second_worst->fitness) { - second_worst = &g; - } - } - } - - // Replace worst and second_worst with plus and minus child - if (second_worst) { - uint64_t sw_id = second_worst->id; - auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == sw_id; }); - if (it != gladiators_.end()) { - gladiators_.erase(it); - } - gladiators_.push_back(fission_pair.first); - } - - uint64_t w_id = worst.id; - auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == w_id; }); - if (it != gladiators_.end()) { - gladiators_.erase(it); - } - gladiators_.push_back(fission_pair.second); - } else { - // Phoenix Rebirth or Standard replacement - uint64_t w_id = worst.id; - auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == w_id; }); - if (it != gladiators_.end()) { - gladiators_.erase(it); - } - - if (coin(rng) < 0.1f && !elites.empty()) { - auto* prime_elite = elites[0]; - gladiators_.push_back(prime_elite->create_orthogonal_child(phase_phoenix_intensity)); - } else { - gladiators_.push_back(child); - } - } - - // Periodic Reset - if (elite_strategy_ == EliteStrategy::Reset) { - reset_step_counter_++; - if (reset_step_counter_ >= 500) { - reset_step_counter_ = 0; - for (auto& g : gladiators_) { - g.fitness = 0.0f; - g.ema_fitness = 0.0f; - g.fitness_history.clear(); - } - } - } - - // Archive best - if (best_active.fitness > -999.0f) { - add_to_repository(best_active); - } + // Archive best + if (best_active.fitness > -999.0f) { + add_to_repository(best_active); + } } void Population::resize(int target_size, float training_progress) { - int current_size = static_cast(gladiators_.size()); - if (current_size == target_size) return; + int current_size = static_cast(gladiators_.size()); + if (current_size == target_size) + return; - static thread_local std::mt19937 rng{std::random_device{}()}; + static thread_local std::mt19937 rng{std::random_device{}()}; - if (current_size < target_size) { - int needed = target_size - current_size; - bool has_eval = false; - for (const auto& g : gladiators_) { - if (g.evaluation_count > 0) { - has_eval = true; - break; - } - } - if (has_eval) { - std::vector> candidates; - for (auto& g : gladiators_) { - candidates.push_back({get_effective_fitness(g, training_progress), &g}); - } - std::sort(candidates.begin(), candidates.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); - - int limit = std::min(10, static_cast(candidates.size())); - std::uniform_int_distribution cand_dist(0, limit - 1); - for (int i = 0; i < needed; ++i) { - FuzzyController* parent = candidates[cand_dist(rng)].second; - float mutation_str = 0.1f; - auto child = parent->mutate(mutation_str, 1.0f); - - float stability = 1.0f - std::min(1.0f, mutation_str); - std::uniform_real_distribution noise_dist(-0.1f, 0.1f); - float noise = noise_dist(rng) * std::abs(parent->fitness); - child.fitness = parent->fitness * stability + noise; - - gladiators_.push_back(child); - } - } else { - for (int i = 0; i < needed; ++i) { - gladiators_.emplace_back(); - } - } - } else { - std::vector evaluated; - std::vector unevaluated; - for (auto& g : gladiators_) { - if (g.evaluation_count > 0) { - evaluated.push_back(&g); - } else { - unevaluated.push_back(&g); - } - } - - std::sort(evaluated.begin(), evaluated.end(), - [this, training_progress](const FuzzyController* a, const FuzzyController* b) { - return get_effective_fitness(*a, training_progress) > get_effective_fitness(*b, training_progress); - }); - - std::vector new_pop; - new_pop.reserve(target_size); - for (int i = 0; i < std::min(target_size, static_cast(evaluated.size())); ++i) { - new_pop.push_back(*evaluated[i]); - } - int remaining = target_size - static_cast(new_pop.size()); - for (int i = 0; i < std::min(remaining, static_cast(unevaluated.size())); ++i) { - new_pop.push_back(*unevaluated[i]); - } - - gladiators_ = std::move(new_pop); + if (current_size < target_size) { + int needed = target_size - current_size; + bool has_eval = false; + for (const auto &g : gladiators_) { + if (g.evaluation_count > 0) { + has_eval = true; + break; + } } + if (has_eval) { + std::vector> candidates; + for (auto &g : gladiators_) { + candidates.push_back({get_effective_fitness(g, training_progress), &g}); + } + std::sort(candidates.begin(), candidates.end(), + [](const std::pair &a, + const std::pair &b) { + return a.first > b.first; + }); + + int limit = std::min(10, static_cast(candidates.size())); + std::uniform_int_distribution cand_dist(0, limit - 1); + for (int i = 0; i < needed; ++i) { + FuzzyController *parent = candidates[cand_dist(rng)].second; + float mutation_str = 0.1f; + auto child = parent->mutate(mutation_str, 1.0f); + + float stability = 1.0f - std::min(1.0f, mutation_str); + std::uniform_real_distribution noise_dist(-0.1f, 0.1f); + float noise = noise_dist(rng) * std::abs(parent->fitness); + child.fitness = parent->fitness * stability + noise; + + gladiators_.push_back(child); + } + } else { + for (int i = 0; i < needed; ++i) { + gladiators_.emplace_back(); + } + } + } else { + std::vector evaluated; + std::vector unevaluated; + for (auto &g : gladiators_) { + if (g.evaluation_count > 0) { + evaluated.push_back(&g); + } else { + unevaluated.push_back(&g); + } + } + + std::sort(evaluated.begin(), evaluated.end(), + [this, training_progress](const FuzzyController *a, + const FuzzyController *b) { + return get_effective_fitness(*a, training_progress) > + get_effective_fitness(*b, training_progress); + }); + + std::vector new_pop; + new_pop.reserve(target_size); + for (int i = 0; + i < std::min(target_size, static_cast(evaluated.size())); ++i) { + new_pop.push_back(*evaluated[i]); + } + int remaining = target_size - static_cast(new_pop.size()); + for (int i = 0; + i < std::min(remaining, static_cast(unevaluated.size())); ++i) { + new_pop.push_back(*unevaluated[i]); + } + + gladiators_ = std::move(new_pop); + } } void Population::calm_down() { - global_sigma_modifier_ *= 0.8f; - global_sigma_modifier_ = std::max(0.1f, global_sigma_modifier_); + global_sigma_modifier_ *= 0.8f; + global_sigma_modifier_ = std::max(0.1f, global_sigma_modifier_); } float Population::get_diversity_index() const { - if (gladiators_.size() < 2) return 0.0f; - float sum_dist = 0.0f; - int count = 0; - for (size_t i = 0; i < gladiators_.size(); ++i) { - for (size_t j = i + 1; j < gladiators_.size(); ++j) { - float dist_sq = 0.0f; - for (size_t w = 0; w < GENOME_SIZE; ++w) { - float diff = gladiators_[i].genome.weights[w] - gladiators_[j].genome.weights[w]; - dist_sq += diff * diff; - } - sum_dist += std::sqrt(dist_sq); - count++; - } + if (gladiators_.size() < 2) + return 0.0f; + float sum_dist = 0.0f; + int count = 0; + for (size_t i = 0; i < gladiators_.size(); ++i) { + for (size_t j = i + 1; j < gladiators_.size(); ++j) { + float dist_sq = 0.0f; + for (size_t w = 0; w < GENOME_SIZE; ++w) { + float diff = + gladiators_[i].genome.weights[w] - gladiators_[j].genome.weights[w]; + dist_sq += diff * diff; + } + sum_dist += std::sqrt(dist_sq); + count++; } - return sum_dist / static_cast(count); + } + return sum_dist / static_cast(count); } -std::vector Population::get_elites() { - if (gladiators_.size() <= static_cast(ELITE_COUNT)) { - std::vector ptrs; - ptrs.reserve(gladiators_.size()); - for (auto& g : gladiators_) { - ptrs.push_back(&g); - } - return ptrs; +std::vector Population::get_elites() { + if (gladiators_.size() <= static_cast(ELITE_COUNT)) { + std::vector ptrs; + ptrs.reserve(gladiators_.size()); + for (auto &g : gladiators_) { + ptrs.push_back(&g); } + return ptrs; + } - std::vector> candidates; - candidates.reserve(gladiators_.size()); - for (auto& g : gladiators_) { - float effective_fitness = 0.0f; - if (elite_strategy_ == EliteStrategy::AgePenalty) { - effective_fitness = g.fitness / std::log(static_cast(g.age) + 2.0f); - } else if (elite_strategy_ == EliteStrategy::EMA) { - effective_fitness = g.ema_fitness; - } else if (elite_strategy_ == EliteStrategy::Rolling) { - if (!g.fitness_history.empty()) { - float sum = 0.0f; - for (float f : g.fitness_history) sum += f; - effective_fitness = sum / g.fitness_history.size(); - } else { - effective_fitness = g.fitness; - } - } else { - effective_fitness = g.fitness; - } - candidates.push_back({effective_fitness, &g}); + std::vector> candidates; + candidates.reserve(gladiators_.size()); + for (auto &g : gladiators_) { + float effective_fitness = 0.0f; + if (elite_strategy_ == EliteStrategy::AgePenalty) { + effective_fitness = + g.fitness / std::log(static_cast(g.age) + 2.0f); + } else if (elite_strategy_ == EliteStrategy::EMA) { + effective_fitness = g.ema_fitness; + } else if (elite_strategy_ == EliteStrategy::Rolling) { + if (!g.fitness_history.empty()) { + float sum = 0.0f; + for (float f : g.fitness_history) + sum += f; + effective_fitness = sum / g.fitness_history.size(); + } else { + effective_fitness = g.fitness; + } + } else { + effective_fitness = g.fitness; } + candidates.push_back({effective_fitness, &g}); + } - std::sort(candidates.begin(), candidates.end(), - [](const std::pair& a, const std::pair& b) { - return a.first > b.first; - }); + std::sort(candidates.begin(), candidates.end(), + [](const std::pair &a, + const std::pair &b) { + return a.first > b.first; + }); - std::vector elites; - elites.reserve(ELITE_COUNT); - for (int i = 0; i < ELITE_COUNT; ++i) { - elites.push_back(candidates[i].second); - } - return elites; + std::vector elites; + elites.reserve(ELITE_COUNT); + for (int i = 0; i < ELITE_COUNT; ++i) { + elites.push_back(candidates[i].second); + } + return elites; } -void Population::add_to_repository(const FuzzyController& controller) { - auto it = std::lower_bound(repository_.begin(), repository_.end(), controller, - [](const FuzzyController& a, const FuzzyController& b) { - return a.fitness > b.fitness; - }); - repository_.insert(it, controller); +void Population::add_to_repository(const FuzzyController &controller) { + auto it = + std::lower_bound(repository_.begin(), repository_.end(), controller, + [](const FuzzyController &a, const FuzzyController &b) { + return a.fitness > b.fitness; + }); + repository_.insert(it, controller); - if (repository_.size() > 1000) { - repository_.resize(1000); - } + if (repository_.size() > 1000) { + repository_.resize(1000); + } } -} // namespace fces +} // namespace fces diff --git a/src/spectral.cpp b/src/spectral.cpp index 7e339fd..7066620 100644 --- a/src/spectral.cpp +++ b/src/spectral.cpp @@ -3,39 +3,40 @@ namespace fces { -SpectralSensor::SpectralSensor(torch::nn::Module& /*model*/) {} +SpectralSensor::SpectralSensor(torch::nn::Module & /*model*/) {} -void SpectralSensor::track_layer(const std::string& name, const torch::Tensor& weight) { - if (weight.dim() >= 2) { - layer_ranks_[name] = compute_effective_rank(weight); - } +void SpectralSensor::track_layer(const std::string &name, + const torch::Tensor &weight) { + if (weight.dim() >= 2) { + layer_ranks_[name] = compute_effective_rank(weight); + } } float SpectralSensor::get_global_rank() const { - if (layer_ranks_.empty()) return 0.0f; - float sum = 0.0f; - for (const auto& [_, rank] : layer_ranks_) { - sum += rank; - } - return sum / static_cast(layer_ranks_.size()); + if (layer_ranks_.empty()) + return 0.0f; + float sum = 0.0f; + for (const auto &[_, rank] : layer_ranks_) { + sum += rank; + } + return sum / static_cast(layer_ranks_.size()); } -void SpectralSensor::reset() { - layer_ranks_.clear(); +void SpectralSensor::reset() { layer_ranks_.clear(); } + +float SpectralSensor::compute_effective_rank(const torch::Tensor &weight) { + // SVD-based effective rank (Shannon entropy of normalized singular values) + auto svd_result = torch::svd(weight.to(torch::kFloat32)); + auto svd = std::get<1>(svd_result); + auto s = svd / svd.sum(); + auto log_s = (s + 1e-10f).log(); + float entropy = -s.mul(log_s).sum().item(); + return std::exp(entropy); } -float SpectralSensor::compute_effective_rank(const torch::Tensor& weight) { - // SVD-based effective rank (Shannon entropy of normalized singular values) - auto svd_result = torch::svd(weight.to(torch::kFloat32)); - auto svd = std::get<1>(svd_result); - auto s = svd / svd.sum(); - auto log_s = (s + 1e-10f).log(); - float entropy = -s.mul(log_s).sum().item(); - return std::exp(entropy); +float SpectralController::compute_alpha(float global_rank, + float grokking_coefficient) const { + return global_rank * grokking_coefficient; } -float SpectralController::compute_alpha(float global_rank, float grokking_coefficient) const { - return global_rank * grokking_coefficient; -} - -} // namespace fces +} // namespace fces diff --git a/src/telemetry.cpp b/src/telemetry.cpp index 57430b7..5568889 100644 --- a/src/telemetry.cpp +++ b/src/telemetry.cpp @@ -1,34 +1,37 @@ #include "fces/telemetry.hpp" -#include #include +#include namespace fces { -Telemetry& Telemetry::get() { - static Telemetry instance; - return instance; +Telemetry &Telemetry::get() { + static Telemetry instance; + return instance; } -void Telemetry::info(const std::string& event, const std::string& detail) { - std::cout << "[INFO] " << event; - if (!detail.empty()) std::cout << " | " << detail; - std::cout << std::endl; +void Telemetry::info(const std::string &event, const std::string &detail) { + std::cout << "[INFO] " << event; + if (!detail.empty()) + std::cout << " | " << detail; + std::cout << std::endl; } -void Telemetry::warning(const std::string& event, const std::string& detail) { - std::cerr << "[WARN] " << event; - if (!detail.empty()) std::cerr << " | " << detail; - std::cerr << std::endl; +void Telemetry::warning(const std::string &event, const std::string &detail) { + std::cerr << "[WARN] " << event; + if (!detail.empty()) + std::cerr << " | " << detail; + std::cerr << std::endl; } -void Telemetry::error(const std::string& event, const std::string& detail) { - std::cerr << "[ERROR] " << event; - if (!detail.empty()) std::cerr << " | " << detail; - std::cerr << std::endl; +void Telemetry::error(const std::string &event, const std::string &detail) { + std::cerr << "[ERROR] " << event; + if (!detail.empty()) + std::cerr << " | " << detail; + std::cerr << std::endl; } void Telemetry::push_to_remote() { - // TODO: Implement telemetry push (Git sync, file export, etc.) + // TODO: Implement telemetry push (Git sync, file export, etc.) } -} // namespace fces +} // namespace fces diff --git a/tests/test_controller.cpp b/tests/test_controller.cpp index 8041acb..5147a83 100644 --- a/tests/test_controller.cpp +++ b/tests/test_controller.cpp @@ -1,58 +1,59 @@ -#include #include "fces/controller.hpp" +#include using namespace fces; TEST(ControllerTest, Construction) { - FuzzyController ctrl; - EXPECT_GT(ctrl.id, 0u); - EXPECT_EQ(ctrl.fitness, 0.0f); - EXPECT_EQ(ctrl.origin, "random"); + FuzzyController ctrl; + EXPECT_GT(ctrl.id, 0u); + EXPECT_EQ(ctrl.fitness, 0.0f); + EXPECT_EQ(ctrl.origin, "random"); } TEST(ControllerTest, GenomeSize) { - FuzzyController ctrl; - EXPECT_EQ(ctrl.genome.weights.size(), static_cast(GENOME_SIZE)); + FuzzyController ctrl; + EXPECT_EQ(ctrl.genome.weights.size(), static_cast(GENOME_SIZE)); } TEST(ControllerTest, Mutation) { - FuzzyController parent; - auto child = parent.mutate(1.0f); - EXPECT_NE(child.id, parent.id); - EXPECT_EQ(child.origin, "mutation"); - // Child should differ from parent - bool differs = false; - for (size_t i = 0; i < parent.genome.weights.size(); ++i) { - if (parent.genome.weights[i] != child.genome.weights[i]) { - differs = true; - break; - } + FuzzyController parent; + auto child = parent.mutate(1.0f); + EXPECT_NE(child.id, parent.id); + EXPECT_EQ(child.origin, "mutation"); + // Child should differ from parent + bool differs = false; + for (size_t i = 0; i < parent.genome.weights.size(); ++i) { + if (parent.genome.weights[i] != child.genome.weights[i]) { + differs = true; + break; } - EXPECT_TRUE(differs); + } + EXPECT_TRUE(differs); } TEST(ControllerTest, Crossover) { - FuzzyController a, b; - auto child = a.crossover(b); - EXPECT_EQ(child.origin, "crossover"); + FuzzyController a, b; + auto child = a.crossover(b); + EXPECT_EQ(child.origin, "crossover"); } TEST(ControllerTest, DecideUpdate) { - FuzzyController ctrl; - std::vector> stats = {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f}}; - auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f); - EXPECT_EQ(actions.size(0), 1); - EXPECT_EQ(actions.size(1), GENOME_OUTPUT_DIM); + FuzzyController ctrl; + std::vector> stats = {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f}}; + auto actions = + ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f); + EXPECT_EQ(actions.size(0), 1); + EXPECT_EQ(actions.size(1), GENOME_OUTPUT_DIM); } TEST(ControllerTest, OrthogonalChild) { - FuzzyController parent; - auto child = parent.create_orthogonal_child(1.0f); - EXPECT_EQ(child.origin, "phoenix_rebirth"); + FuzzyController parent; + auto child = parent.create_orthogonal_child(1.0f); + EXPECT_EQ(child.origin, "phoenix_rebirth"); } TEST(ControllerTest, BanachFission) { - FuzzyController parent; - auto [plus, minus] = parent.banach_tarski_fission(1.0f); - EXPECT_NE(plus.id, minus.id); + FuzzyController parent; + auto [plus, minus] = parent.banach_tarski_fission(1.0f); + EXPECT_NE(plus.id, minus.id); } diff --git a/tests/test_fitness.cpp b/tests/test_fitness.cpp index 078352e..8106a14 100644 --- a/tests/test_fitness.cpp +++ b/tests/test_fitness.cpp @@ -1,33 +1,34 @@ -#include #include "fces/fitness.hpp" +#include using namespace fces; TEST(RunningStatsTest, BasicUpdate) { - RunningStats stats; - stats.update(1.0f); - stats.update(2.0f); - stats.update(3.0f); - EXPECT_NEAR(stats.get_mean(), 2.0f, 1e-5f); - EXPECT_GT(stats.get_std(), 0.0f); + RunningStats stats; + stats.update(1.0f); + stats.update(2.0f); + stats.update(3.0f); + EXPECT_NEAR(stats.get_mean(), 2.0f, 1e-5f); + EXPECT_GT(stats.get_std(), 0.0f); } TEST(RunningStatsTest, ZScore) { - RunningStats stats; - for (int i = 0; i < 100; ++i) stats.update(static_cast(i)); - float z = stats.z_score(50.0f); - EXPECT_NEAR(z, 0.0f, 0.1f); + RunningStats stats; + for (int i = 0; i < 100; ++i) + stats.update(static_cast(i)); + float z = stats.z_score(50.0f); + EXPECT_NEAR(z, 0.0f, 0.1f); } TEST(FitnessEngineTest, LossSignal) { - FitnessEngine engine; - float sig = engine.calculate_loss_signal(1.0f, 2.0f, "relative"); - EXPECT_LT(sig, 0.0f); // Improving + FitnessEngine engine; + float sig = engine.calculate_loss_signal(1.0f, 2.0f, "relative"); + EXPECT_LT(sig, 0.0f); // Improving } TEST(FitnessEngineTest, KZMDamping) { - FitnessEngine engine(0.1f); - float d = engine.compute_kzm_damping(5.0f); - EXPECT_GT(d, 0.0f); - EXPECT_LT(d, 1.0f); + FitnessEngine engine(0.1f); + float d = engine.compute_kzm_damping(5.0f); + EXPECT_GT(d, 0.0f); + EXPECT_LT(d, 1.0f); } diff --git a/tests/test_optimizer.cpp b/tests/test_optimizer.cpp index 9cbd96e..7135f74 100644 --- a/tests/test_optimizer.cpp +++ b/tests/test_optimizer.cpp @@ -1,42 +1,45 @@ +#include "fces/optimizer.hpp" #include #include -#include "fces/optimizer.hpp" using namespace fces; TEST(OptimizerTest, Construction) { - auto model = torch::nn::Linear(10, 5); - std::vector params; - for (auto& p : model->parameters()) params.push_back(p); + auto model = torch::nn::Linear(10, 5); + std::vector params; + for (auto &p : model->parameters()) + params.push_back(p); - FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); - EXPECT_EQ(opt.step_count(), 0); + FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); + EXPECT_EQ(opt.step_count(), 0); } TEST(OptimizerTest, StepUpdatesCounter) { - auto model = torch::nn::Linear(10, 5); - std::vector params; - for (auto& p : model->parameters()) params.push_back(p); + auto model = torch::nn::Linear(10, 5); + std::vector params; + for (auto &p : model->parameters()) + params.push_back(p); - FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); + FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f)); - // Simulate a training step - auto x = torch::randn({2, 10}); - auto y = model->forward(x); - auto loss = y.sum(); - loss.backward(); - opt.step(); + // Simulate a training step + auto x = torch::randn({2, 10}); + auto y = model->forward(x); + auto loss = y.sum(); + loss.backward(); + opt.step(); - EXPECT_EQ(opt.step_count(), 1); + EXPECT_EQ(opt.step_count(), 1); } TEST(OptimizerTest, UpdateFitness) { - auto model = torch::nn::Linear(10, 5); - std::vector params; - for (auto& p : model->parameters()) params.push_back(p); + auto model = torch::nn::Linear(10, 5); + std::vector params; + for (auto &p : model->parameters()) + params.push_back(p); - FCESOptimizer opt(params); - opt.update_fitness(3.0f); - opt.update_fitness(2.5f); - // Should not crash + FCESOptimizer opt(params); + opt.update_fitness(3.0f); + opt.update_fitness(2.5f); + // Should not crash } diff --git a/tests/test_population.cpp b/tests/test_population.cpp index d56559c..e49e7da 100644 --- a/tests/test_population.cpp +++ b/tests/test_population.cpp @@ -1,28 +1,28 @@ -#include #include "fces/population.hpp" +#include using namespace fces; TEST(PopulationTest, Construction) { - Population pop(50); - EXPECT_EQ(pop.size(), 50); + Population pop(50); + EXPECT_EQ(pop.size(), 50); } TEST(PopulationTest, DirectConstruction) { - Population pop(200, 10000, EliteStrategy::Cumulative, - false, false, false, false, false, true); - EXPECT_EQ(pop.size(), 1); + Population pop(200, 10000, EliteStrategy::Cumulative, false, false, false, + false, false, true); + EXPECT_EQ(pop.size(), 1); } TEST(PopulationTest, GetBestActive) { - Population pop(10); - auto& best = pop.get_best_active(); - // Should not crash - EXPECT_GE(best.id, 0u); + Population pop(10); + auto &best = pop.get_best_active(); + // Should not crash + EXPECT_GE(best.id, 0u); } TEST(PopulationTest, CalmDown) { - Population pop(10); - pop.calm_down(); - EXPECT_LT(pop.global_sigma_modifier(), 1.0f); + Population pop(10); + pop.calm_down(); + EXPECT_LT(pop.global_sigma_modifier(), 1.0f); }