style: run clang-format and configure pre-commit hooks
This commit is contained in:
51
.pre-commit-config.yaml
Normal file
51
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
# Pre-commit configuration for FCES-native
|
||||
repos:
|
||||
# 1. Standard hooks for general file cleanliness
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
- id: check-added-large-files
|
||||
|
||||
# 2. C++ Formatting using clang-format (fetched dynamically)
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: v18.1.5
|
||||
hooks:
|
||||
- id: clang-format
|
||||
types_or: [c++, c]
|
||||
|
||||
# 3. C++ Static Analysis using local cppcheck
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: cppcheck
|
||||
name: cppcheck
|
||||
entry: cppcheck
|
||||
language: system
|
||||
types_or: [c++, c]
|
||||
args: [
|
||||
"--enable=warning,portability",
|
||||
"--suppress=missingIncludeSystem",
|
||||
"--suppress=unusedFunction",
|
||||
"--suppress=normalCheckLevelMaxBranches",
|
||||
"--inline-suppr",
|
||||
"--error-exitcode=1",
|
||||
"-Iinclude"
|
||||
]
|
||||
|
||||
# 4. Python Linter and Formatter (ruff)
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.4.4
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [ --fix ]
|
||||
- id: ruff-format
|
||||
|
||||
# 5. Python Type Checking (mypy)
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.10.0
|
||||
hooks:
|
||||
- id: mypy
|
||||
args: [ "--ignore-missing-imports", "--strict" ]
|
||||
additional_dependencies: [ "types-requests", "pydantic" ]
|
||||
@@ -1,21 +1,23 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "fces/population.hpp"
|
||||
#include "fces/controller.hpp"
|
||||
#include "fces/population.hpp"
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
using namespace fces;
|
||||
|
||||
static void BM_ControllerDecideUpdate(benchmark::State& state) {
|
||||
static void BM_ControllerDecideUpdate(benchmark::State &state) {
|
||||
FuzzyController ctrl;
|
||||
std::vector<std::vector<float>> stats(state.range(0), {0.1f, 0.2f, 0.3f, 0.4f, 0.5f});
|
||||
std::vector<std::vector<float>> stats(state.range(0),
|
||||
{0.1f, 0.2f, 0.3f, 0.4f, 0.5f});
|
||||
|
||||
for (auto _ : state) {
|
||||
auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f);
|
||||
auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f,
|
||||
1.0f, 0.0f);
|
||||
benchmark::DoNotOptimize(actions);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ControllerDecideUpdate)->Arg(10)->Arg(50)->Arg(200);
|
||||
|
||||
static void BM_Evolve(benchmark::State& state) {
|
||||
static void BM_Evolve(benchmark::State &state) {
|
||||
Population pop(state.range(0));
|
||||
|
||||
for (auto _ : state) {
|
||||
@@ -24,7 +26,7 @@ static void BM_Evolve(benchmark::State& state) {
|
||||
}
|
||||
BENCHMARK(BM_Evolve)->Arg(50)->Arg(100)->Arg(200);
|
||||
|
||||
static void BM_Mutation(benchmark::State& state) {
|
||||
static void BM_Mutation(benchmark::State &state) {
|
||||
FuzzyController ctrl;
|
||||
|
||||
for (auto _ : state) {
|
||||
@@ -34,7 +36,7 @@ static void BM_Mutation(benchmark::State& state) {
|
||||
}
|
||||
BENCHMARK(BM_Mutation);
|
||||
|
||||
static void BM_Crossover(benchmark::State& state) {
|
||||
static void BM_Crossover(benchmark::State &state) {
|
||||
FuzzyController a, b;
|
||||
|
||||
for (auto _ : state) {
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
#include "fces/optimizer.hpp"
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <torch/torch.h>
|
||||
#include "fces/optimizer.hpp"
|
||||
|
||||
using namespace fces;
|
||||
|
||||
static void BM_OptimizerStep(benchmark::State& state) {
|
||||
static void BM_OptimizerStep(benchmark::State &state) {
|
||||
auto model = torch::nn::Linear(state.range(0), state.range(0) / 2);
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) params.push_back(p);
|
||||
for (auto &p : model->parameters())
|
||||
params.push_back(p);
|
||||
|
||||
FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f));
|
||||
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
* @brief Example: train a small neural network with FCES via libtorch.
|
||||
*/
|
||||
|
||||
#include "fces/optimizer.hpp"
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
#include "fces/optimizer.hpp"
|
||||
|
||||
struct TinyNet : torch::nn::Module {
|
||||
torch::nn::Linear fc1{nullptr}, fc2{nullptr};
|
||||
@@ -25,15 +25,13 @@ int main() {
|
||||
auto model = std::make_shared<TinyNet>();
|
||||
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) params.push_back(p);
|
||||
for (auto &p : model->parameters())
|
||||
params.push_back(p);
|
||||
|
||||
fces::FCESOptimizer optimizer(
|
||||
params,
|
||||
fces::FCESConfig{}
|
||||
fces::FCESOptimizer optimizer(params, fces::FCESConfig{}
|
||||
.set_lr(1.6e-3f)
|
||||
.set_population_size(200)
|
||||
.set_total_steps(1000)
|
||||
);
|
||||
.set_total_steps(1000));
|
||||
|
||||
// Generate synthetic regression data
|
||||
auto x_train = torch::randn({100, 10});
|
||||
@@ -48,8 +46,8 @@ int main() {
|
||||
optimizer.update_fitness(loss.item<float>());
|
||||
|
||||
if (epoch % 10 == 0) {
|
||||
std::cout << "Epoch " << epoch
|
||||
<< " | Loss: " << loss.item<float>() << std::endl;
|
||||
std::cout << "Epoch " << epoch << " | Loss: " << loss.item<float>()
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
* @brief Minimal example: optimize a quadratic function with FCES.
|
||||
*/
|
||||
|
||||
#include "fces/optimizer.hpp"
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
#include "fces/optimizer.hpp"
|
||||
|
||||
int main() {
|
||||
// Target: minimize f(x) = ||x - target||^2
|
||||
@@ -23,8 +23,7 @@ int main() {
|
||||
optimizer.update_fitness(loss.item<float>());
|
||||
|
||||
if (step % 50 == 0) {
|
||||
std::cout << "Step " << step
|
||||
<< " | Loss: " << loss.item<float>()
|
||||
std::cout << "Step " << step << " | Loss: " << loss.item<float>()
|
||||
<< " | x: " << x << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
* @brief Example showcasing telemetry instrumentation and model inference.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <torch/torch.h>
|
||||
#include "fces/optimizer.hpp"
|
||||
#include "fces/telemetry.hpp"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
|
||||
// Define a simple neural network for nonlinear regression: y = x^2
|
||||
struct RegressionNet : torch::nn::Module {
|
||||
@@ -25,7 +25,8 @@ struct RegressionNet : torch::nn::Module {
|
||||
};
|
||||
|
||||
int main() {
|
||||
fces::Telemetry::get().info("app_start", "Telemetry and Inference demo initialized.");
|
||||
fces::Telemetry::get().info("app_start",
|
||||
"Telemetry and Inference demo initialized.");
|
||||
|
||||
// 1. Create Model and Data
|
||||
auto model = std::make_shared<RegressionNet>();
|
||||
@@ -36,19 +37,17 @@ int main() {
|
||||
|
||||
// 2. Configure Optimizer
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) {
|
||||
for (auto &p : model->parameters()) {
|
||||
params.push_back(p);
|
||||
}
|
||||
|
||||
fces::FCESOptimizer optimizer(
|
||||
params,
|
||||
fces::FCESConfig{}
|
||||
.set_lr(2e-3f)
|
||||
.set_population_size(150)
|
||||
.set_total_steps(100)
|
||||
);
|
||||
fces::FCESConfig{}.set_lr(2e-3f).set_population_size(150).set_total_steps(
|
||||
100));
|
||||
|
||||
fces::Telemetry::get().info("training_start", "Beginning neural net optimization with FCES.");
|
||||
fces::Telemetry::get().info("training_start",
|
||||
"Beginning neural net optimization with FCES.");
|
||||
|
||||
auto start_train = std::chrono::high_resolution_clock::now();
|
||||
|
||||
@@ -62,19 +61,24 @@ int main() {
|
||||
optimizer.update_fitness(loss.item<float>());
|
||||
|
||||
if (epoch % 20 == 0) {
|
||||
fces::Telemetry::get().info("epoch_checkpoint",
|
||||
"Epoch " + std::to_string(epoch) + " | Loss: " + std::to_string(loss.item<float>()));
|
||||
fces::Telemetry::get().info(
|
||||
"epoch_checkpoint", "Epoch " + std::to_string(epoch) + " | Loss: " +
|
||||
std::to_string(loss.item<float>()));
|
||||
}
|
||||
}
|
||||
|
||||
auto end_train = std::chrono::high_resolution_clock::now();
|
||||
double train_duration = std::chrono::duration<double, std::milli>(end_train - start_train).count();
|
||||
double train_duration =
|
||||
std::chrono::duration<double, std::milli>(end_train - start_train)
|
||||
.count();
|
||||
|
||||
fces::Telemetry::get().info("training_complete",
|
||||
"Duration: " + std::to_string(train_duration) + " ms");
|
||||
"Duration: " + std::to_string(train_duration) +
|
||||
" ms");
|
||||
|
||||
// 4. Inference Phase
|
||||
fces::Telemetry::get().info("inference_phase_start", "Evaluating model on new test inputs.");
|
||||
fces::Telemetry::get().info("inference_phase_start",
|
||||
"Evaluating model on new test inputs.");
|
||||
|
||||
// Generate test inputs
|
||||
auto x_test = torch::tensor({-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}).unsqueeze(1);
|
||||
@@ -91,23 +95,31 @@ int main() {
|
||||
y_pred = model->forward(x_test);
|
||||
}
|
||||
auto end_inf = std::chrono::high_resolution_clock::now();
|
||||
double inf_duration = std::chrono::duration<double, std::milli>(end_inf - start_inf).count();
|
||||
double inf_duration =
|
||||
std::chrono::duration<double, std::milli>(end_inf - start_inf).count();
|
||||
|
||||
// Log telemetry for inference performance
|
||||
fces::Telemetry::get().info("inference_perf",
|
||||
"Inputs: " + std::to_string(x_test.size(0)) + " | Latency: " + std::to_string(inf_duration) + " ms");
|
||||
fces::Telemetry::get().info(
|
||||
"inference_perf", "Inputs: " + std::to_string(x_test.size(0)) +
|
||||
" | Latency: " + std::to_string(inf_duration) +
|
||||
" ms");
|
||||
|
||||
// Print predictions and expected values side-by-side
|
||||
std::cout << "\n================ INFERENCE RESULTS ================" << std::endl;
|
||||
std::cout << "Input (x) | Predicted (y_pred) | Expected (y_expected)" << std::endl;
|
||||
std::cout << "----------------------------------------------------" << std::endl;
|
||||
std::cout << "\n================ INFERENCE RESULTS ================"
|
||||
<< std::endl;
|
||||
std::cout << "Input (x) | Predicted (y_pred) | Expected (y_expected)"
|
||||
<< std::endl;
|
||||
std::cout << "----------------------------------------------------"
|
||||
<< std::endl;
|
||||
for (int i = 0; i < x_test.size(0); ++i) {
|
||||
float x_val = x_test[i][0].item<float>();
|
||||
float pred_val = y_pred[i][0].item<float>();
|
||||
float exp_val = y_expected[i][0].item<float>();
|
||||
std::printf(" %7.2f | %7.4f | %7.4f\n", x_val, pred_val, exp_val);
|
||||
std::printf(" %7.2f | %7.4f | %7.4f\n", x_val,
|
||||
pred_val, exp_val);
|
||||
}
|
||||
std::cout << "====================================================\n" << std::endl;
|
||||
std::cout << "====================================================\n"
|
||||
<< std::endl;
|
||||
|
||||
fces::Telemetry::get().info("app_finish", "Exiting demo successfully.");
|
||||
return 0;
|
||||
|
||||
@@ -72,11 +72,26 @@ struct FCESConfig {
|
||||
bool auto_population = false;
|
||||
|
||||
// Builder pattern
|
||||
FCESConfig& set_lr(float v) { lr = v; return *this; }
|
||||
FCESConfig& set_population_size(int v) { population_size = v; return *this; }
|
||||
FCESConfig& set_total_steps(int v) { total_steps = v; return *this; }
|
||||
FCESConfig& set_grokking_coefficient(float v) { grokking_coefficient = v; return *this; }
|
||||
FCESConfig& set_direct_construction(bool v) { direct_construction = v; return *this; }
|
||||
FCESConfig &set_lr(float v) {
|
||||
lr = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_population_size(int v) {
|
||||
population_size = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_total_steps(int v) {
|
||||
total_steps = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_grokking_coefficient(float v) {
|
||||
grokking_coefficient = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_direct_construction(bool v) {
|
||||
direct_construction = v;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
|
||||
@@ -27,7 +27,8 @@ constexpr int GENOME_INPUT_DIM = 14;
|
||||
constexpr int GENOME_HIDDEN_DIM = 8;
|
||||
// Controller output dimension: [multiplier, sign_gate, wd_mult]
|
||||
constexpr int GENOME_OUTPUT_DIM = 3;
|
||||
// Total genome size: input->hidden weights + hidden biases + hidden->output weights + output biases
|
||||
// Total genome size: input->hidden weights + hidden biases + hidden->output
|
||||
// weights + output biases
|
||||
constexpr int GENOME_SIZE =
|
||||
(GENOME_INPUT_DIM * GENOME_HIDDEN_DIM) + // input -> hidden weights
|
||||
GENOME_HIDDEN_DIM + // hidden biases
|
||||
@@ -45,7 +46,7 @@ struct Genome {
|
||||
float plasticity = 1.0f;
|
||||
|
||||
/// Initialize with random weights from a normal distribution
|
||||
void randomize(std::mt19937& rng);
|
||||
void randomize(std::mt19937 &rng);
|
||||
|
||||
/// Deep copy
|
||||
Genome clone() const;
|
||||
@@ -109,17 +110,12 @@ public:
|
||||
* @param projected_drift Projected loss drift
|
||||
* @return Tensor of shape [num_groups, 3] — (mult, sign_gate, wd_mult)
|
||||
*/
|
||||
torch::Tensor decide_update(
|
||||
const std::vector<std::vector<float>>& layer_stats,
|
||||
float loss_trend,
|
||||
float step_pct,
|
||||
float rollback_rate,
|
||||
float grad_stability,
|
||||
float spectral_alpha,
|
||||
float stagnation_intensity,
|
||||
float kzm_damping,
|
||||
float projected_drift
|
||||
);
|
||||
torch::Tensor
|
||||
decide_update(const std::vector<std::vector<float>> &layer_stats,
|
||||
float loss_trend, float step_pct, float rollback_rate,
|
||||
float grad_stability, float spectral_alpha,
|
||||
float stagnation_intensity, float kzm_damping,
|
||||
float projected_drift);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Evolutionary Operators
|
||||
@@ -129,13 +125,15 @@ public:
|
||||
FuzzyController mutate(float current_loss, float sigma_scale = 1.0f) const;
|
||||
|
||||
/// Crossover with another controller
|
||||
FuzzyController crossover(const FuzzyController& partner, bool use_alignment = true) const;
|
||||
FuzzyController crossover(const FuzzyController &partner,
|
||||
bool use_alignment = true) const;
|
||||
|
||||
/// Create an orthogonal counter-strategy (Phoenix Rebirth)
|
||||
FuzzyController create_orthogonal_child(float intensity = 1.0f) const;
|
||||
|
||||
/// Banach-Tarski fission: split into two complementary children
|
||||
std::pair<FuzzyController, FuzzyController> banach_tarski_fission(float intensity = 1.0f) const;
|
||||
std::pair<FuzzyController, FuzzyController>
|
||||
banach_tarski_fission(float intensity = 1.0f) const;
|
||||
|
||||
private:
|
||||
static std::atomic<uint64_t> next_id_;
|
||||
|
||||
@@ -21,23 +21,16 @@ namespace fces {
|
||||
*/
|
||||
class EvolutionManager {
|
||||
public:
|
||||
explicit EvolutionManager(
|
||||
Population& population,
|
||||
int selection_interval = 50,
|
||||
explicit EvolutionManager(Population &population, int selection_interval = 50,
|
||||
bool auto_population = false,
|
||||
bool direct_construction = false
|
||||
);
|
||||
bool direct_construction = false);
|
||||
|
||||
/// Get the currently active controller
|
||||
FuzzyController& get_active_controller();
|
||||
FuzzyController &get_active_controller();
|
||||
|
||||
/// Update population dynamics based on current training state
|
||||
void update_population_dynamics(
|
||||
float loss_velocity,
|
||||
float ema_loss,
|
||||
int step_counter,
|
||||
int total_steps
|
||||
);
|
||||
void update_population_dynamics(float loss_velocity, float ema_loss,
|
||||
int step_counter, int total_steps);
|
||||
|
||||
/// Steps the active controller has been in control
|
||||
int steps_active = 0;
|
||||
@@ -46,7 +39,7 @@ public:
|
||||
int selection_interval;
|
||||
|
||||
private:
|
||||
Population& population_;
|
||||
Population &population_;
|
||||
bool auto_population_;
|
||||
bool direct_construction_;
|
||||
};
|
||||
|
||||
@@ -2,14 +2,15 @@
|
||||
|
||||
/**
|
||||
* @file fitness.hpp
|
||||
* @brief Fitness evaluation — loss signal processing and multi-objective evaluation.
|
||||
* @brief Fitness evaluation — loss signal processing and multi-objective
|
||||
* evaluation.
|
||||
*
|
||||
* Port of: packages/fces/core/fitness_engine.py + fitness.py
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace fces {
|
||||
|
||||
@@ -48,7 +49,8 @@ public:
|
||||
* @param mode "relative" or "absolute"
|
||||
* @return Velocity signal (negative = improving)
|
||||
*/
|
||||
float calculate_loss_signal(float current_loss, float ema_loss, const std::string& mode = "relative") const;
|
||||
float calculate_loss_signal(float current_loss, float ema_loss,
|
||||
const std::string &mode = "relative") const;
|
||||
|
||||
/**
|
||||
* Compute Kibble-Zurek Mechanism damping factor.
|
||||
@@ -89,7 +91,7 @@ public:
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
const std::string& name() const noexcept { return name_; }
|
||||
const std::string &name() const noexcept { return name_; }
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
@@ -112,13 +114,14 @@ struct FitnessMetrics {
|
||||
};
|
||||
|
||||
/**
|
||||
* FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy weighting.
|
||||
* FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy
|
||||
* weighting.
|
||||
*/
|
||||
class FuzzyFitnessEvaluator {
|
||||
public:
|
||||
FuzzyFitnessEvaluator() noexcept;
|
||||
|
||||
float evaluate(const FitnessMetrics& metrics) const noexcept;
|
||||
float evaluate(const FitnessMetrics &metrics) const noexcept;
|
||||
|
||||
private:
|
||||
FuzzySet stability_set_;
|
||||
|
||||
@@ -5,17 +5,17 @@
|
||||
* @brief FCESOptimizer — the main entry point. libtorch-compatible optimizer.
|
||||
*/
|
||||
|
||||
#include <torch/torch.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "population.hpp"
|
||||
#include "fitness.hpp"
|
||||
#include "evolution.hpp"
|
||||
#include "spectral.hpp"
|
||||
#include "fitness.hpp"
|
||||
#include "oscillation.hpp"
|
||||
#include "population.hpp"
|
||||
#include "spectral.hpp"
|
||||
#include "telemetry.hpp"
|
||||
|
||||
namespace fces {
|
||||
@@ -24,7 +24,8 @@ namespace fces {
|
||||
* FCESOptimizer — Fuzzy Controlled Evolutionary Search V49.0 (C++ Port).
|
||||
*
|
||||
* Usage:
|
||||
* auto optimizer = FCESOptimizer(model->parameters(), FCESConfig{}.set_lr(1.6e-3));
|
||||
* auto optimizer = FCESOptimizer(model->parameters(),
|
||||
* FCESConfig{}.set_lr(1.6e-3));
|
||||
* // In training loop:
|
||||
* optimizer.zero_grad();
|
||||
* auto loss = model->forward(input);
|
||||
@@ -32,7 +33,8 @@ namespace fces {
|
||||
* optimizer.step();
|
||||
* optimizer.update_fitness(loss.item<float>());
|
||||
*/
|
||||
struct FCESOptimizerOptions : public torch::optim::OptimizerCloneableOptions<FCESOptimizerOptions> {
|
||||
struct FCESOptimizerOptions
|
||||
: public torch::optim::OptimizerCloneableOptions<FCESOptimizerOptions> {
|
||||
explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {}
|
||||
|
||||
double get_lr() const override { return lr_; }
|
||||
@@ -43,10 +45,8 @@ struct FCESOptimizerOptions : public torch::optim::OptimizerCloneableOptions<FCE
|
||||
|
||||
class FCESOptimizer : public torch::optim::Optimizer {
|
||||
public:
|
||||
explicit FCESOptimizer(
|
||||
std::vector<torch::Tensor> params,
|
||||
FCESConfig config = FCESConfig{}
|
||||
);
|
||||
explicit FCESOptimizer(std::vector<torch::Tensor> params,
|
||||
FCESConfig config = FCESConfig{});
|
||||
|
||||
/// Perform a single optimization step
|
||||
torch::Tensor step(LossClosure closure = nullptr) override;
|
||||
@@ -97,7 +97,7 @@ private:
|
||||
|
||||
// Internal methods
|
||||
void gather_stats();
|
||||
void apply_parameter_updates(const torch::Tensor& actions);
|
||||
void apply_parameter_updates(const torch::Tensor &actions);
|
||||
void handle_rollback();
|
||||
};
|
||||
|
||||
|
||||
@@ -24,8 +24,9 @@ public:
|
||||
|
||||
private:
|
||||
std::vector<float> loss_history_;
|
||||
static std::vector<float> detrend(const std::vector<float>& signal);
|
||||
static std::vector<float> compute_power_spectrum(const std::vector<float>& signal);
|
||||
static std::vector<float> detrend(const std::vector<float> &signal);
|
||||
static std::vector<float>
|
||||
compute_power_spectrum(const std::vector<float> &signal);
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
* Port of: packages/fces/core/population.py (~1260 LOC)
|
||||
*/
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
#include "controller.hpp"
|
||||
|
||||
@@ -50,46 +50,43 @@ public:
|
||||
// Construction
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
explicit Population(
|
||||
int active_size = 75,
|
||||
int repo_size = 10000,
|
||||
explicit Population(int active_size = 75, int repo_size = 10000,
|
||||
EliteStrategy elite_strategy = EliteStrategy::Cumulative,
|
||||
bool link_mutation = false,
|
||||
bool link_elite = false,
|
||||
bool link_violator = false,
|
||||
bool use_fuzzy_pacer = false,
|
||||
bool link_mutation = false, bool link_elite = false,
|
||||
bool link_violator = false, bool use_fuzzy_pacer = false,
|
||||
bool use_fuzzy_importance = false,
|
||||
bool direct_construction = false,
|
||||
bool use_banach_fission = false
|
||||
);
|
||||
bool use_banach_fission = false);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Core API
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
/// Get the currently active controller (sticky selection)
|
||||
FuzzyController& get_active_controller();
|
||||
FuzzyController &get_active_controller();
|
||||
|
||||
/// Select a controller via fitness-weighted tournament
|
||||
FuzzyController& select_weighted();
|
||||
FuzzyController &select_weighted();
|
||||
|
||||
/// Get the best controller in the active population
|
||||
FuzzyController& get_best_active();
|
||||
FuzzyController &get_best_active();
|
||||
|
||||
/// Get the worst non-elite controller
|
||||
FuzzyController& get_worst_active();
|
||||
FuzzyController &get_worst_active();
|
||||
|
||||
/// Remove a specific controller (unless elite)
|
||||
void kill(FuzzyController& controller);
|
||||
void kill(FuzzyController &controller);
|
||||
|
||||
/// Update a controller's fitness
|
||||
void update_controller_fitness(FuzzyController& controller, float reward, bool increment_eval = true);
|
||||
void update_controller_fitness(FuzzyController &controller, float reward,
|
||||
bool increment_eval = true);
|
||||
|
||||
/// Mark a controller as a violator (rollback)
|
||||
void mark_violated(FuzzyController& controller);
|
||||
void mark_violated(FuzzyController &controller);
|
||||
|
||||
/// Get the effective fitness considering elite strategy and training progress
|
||||
float get_effective_fitness(const FuzzyController& controller, float training_progress) const;
|
||||
float get_effective_fitness(const FuzzyController &controller,
|
||||
float training_progress) const;
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Evolution
|
||||
@@ -102,7 +99,8 @@ public:
|
||||
* @param velocity Loss velocity
|
||||
* @param training_progress Training progress [0, 1]
|
||||
*/
|
||||
void evolve(float current_loss, float velocity = 0.0f, float training_progress = 0.0f);
|
||||
void evolve(float current_loss, float velocity = 0.0f,
|
||||
float training_progress = 0.0f);
|
||||
|
||||
/// Resize the population (dynamic expansion/contraction)
|
||||
void resize(int target_size, float training_progress = 0.5f);
|
||||
@@ -131,7 +129,7 @@ private:
|
||||
float global_sigma_modifier_ = 1.0f;
|
||||
|
||||
// Sticky controller selection
|
||||
FuzzyController* active_controller_ = nullptr;
|
||||
FuzzyController *active_controller_ = nullptr;
|
||||
int steps_active_ = 0;
|
||||
int selection_interval_ = 20;
|
||||
|
||||
@@ -158,8 +156,8 @@ private:
|
||||
// Internal
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
std::vector<FuzzyController*> get_elites();
|
||||
void add_to_repository(const FuzzyController& controller);
|
||||
std::vector<FuzzyController *> get_elites();
|
||||
void add_to_repository(const FuzzyController &controller);
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
|
||||
@@ -24,10 +24,10 @@ namespace fces {
|
||||
class SpectralSensor {
|
||||
public:
|
||||
SpectralSensor() = default;
|
||||
explicit SpectralSensor(torch::nn::Module& model);
|
||||
explicit SpectralSensor(torch::nn::Module &model);
|
||||
|
||||
/// Track a layer's weight tensor
|
||||
void track_layer(const std::string& name, const torch::Tensor& weight);
|
||||
void track_layer(const std::string &name, const torch::Tensor &weight);
|
||||
|
||||
/// Get the global (average) effective rank
|
||||
float get_global_rank() const;
|
||||
@@ -39,7 +39,7 @@ private:
|
||||
std::unordered_map<std::string, float> layer_ranks_;
|
||||
|
||||
/// Compute effective rank via SVD
|
||||
static float compute_effective_rank(const torch::Tensor& weight);
|
||||
static float compute_effective_rank(const torch::Tensor &weight);
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -11,11 +11,11 @@ namespace fces {
|
||||
|
||||
class Telemetry {
|
||||
public:
|
||||
static Telemetry& get();
|
||||
static Telemetry &get();
|
||||
|
||||
void info(const std::string& event, const std::string& detail = "");
|
||||
void warning(const std::string& event, const std::string& detail = "");
|
||||
void error(const std::string& event, const std::string& detail = "");
|
||||
void info(const std::string &event, const std::string &detail = "");
|
||||
void warning(const std::string &event, const std::string &detail = "");
|
||||
void error(const std::string &event, const std::string &detail = "");
|
||||
|
||||
void push_to_remote();
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <pybind11/stl.h>
|
||||
#include <torch/extension.h>
|
||||
|
||||
#include "fces/optimizer.hpp"
|
||||
#include "fces/config.hpp"
|
||||
#include "fces/optimizer.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
@@ -26,22 +26,23 @@ PYBIND11_MODULE(fces_native, m) {
|
||||
.def_readwrite("lr", &fces::FCESConfig::lr)
|
||||
.def_readwrite("population_size", &fces::FCESConfig::population_size)
|
||||
.def_readwrite("total_steps", &fces::FCESConfig::total_steps)
|
||||
.def_readwrite("grokking_coefficient", &fces::FCESConfig::grokking_coefficient)
|
||||
.def_readwrite("direct_construction", &fces::FCESConfig::direct_construction);
|
||||
.def_readwrite("grokking_coefficient",
|
||||
&fces::FCESConfig::grokking_coefficient)
|
||||
.def_readwrite("direct_construction",
|
||||
&fces::FCESConfig::direct_construction);
|
||||
|
||||
py::class_<fces::FCESOptimizer>(m, "FCESOptimizer")
|
||||
.def(py::init<std::vector<torch::Tensor>, fces::FCESConfig>(),
|
||||
py::arg("params"),
|
||||
py::arg("config") = fces::FCESConfig{})
|
||||
py::arg("params"), py::arg("config") = fces::FCESConfig{})
|
||||
.def("step", &fces::FCESOptimizer::step)
|
||||
.def("update_fitness", &fces::FCESOptimizer::update_fitness)
|
||||
.def("backup_to_ram", &fces::FCESOptimizer::backup_to_ram)
|
||||
.def("restore_from_ram", &fces::FCESOptimizer::restore_from_ram)
|
||||
.def("step_count", &fces::FCESOptimizer::step_count)
|
||||
.def("calculate_sparsity", &fces::FCESOptimizer::calculate_sparsity)
|
||||
.def("zero_grad", [](fces::FCESOptimizer& self) {
|
||||
for (auto& group : self.param_groups()) {
|
||||
for (auto& p : group.params()) {
|
||||
.def("zero_grad", [](fces::FCESOptimizer &self) {
|
||||
for (auto &group : self.param_groups()) {
|
||||
for (auto &p : group.params()) {
|
||||
if (p.grad().defined()) {
|
||||
p.grad().zero_();
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ thread_local std::mt19937 FuzzyController::rng_{std::random_device{}()};
|
||||
// Genome
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
void Genome::randomize(std::mt19937& rng) {
|
||||
void Genome::randomize(std::mt19937 &rng) {
|
||||
std::normal_distribution<float> dist(0.0f, 0.5f);
|
||||
for (auto& w : weights) {
|
||||
for (auto &w : weights) {
|
||||
w = dist(rng);
|
||||
}
|
||||
gene_success.fill(0.0f);
|
||||
@@ -29,11 +29,11 @@ Genome Genome::clone() const {
|
||||
// FuzzyController
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
FuzzyController::FuzzyController()
|
||||
: id(next_id_++), origin("random") {
|
||||
FuzzyController::FuzzyController() : id(next_id_++), origin("random") {
|
||||
genome.randomize(rng_);
|
||||
// Bias output toward acceleration (V2.1 insight)
|
||||
// Set output biases (last GENOME_OUTPUT_DIM elements) to +2.0, -1.0, 0.0 with noise
|
||||
// Set output biases (last GENOME_OUTPUT_DIM elements) to +2.0, -1.0, 0.0 with
|
||||
// noise
|
||||
constexpr int bias_start = GENOME_SIZE - GENOME_OUTPUT_DIM;
|
||||
std::normal_distribution<float> bias_noise(0.0f, 0.5f);
|
||||
genome.weights[bias_start] = 2.0f + bias_noise(rng_);
|
||||
@@ -51,30 +51,27 @@ FuzzyController::FuzzyController(Genome genome)
|
||||
: id(next_id_++), genome(std::move(genome)), origin("constructed") {}
|
||||
|
||||
torch::Tensor FuzzyController::decide_update(
|
||||
const std::vector<std::vector<float>>& layer_stats,
|
||||
float loss_trend,
|
||||
float step_pct,
|
||||
float rollback_rate,
|
||||
float grad_stability,
|
||||
float spectral_alpha,
|
||||
float stagnation_intensity,
|
||||
float kzm_damping,
|
||||
float projected_drift
|
||||
) {
|
||||
const std::vector<std::vector<float>> &layer_stats, float loss_trend,
|
||||
float step_pct, float rollback_rate, float grad_stability,
|
||||
float spectral_alpha, float stagnation_intensity, float kzm_damping,
|
||||
float projected_drift) {
|
||||
const int num_groups = static_cast<int>(layer_stats.size());
|
||||
auto actions = torch::zeros({num_groups, GENOME_OUTPUT_DIM});
|
||||
|
||||
// Extract weight views for the micro-MLP
|
||||
const float* w = genome.weights.data();
|
||||
const float *w = genome.weights.data();
|
||||
|
||||
// Layer 1: input -> hidden
|
||||
const float* W1 = w; // [(GENOME_INPUT_DIM + 1) x GENOME_HIDDEN_DIM]
|
||||
const float *W1 = w; // [(GENOME_INPUT_DIM + 1) x GENOME_HIDDEN_DIM]
|
||||
// Layer 2: hidden -> output
|
||||
const float* W2 = w + ((GENOME_INPUT_DIM + 1) * GENOME_HIDDEN_DIM); // [(GENOME_HIDDEN_DIM + 1) x GENOME_OUTPUT_DIM]
|
||||
const float *W2 =
|
||||
w + ((GENOME_INPUT_DIM + 1) *
|
||||
GENOME_HIDDEN_DIM); // [(GENOME_HIDDEN_DIM + 1) x GENOME_OUTPUT_DIM]
|
||||
|
||||
for (int g = 0; g < num_groups; ++g) {
|
||||
// One-Hot Layer Type: Clamp to 5 to avoid overflow for new categories
|
||||
float layer_type_val = (layer_stats[g].size() >= 3) ? layer_stats[g][2] : 5.0f;
|
||||
float layer_type_val =
|
||||
(layer_stats[g].size() >= 3) ? layer_stats[g][2] : 5.0f;
|
||||
int type_idx = std::min(5, static_cast<int>(layer_type_val));
|
||||
std::array<float, 5> type_onehot{0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||
if (type_idx >= 0 && type_idx < 5) {
|
||||
@@ -87,19 +84,26 @@ torch::Tensor FuzzyController::decide_update(
|
||||
float sp = (layer_stats[g].size() >= 2) ? layer_stats[g][1] : 0.0f;
|
||||
|
||||
// Sanitization matching nan_to_num
|
||||
if (!std::isfinite(gn) || std::isnan(gn)) gn = 0.0f;
|
||||
if (gn > 10.0f) gn = 10.0f;
|
||||
if (gn < 0.0f) gn = 0.0f;
|
||||
if (!std::isfinite(gn) || std::isnan(gn))
|
||||
gn = 0.0f;
|
||||
if (gn > 10.0f)
|
||||
gn = 10.0f;
|
||||
if (gn < 0.0f)
|
||||
gn = 0.0f;
|
||||
|
||||
if (!std::isfinite(sp) || std::isnan(sp)) sp = 0.0f;
|
||||
if (sp > 1.0f) sp = 1.0f;
|
||||
if (sp < 0.0f) sp = 0.0f;
|
||||
if (!std::isfinite(sp) || std::isnan(sp))
|
||||
sp = 0.0f;
|
||||
if (sp > 1.0f)
|
||||
sp = 1.0f;
|
||||
if (sp < 0.0f)
|
||||
sp = 0.0f;
|
||||
|
||||
input[0] = gn;
|
||||
input[1] = sp;
|
||||
input[2] = loss_trend;
|
||||
input[3] = step_pct;
|
||||
input[4] = (num_groups > 1) ? static_cast<float>(g) / (num_groups - 1.0f) : 0.0f;
|
||||
input[4] =
|
||||
(num_groups > 1) ? static_cast<float>(g) / (num_groups - 1.0f) : 0.0f;
|
||||
input[5] = rollback_rate;
|
||||
input[6] = grad_stability;
|
||||
input[7] = spectral_alpha;
|
||||
@@ -178,7 +182,8 @@ torch::Tensor FuzzyController::decide_update(
|
||||
return actions;
|
||||
}
|
||||
|
||||
FuzzyController FuzzyController::mutate(float current_loss, float sigma_scale) const {
|
||||
FuzzyController FuzzyController::mutate(float current_loss,
|
||||
float sigma_scale) const {
|
||||
Genome child_genome = genome.clone();
|
||||
std::normal_distribution<float> std_normal(0.0f, 1.0f);
|
||||
|
||||
@@ -207,7 +212,8 @@ FuzzyController FuzzyController::mutate(float current_loss, float sigma_scale) c
|
||||
return child;
|
||||
}
|
||||
|
||||
FuzzyController FuzzyController::crossover(const FuzzyController& partner, bool /*use_alignment*/) const {
|
||||
FuzzyController FuzzyController::crossover(const FuzzyController &partner,
|
||||
bool /*use_alignment*/) const {
|
||||
Genome child_genome;
|
||||
std::uniform_real_distribution<float> u_dist(0.0f, 1.0f);
|
||||
|
||||
@@ -234,15 +240,18 @@ FuzzyController FuzzyController::crossover(const FuzzyController& partner, bool
|
||||
}
|
||||
}
|
||||
|
||||
child_genome.sigma_gene = (genome.sigma_gene + partner.genome.sigma_gene) * 0.5f;
|
||||
child_genome.plasticity = (genome.plasticity + partner.genome.plasticity) * 0.5f;
|
||||
child_genome.sigma_gene =
|
||||
(genome.sigma_gene + partner.genome.sigma_gene) * 0.5f;
|
||||
child_genome.plasticity =
|
||||
(genome.plasticity + partner.genome.plasticity) * 0.5f;
|
||||
|
||||
FuzzyController child(child_genome);
|
||||
child.origin = "crossover";
|
||||
return child;
|
||||
}
|
||||
|
||||
FuzzyController FuzzyController::create_orthogonal_child(float intensity) const {
|
||||
FuzzyController
|
||||
FuzzyController::create_orthogonal_child(float intensity) const {
|
||||
Genome child_genome;
|
||||
std::normal_distribution<float> norm_dist(0.0f, 1.0f);
|
||||
|
||||
@@ -262,7 +271,8 @@ FuzzyController FuzzyController::create_orthogonal_child(float intensity) const
|
||||
std::array<float, GENOME_SIZE> orthogonal_vec{};
|
||||
float norm_ortho = 0.0f;
|
||||
for (size_t i = 0; i < GENOME_SIZE; ++i) {
|
||||
float projection = (dot_product / (norm_elite * norm_elite)) * genome.weights[i];
|
||||
float projection =
|
||||
(dot_product / (norm_elite * norm_elite)) * genome.weights[i];
|
||||
orthogonal_vec[i] = random_vec[i] - projection;
|
||||
norm_ortho += orthogonal_vec[i] * orthogonal_vec[i];
|
||||
}
|
||||
@@ -294,7 +304,8 @@ FuzzyController FuzzyController::create_orthogonal_child(float intensity) const
|
||||
return child;
|
||||
}
|
||||
|
||||
std::pair<FuzzyController, FuzzyController> FuzzyController::banach_tarski_fission(float intensity) const {
|
||||
std::pair<FuzzyController, FuzzyController>
|
||||
FuzzyController::banach_tarski_fission(float intensity) const {
|
||||
Genome plus_genome;
|
||||
Genome minus_genome;
|
||||
|
||||
@@ -321,13 +332,15 @@ std::pair<FuzzyController, FuzzyController> FuzzyController::banach_tarski_fissi
|
||||
std::array<float, GENOME_SIZE> fission_vec{};
|
||||
float norm_fission = 0.0f;
|
||||
for (size_t i = 0; i < GENOME_SIZE; ++i) {
|
||||
fission_vec[i] = noise[i] - (dot_product / (norm_parent * norm_parent)) * genome.weights[i];
|
||||
fission_vec[i] = noise[i] - (dot_product / (norm_parent * norm_parent)) *
|
||||
genome.weights[i];
|
||||
norm_fission += fission_vec[i] * fission_vec[i];
|
||||
}
|
||||
norm_fission = std::sqrt(norm_fission) + 1e-9f;
|
||||
|
||||
for (size_t i = 0; i < GENOME_SIZE; ++i) {
|
||||
float scaled_fission = fission_vec[i] * (norm_parent / norm_fission) * 0.1f * intensity;
|
||||
float scaled_fission =
|
||||
fission_vec[i] * (norm_parent / norm_fission) * 0.1f * intensity;
|
||||
plus_genome.weights[i] = genome.weights[i] + scaled_fission;
|
||||
minus_genome.weights[i] = genome.weights[i] - scaled_fission;
|
||||
|
||||
|
||||
@@ -2,30 +2,25 @@
|
||||
|
||||
namespace fces {
|
||||
|
||||
EvolutionManager::EvolutionManager(
|
||||
Population& population, int selection_interval,
|
||||
bool auto_population, bool direct_construction
|
||||
)
|
||||
: population_(population),
|
||||
selection_interval(selection_interval),
|
||||
EvolutionManager::EvolutionManager(Population &population,
|
||||
int selection_interval, bool auto_population,
|
||||
bool direct_construction)
|
||||
: population_(population), selection_interval(selection_interval),
|
||||
auto_population_(auto_population),
|
||||
direct_construction_(direct_construction) {}
|
||||
|
||||
FuzzyController& EvolutionManager::get_active_controller() {
|
||||
FuzzyController &EvolutionManager::get_active_controller() {
|
||||
return population_.get_active_controller();
|
||||
}
|
||||
|
||||
void EvolutionManager::update_population_dynamics(
|
||||
float loss_velocity, float ema_loss, int step_counter, int total_steps
|
||||
) {
|
||||
void EvolutionManager::update_population_dynamics(float loss_velocity,
|
||||
float ema_loss,
|
||||
int step_counter,
|
||||
int total_steps) {
|
||||
float progress = static_cast<float>(step_counter) / std::max(1, total_steps);
|
||||
|
||||
if (step_counter % 20 == 0) {
|
||||
population_.evolve(
|
||||
std::abs(loss_velocity),
|
||||
loss_velocity,
|
||||
progress
|
||||
);
|
||||
population_.evolve(std::abs(loss_velocity), loss_velocity, progress);
|
||||
}
|
||||
|
||||
if (!auto_population_ || step_counter % 50 != 0) {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "fces/fitness.hpp"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
namespace fces {
|
||||
@@ -20,12 +20,14 @@ void RunningStats::update(float value) {
|
||||
|
||||
float RunningStats::z_score(float value) const {
|
||||
float s = get_std();
|
||||
if (s < 1e-8f) return 0.0f;
|
||||
if (s < 1e-8f)
|
||||
return 0.0f;
|
||||
return (value - mean_) / s;
|
||||
}
|
||||
|
||||
float RunningStats::get_std() const {
|
||||
if (count_ < 2) return 1.0f;
|
||||
if (count_ < 2)
|
||||
return 1.0f;
|
||||
return std::sqrt(m2_ / static_cast<float>(count_ - 1));
|
||||
}
|
||||
|
||||
@@ -42,8 +44,10 @@ void RunningStats::reset() {
|
||||
FitnessEngine::FitnessEngine(float grokking_coefficient)
|
||||
: grokking_coefficient_(grokking_coefficient) {}
|
||||
|
||||
float FitnessEngine::calculate_loss_signal(float current_loss, float ema_loss, const std::string& mode) const {
|
||||
if (ema_loss < 1e-8f) return 0.0f;
|
||||
float FitnessEngine::calculate_loss_signal(float current_loss, float ema_loss,
|
||||
const std::string &mode) const {
|
||||
if (ema_loss < 1e-8f)
|
||||
return 0.0f;
|
||||
|
||||
if (mode == "relative") {
|
||||
return (current_loss - ema_loss) / (ema_loss + 1e-8f);
|
||||
@@ -68,7 +72,8 @@ FuzzyFitnessEvaluator::FuzzyFitnessEvaluator() noexcept
|
||||
consistency_set_("Consistent", -1.0f, 0.0f, 0.02f, 0.1f),
|
||||
rank_set_("LowRank", -1.0f, 0.0f, 5.0f, 20.0f) {}
|
||||
|
||||
float FuzzyFitnessEvaluator::evaluate(const FitnessMetrics& metrics) const noexcept {
|
||||
float FuzzyFitnessEvaluator::evaluate(
|
||||
const FitnessMetrics &metrics) const noexcept {
|
||||
float m_stability = stability_set_.membership(metrics.grad_cv);
|
||||
float m_train = train_set_.membership(metrics.training_advantage);
|
||||
float m_val = val_set_.membership(metrics.validation_advantage);
|
||||
@@ -76,15 +81,12 @@ float FuzzyFitnessEvaluator::evaluate(const FitnessMetrics& metrics) const noexc
|
||||
float m_consistency = consistency_set_.membership(metrics.consistency_gap);
|
||||
float m_rank = rank_set_.membership(metrics.stable_rank);
|
||||
|
||||
float weighted_score =
|
||||
m_stability * w_stability_ +
|
||||
m_train * w_train_ +
|
||||
m_val * w_val_ +
|
||||
m_sparsity * w_sparsity_ +
|
||||
m_consistency * w_consistency_ +
|
||||
m_rank * w_rank_;
|
||||
float weighted_score = m_stability * w_stability_ + m_train * w_train_ +
|
||||
m_val * w_val_ + m_sparsity * w_sparsity_ +
|
||||
m_consistency * w_consistency_ + m_rank * w_rank_;
|
||||
|
||||
float total_weight = w_stability_ + w_train_ + w_val_ + w_sparsity_ + w_consistency_ + w_rank_;
|
||||
float total_weight =
|
||||
w_stability_ + w_train_ + w_val_ + w_sparsity_ + w_consistency_ + w_rank_;
|
||||
if (total_weight > 0.0f) {
|
||||
weighted_score /= total_weight;
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "fces/optimizer.hpp"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace fces {
|
||||
|
||||
namespace {
|
||||
|
||||
int classify_layer_by_shape(const torch::Tensor& p) {
|
||||
int classify_layer_by_shape(const torch::Tensor &p) {
|
||||
auto dims = p.sizes();
|
||||
if (dims.size() == 2) {
|
||||
int64_t d0 = dims[0];
|
||||
@@ -31,8 +31,10 @@ int classify_layer_by_shape(const torch::Tensor& p) {
|
||||
return 5; // Other
|
||||
}
|
||||
|
||||
torch::Tensor apply_trust_clipping(const torch::Tensor& p, torch::Tensor update, float trust_region_clip) {
|
||||
if (torch::isnan(update).any().item<bool>() || torch::isinf(update).any().item<bool>()) {
|
||||
torch::Tensor apply_trust_clipping(const torch::Tensor &p, torch::Tensor update,
|
||||
float trust_region_clip) {
|
||||
if (torch::isnan(update).any().item<bool>() ||
|
||||
torch::isinf(update).any().item<bool>()) {
|
||||
return torch::zeros_like(update);
|
||||
}
|
||||
|
||||
@@ -50,14 +52,16 @@ torch::Tensor apply_trust_clipping(const torch::Tensor& p, torch::Tensor update,
|
||||
}
|
||||
}
|
||||
|
||||
if (torch::isnan(update).any().item<bool>() || torch::isinf(update).any().item<bool>()) {
|
||||
if (torch::isnan(update).any().item<bool>() ||
|
||||
torch::isinf(update).any().item<bool>()) {
|
||||
return torch::zeros_like(update);
|
||||
}
|
||||
|
||||
return update;
|
||||
}
|
||||
|
||||
float calculate_parasitic_reward(const torch::Tensor& p, float mult, const RunningStats& grad_norm_tracker) {
|
||||
float calculate_parasitic_reward(const torch::Tensor &p, float mult,
|
||||
const RunningStats &grad_norm_tracker) {
|
||||
if (!p.grad().defined()) {
|
||||
return 0.0f;
|
||||
}
|
||||
@@ -66,31 +70,26 @@ float calculate_parasitic_reward(const torch::Tensor& p, float mult, const Runni
|
||||
return z_g * (mult - 1.0f);
|
||||
}
|
||||
|
||||
std::unique_ptr<torch::optim::OptimizerOptions> make_optimizer_options(double lr) {
|
||||
std::unique_ptr<torch::optim::OptimizerOptions>
|
||||
make_optimizer_options(double lr) {
|
||||
return std::make_unique<FCESOptimizerOptions>(lr);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
FCESOptimizer::FCESOptimizer(
|
||||
std::vector<torch::Tensor> params,
|
||||
FCESConfig config
|
||||
)
|
||||
FCESOptimizer::FCESOptimizer(std::vector<torch::Tensor> params,
|
||||
FCESConfig config)
|
||||
: torch::optim::Optimizer(
|
||||
{torch::optim::OptimizerParamGroup(std::move(params))},
|
||||
make_optimizer_options(config.lr)
|
||||
),
|
||||
make_optimizer_options(config.lr)),
|
||||
config_(std::move(config)),
|
||||
population_(config_.population_size, 10000,
|
||||
EliteStrategy::Cumulative,
|
||||
population_(config_.population_size, 10000, EliteStrategy::Cumulative,
|
||||
false, false, false, false, false,
|
||||
config_.direct_construction,
|
||||
config_.use_banach_fission),
|
||||
config_.direct_construction, config_.use_banach_fission),
|
||||
fitness_engine_(config_.grokking_coefficient) {
|
||||
|
||||
evolution_manager_ = std::make_unique<EvolutionManager>(
|
||||
population_, 50, config_.auto_population, config_.direct_construction
|
||||
);
|
||||
population_, 50, config_.auto_population, config_.direct_construction);
|
||||
|
||||
spectral_sensor_ = std::make_unique<SpectralSensor>();
|
||||
|
||||
@@ -98,7 +97,8 @@ FCESOptimizer::FCESOptimizer(
|
||||
backup_to_ram();
|
||||
|
||||
Telemetry::get().info("optimizer_initialized",
|
||||
"version=0.1.0 pop_size=" + std::to_string(config_.population_size));
|
||||
"version=0.1.0 pop_size=" +
|
||||
std::to_string(config_.population_size));
|
||||
}
|
||||
|
||||
torch::Tensor FCESOptimizer::step(LossClosure closure) {
|
||||
@@ -115,41 +115,51 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) {
|
||||
gather_stats();
|
||||
|
||||
// 2. Strategy: Population Selection & Dynamics
|
||||
auto& active_controller = evolution_manager_->get_active_controller();
|
||||
auto &active_controller = evolution_manager_->get_active_controller();
|
||||
|
||||
// 3. Decision: Neural Decisions from Controllers
|
||||
float current_loss_val = (loss.defined()) ? loss.item<float>() : last_step_loss_;
|
||||
float current_loss_val =
|
||||
(loss.defined()) ? loss.item<float>() : last_step_loss_;
|
||||
|
||||
// Emergency Brake - NaN/Inf Detection
|
||||
if (std::isnan(current_loss_val) || !std::isfinite(current_loss_val)) {
|
||||
Telemetry::get().error("emergency_brake_nan", "NaN/Inf loss detected in step " + std::to_string(step_counter_));
|
||||
Telemetry::get().error("emergency_brake_nan",
|
||||
"NaN/Inf loss detected in step " +
|
||||
std::to_string(step_counter_));
|
||||
handle_rollback();
|
||||
return loss;
|
||||
}
|
||||
|
||||
float loss_velocity = fitness_engine_.calculate_loss_signal(current_loss_val, ema_loss_, config_.signal_mode);
|
||||
float loss_velocity = fitness_engine_.calculate_loss_signal(
|
||||
current_loss_val, ema_loss_, config_.signal_mode);
|
||||
last_loss_velocity_ = loss_velocity;
|
||||
|
||||
float progress = std::min(1.0f, static_cast<float>(step_counter_) / std::max(1, config_.total_steps));
|
||||
float grad_cv = grad_norm_tracker_.get_std() / (grad_norm_tracker_.get_mean() + 1e-8f);
|
||||
float progress = std::min(1.0f, static_cast<float>(step_counter_) /
|
||||
std::max(1, config_.total_steps));
|
||||
float grad_cv =
|
||||
grad_norm_tracker_.get_std() / (grad_norm_tracker_.get_mean() + 1e-8f);
|
||||
|
||||
float csr_factor = 1.0f;
|
||||
if (config_.csr_enabled) {
|
||||
if (step_counter_ < config_.csr_warmup_steps) {
|
||||
csr_factor = 0.0f;
|
||||
} else {
|
||||
float steps_since_warmup = static_cast<float>(step_counter_ - config_.csr_warmup_steps);
|
||||
csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast<float>(config_.csr_ramp_steps)));
|
||||
float steps_since_warmup =
|
||||
static_cast<float>(step_counter_ - config_.csr_warmup_steps);
|
||||
csr_factor = std::min(
|
||||
1.0f, steps_since_warmup /
|
||||
std::max(1.0f, static_cast<float>(config_.csr_ramp_steps)));
|
||||
}
|
||||
}
|
||||
|
||||
// Update spectral sensing rank
|
||||
float spectral_alpha = 0.0f;
|
||||
if (config_.grokking_coefficient > 0.0f && spectral_sensor_) {
|
||||
if (step_counter_ % config_.spectral_frequency == 0 || last_spectral_rank_ == 0.0f) {
|
||||
if (step_counter_ % config_.spectral_frequency == 0 ||
|
||||
last_spectral_rank_ == 0.0f) {
|
||||
int param_idx = 0;
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto& p : group.params()) {
|
||||
for (auto &group : param_groups()) {
|
||||
for (auto &p : group.params()) {
|
||||
if (p.dim() >= 2) {
|
||||
std::string name = "layer_" + std::to_string(param_idx);
|
||||
spectral_sensor_->track_layer(name, p);
|
||||
@@ -164,33 +174,30 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) {
|
||||
|
||||
float effective_alpha = spectral_alpha * csr_factor;
|
||||
float kzm_damping = fitness_engine_.compute_kzm_damping(effective_alpha);
|
||||
float stagnation_intensity = std::min(1.0f, static_cast<float>(stagnation_counter_) / 500.0f);
|
||||
float stagnation_intensity =
|
||||
std::min(1.0f, static_cast<float>(stagnation_counter_) / 500.0f);
|
||||
float log_spectral_alpha = std::log(effective_alpha + 1e-6f);
|
||||
|
||||
// Call decide_update
|
||||
auto actions = active_controller.decide_update(
|
||||
layer_stats_,
|
||||
loss_velocity,
|
||||
progress,
|
||||
rollback_ema_,
|
||||
grad_cv,
|
||||
log_spectral_alpha,
|
||||
stagnation_intensity,
|
||||
kzm_damping,
|
||||
loss_velocity
|
||||
);
|
||||
layer_stats_, loss_velocity, progress, rollback_ema_, grad_cv,
|
||||
log_spectral_alpha, stagnation_intensity, kzm_damping, loss_velocity);
|
||||
|
||||
// Bandit-style Early Stopping
|
||||
if (step_counter_ % 5 == 0 && loss_velocity > 0.05f) {
|
||||
Telemetry::get().warning("early_stopping_poor_controller",
|
||||
"controller_id=" + std::to_string(active_controller.id) + " velocity=" + std::to_string(loss_velocity));
|
||||
Telemetry::get().warning(
|
||||
"early_stopping_poor_controller",
|
||||
"controller_id=" + std::to_string(active_controller.id) +
|
||||
" velocity=" + std::to_string(loss_velocity));
|
||||
evolution_manager_->steps_active = evolution_manager_->selection_interval;
|
||||
}
|
||||
|
||||
if (torch::isnan(actions).any().item<bool>()) {
|
||||
Telemetry::get().error("controller_nan_actions", "NaN actions returned by controller ID " + std::to_string(active_controller.id));
|
||||
Telemetry::get().error("controller_nan_actions",
|
||||
"NaN actions returned by controller ID " +
|
||||
std::to_string(active_controller.id));
|
||||
population_.kill(active_controller);
|
||||
auto& new_controller = evolution_manager_->get_active_controller();
|
||||
auto &new_controller = evolution_manager_->get_active_controller();
|
||||
actions = torch::zeros_like(actions);
|
||||
for (int i = 0; i < actions.size(0); ++i) {
|
||||
actions[i][0] = 0.5f; // log_mult default
|
||||
@@ -203,11 +210,7 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) {
|
||||
// 5. Evolution & Maintenance
|
||||
if (current_loss_val > 0.0f) {
|
||||
evolution_manager_->update_population_dynamics(
|
||||
loss_velocity,
|
||||
ema_loss_,
|
||||
step_counter_,
|
||||
config_.total_steps
|
||||
);
|
||||
loss_velocity, ema_loss_, step_counter_, config_.total_steps);
|
||||
}
|
||||
|
||||
if (step_counter_ % 50 == 0) {
|
||||
@@ -220,9 +223,13 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) {
|
||||
void FCESOptimizer::update_fitness(float loss) {
|
||||
// 1. Divergence Safety
|
||||
bool is_nan = std::isnan(loss) || !std::isfinite(loss);
|
||||
bool is_spike = (step_counter_ > 1) && (ema_loss_ > 0.0f) && (loss > config_.rollback_threshold * ema_loss_) && (ema_loss_ > 0.1f);
|
||||
bool is_spike = (step_counter_ > 1) && (ema_loss_ > 0.0f) &&
|
||||
(loss > config_.rollback_threshold * ema_loss_) &&
|
||||
(ema_loss_ > 0.1f);
|
||||
if (is_nan || is_spike) {
|
||||
Telemetry::get().warning("divergence_detected", "loss=" + std::to_string(loss) + " ema=" + std::to_string(ema_loss_));
|
||||
Telemetry::get().warning("divergence_detected",
|
||||
"loss=" + std::to_string(loss) +
|
||||
" ema=" + std::to_string(ema_loss_));
|
||||
handle_rollback();
|
||||
return;
|
||||
}
|
||||
@@ -245,14 +252,18 @@ void FCESOptimizer::update_fitness(float loss) {
|
||||
float grad_mean = grad_norm_tracker_.get_mean();
|
||||
float grad_cv = grad_std / (grad_mean + 1e-8f);
|
||||
|
||||
float raw_rank = (spectral_sensor_) ? spectral_sensor_->get_global_rank() : 0.0f;
|
||||
float raw_rank =
|
||||
(spectral_sensor_) ? spectral_sensor_->get_global_rank() : 0.0f;
|
||||
float csr_factor = 1.0f;
|
||||
if (config_.csr_enabled) {
|
||||
if (step_counter_ < config_.csr_warmup_steps) {
|
||||
csr_factor = 0.0f;
|
||||
} else {
|
||||
float steps_since_warmup = static_cast<float>(step_counter_ - config_.csr_warmup_steps);
|
||||
csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast<float>(config_.csr_ramp_steps)));
|
||||
float steps_since_warmup =
|
||||
static_cast<float>(step_counter_ - config_.csr_warmup_steps);
|
||||
csr_factor = std::min(
|
||||
1.0f, steps_since_warmup /
|
||||
std::max(1.0f, static_cast<float>(config_.csr_ramp_steps)));
|
||||
}
|
||||
}
|
||||
float effective_rank = config_.csr_enabled ? raw_rank * csr_factor : raw_rank;
|
||||
@@ -282,7 +293,7 @@ void FCESOptimizer::update_fitness(float loss) {
|
||||
}
|
||||
|
||||
// 5. Apply to Population
|
||||
auto& active_controller = evolution_manager_->get_active_controller();
|
||||
auto &active_controller = evolution_manager_->get_active_controller();
|
||||
population_.update_controller_fitness(active_controller, final_fitness);
|
||||
|
||||
Telemetry::get().info("fitness_calculated",
|
||||
@@ -293,8 +304,8 @@ void FCESOptimizer::update_fitness(float loss) {
|
||||
|
||||
void FCESOptimizer::backup_to_ram() {
|
||||
ram_backup_.clear();
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto& p : group.params()) {
|
||||
for (auto &group : param_groups()) {
|
||||
for (auto &p : group.params()) {
|
||||
ram_backup_.push_back(p.data().clone().cpu());
|
||||
}
|
||||
}
|
||||
@@ -302,8 +313,8 @@ void FCESOptimizer::backup_to_ram() {
|
||||
|
||||
void FCESOptimizer::restore_from_ram() {
|
||||
int idx = 0;
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto& p : group.params()) {
|
||||
for (auto &group : param_groups()) {
|
||||
for (auto &p : group.params()) {
|
||||
if (idx < static_cast<int>(ram_backup_.size())) {
|
||||
p.data().copy_(ram_backup_[idx].to(p.device()));
|
||||
idx++;
|
||||
@@ -314,8 +325,8 @@ void FCESOptimizer::restore_from_ram() {
|
||||
|
||||
float FCESOptimizer::calculate_sparsity() const {
|
||||
int64_t total = 0, zeros = 0;
|
||||
for (const auto& group : param_groups()) {
|
||||
for (const auto& p : group.params()) {
|
||||
for (const auto &group : param_groups()) {
|
||||
for (const auto &p : group.params()) {
|
||||
total += p.numel();
|
||||
zeros += (p.data().abs() < 1e-5f).sum().item<int64_t>();
|
||||
}
|
||||
@@ -331,15 +342,16 @@ void FCESOptimizer::gather_stats() {
|
||||
bool has_nan_or_inf = false;
|
||||
float max_grad_norm = 0.0f;
|
||||
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto& p : group.params()) {
|
||||
for (auto &group : param_groups()) {
|
||||
for (auto &p : group.params()) {
|
||||
if (!p.grad().defined()) {
|
||||
param_group_mapping_.push_back(-1);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto grad = p.grad();
|
||||
if (torch::isnan(grad).any().item<bool>() || torch::isinf(grad).any().item<bool>()) {
|
||||
if (torch::isnan(grad).any().item<bool>() ||
|
||||
torch::isinf(grad).any().item<bool>()) {
|
||||
has_nan_or_inf = true;
|
||||
}
|
||||
|
||||
@@ -355,11 +367,14 @@ void FCESOptimizer::gather_stats() {
|
||||
|
||||
int64_t total_elements = grad.numel();
|
||||
int64_t zeros = (grad.abs() < 1e-5f).sum().item<int64_t>();
|
||||
float sparsity = (total_elements > 0) ? static_cast<float>(zeros) / total_elements : 0.0f;
|
||||
float sparsity = (total_elements > 0)
|
||||
? static_cast<float>(zeros) / total_elements
|
||||
: 0.0f;
|
||||
|
||||
int layer_type = classify_layer_by_shape(p);
|
||||
int group_idx = static_cast<int>(layer_stats_.size());
|
||||
layer_stats_.push_back({grad_norm, sparsity, static_cast<float>(layer_type)});
|
||||
layer_stats_.push_back(
|
||||
{grad_norm, sparsity, static_cast<float>(layer_type)});
|
||||
param_group_mapping_.push_back(group_idx);
|
||||
|
||||
if (spectral_sensor_ && p.dim() >= 2) {
|
||||
@@ -373,17 +388,20 @@ void FCESOptimizer::gather_stats() {
|
||||
|
||||
if (has_nan_or_inf) {
|
||||
Telemetry::get().error("poisoned_gradients_detected",
|
||||
"NaN/Inf detected in gradients during step " + std::to_string(step_counter_));
|
||||
"NaN/Inf detected in gradients during step " +
|
||||
std::to_string(step_counter_));
|
||||
handle_rollback();
|
||||
return;
|
||||
}
|
||||
|
||||
if (step_counter_ == 1 && max_grad_norm > 1.0f) {
|
||||
float safe_lr = 0.01f / (max_grad_norm + 1e-8f);
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto &group : param_groups()) {
|
||||
if (group.options().get_lr() > safe_lr) {
|
||||
Telemetry::get().info("auto_calibration_throttled_lr",
|
||||
"old=" + std::to_string(group.options().get_lr()) + " new=" + std::to_string(safe_lr));
|
||||
Telemetry::get().info(
|
||||
"auto_calibration_throttled_lr",
|
||||
"old=" + std::to_string(group.options().get_lr()) +
|
||||
" new=" + std::to_string(safe_lr));
|
||||
group.options().set_lr(safe_lr);
|
||||
config_.lr = safe_lr;
|
||||
}
|
||||
@@ -396,18 +414,18 @@ void FCESOptimizer::gather_stats() {
|
||||
}
|
||||
}
|
||||
|
||||
void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) {
|
||||
void FCESOptimizer::apply_parameter_updates(const torch::Tensor &actions) {
|
||||
int param_idx = 0;
|
||||
float parasitic_accum = 0.0f;
|
||||
int count_updated = 0;
|
||||
|
||||
auto& active_controller = evolution_manager_->get_active_controller();
|
||||
auto &active_controller = evolution_manager_->get_active_controller();
|
||||
|
||||
for (auto& group : param_groups()) {
|
||||
for (auto &group : param_groups()) {
|
||||
float lr = static_cast<float>(group.options().get_lr());
|
||||
float wd = config_.weight_decay;
|
||||
|
||||
for (auto& p : group.params()) {
|
||||
for (auto &p : group.params()) {
|
||||
if (!p.grad().defined()) {
|
||||
param_idx++;
|
||||
continue;
|
||||
@@ -421,7 +439,8 @@ void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) {
|
||||
|
||||
float mult = actions[g_idx][0].item<float>();
|
||||
float sign_gate = actions[g_idx][1].item<float>();
|
||||
float wd_mult = (actions.size(1) > 2) ? actions[g_idx][2].item<float>() : 1.0f;
|
||||
float wd_mult =
|
||||
(actions.size(1) > 2) ? actions[g_idx][2].item<float>() : 1.0f;
|
||||
|
||||
bool use_sign = sign_gate > 0.0f;
|
||||
if (config_.ablation_mode == "force_sign") {
|
||||
@@ -445,7 +464,8 @@ void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) {
|
||||
p.data().add_(update);
|
||||
|
||||
if (config_.parasitic_mode) {
|
||||
parasitic_accum += calculate_parasitic_reward(p, mult, grad_norm_tracker_);
|
||||
parasitic_accum +=
|
||||
calculate_parasitic_reward(p, mult, grad_norm_tracker_);
|
||||
}
|
||||
|
||||
param_idx++;
|
||||
@@ -455,7 +475,8 @@ void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) {
|
||||
|
||||
if (config_.parasitic_mode && count_updated > 0) {
|
||||
float reward = parasitic_accum / static_cast<float>(count_updated);
|
||||
population_.update_controller_fitness(active_controller, reward * 10.0f, false);
|
||||
population_.update_controller_fitness(active_controller, reward * 10.0f,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "fces/oscillation.hpp"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
|
||||
namespace fces {
|
||||
|
||||
@@ -17,7 +17,8 @@ bool OscillationDetector::detect() const {
|
||||
}
|
||||
|
||||
float OscillationDetector::get_score() const {
|
||||
if (static_cast<int>(loss_history_.size()) < WINDOW_SIZE) return 0.0f;
|
||||
if (static_cast<int>(loss_history_.size()) < WINDOW_SIZE)
|
||||
return 0.0f;
|
||||
|
||||
auto detrended = detrend(loss_history_);
|
||||
auto power = compute_power_spectrum(detrended);
|
||||
@@ -34,12 +35,14 @@ float OscillationDetector::get_score() const {
|
||||
}
|
||||
}
|
||||
|
||||
if (total_power < 1e-8f) return 0.0f;
|
||||
if (total_power < 1e-8f)
|
||||
return 0.0f;
|
||||
return osc_power / total_power;
|
||||
}
|
||||
|
||||
float OscillationDetector::get_variance_50() const {
|
||||
if (loss_history_.size() < 50) return 0.0f;
|
||||
if (loss_history_.size() < 50)
|
||||
return 0.0f;
|
||||
auto start = loss_history_.end() - 50;
|
||||
float mean = std::accumulate(start, loss_history_.end(), 0.0f) / 50.0f;
|
||||
float var = 0.0f;
|
||||
@@ -50,13 +53,13 @@ float OscillationDetector::get_variance_50() const {
|
||||
return var / 50.0f;
|
||||
}
|
||||
|
||||
void OscillationDetector::reset() {
|
||||
loss_history_.clear();
|
||||
}
|
||||
void OscillationDetector::reset() { loss_history_.clear(); }
|
||||
|
||||
std::vector<float> OscillationDetector::detrend(const std::vector<float>& signal) {
|
||||
std::vector<float>
|
||||
OscillationDetector::detrend(const std::vector<float> &signal) {
|
||||
int n = static_cast<int>(signal.size());
|
||||
if (n < 2) return signal;
|
||||
if (n < 2)
|
||||
return signal;
|
||||
|
||||
// Remove linear trend via least squares
|
||||
float sum_x = 0, sum_y = 0, sum_xy = 0, sum_xx = 0;
|
||||
@@ -66,7 +69,8 @@ std::vector<float> OscillationDetector::detrend(const std::vector<float>& signal
|
||||
sum_xy += i * signal[i];
|
||||
sum_xx += i * i;
|
||||
}
|
||||
float slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x + 1e-8f);
|
||||
float slope =
|
||||
(n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x + 1e-8f);
|
||||
float intercept = (sum_y - slope * sum_x) / n;
|
||||
|
||||
std::vector<float> result(n);
|
||||
@@ -76,8 +80,10 @@ std::vector<float> OscillationDetector::detrend(const std::vector<float>& signal
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<float> OscillationDetector::compute_power_spectrum(const std::vector<float>& signal) {
|
||||
// Simple DFT (for WINDOW_SIZE=64, this is fast enough; upgrade to FFT if needed)
|
||||
std::vector<float>
|
||||
OscillationDetector::compute_power_spectrum(const std::vector<float> &signal) {
|
||||
// Simple DFT (for WINDOW_SIZE=64, this is fast enough; upgrade to FFT if
|
||||
// needed)
|
||||
int n = static_cast<int>(signal.size());
|
||||
int half = n / 2;
|
||||
std::vector<float> power(half);
|
||||
|
||||
@@ -4,22 +4,20 @@
|
||||
|
||||
namespace fces {
|
||||
|
||||
Population::Population(
|
||||
int active_size, int repo_size, EliteStrategy elite_strategy,
|
||||
bool link_mutation, bool link_elite, bool link_violator,
|
||||
Population::Population(int active_size, int repo_size,
|
||||
EliteStrategy elite_strategy, bool link_mutation,
|
||||
bool link_elite, bool link_violator,
|
||||
bool use_fuzzy_pacer, bool use_fuzzy_importance,
|
||||
bool direct_construction, bool use_banach_fission
|
||||
)
|
||||
: elite_strategy_(elite_strategy),
|
||||
link_mutation_(link_mutation),
|
||||
link_elite_(link_elite),
|
||||
link_violator_(link_violator),
|
||||
bool direct_construction, bool use_banach_fission)
|
||||
: elite_strategy_(elite_strategy), link_mutation_(link_mutation),
|
||||
link_elite_(link_elite), link_violator_(link_violator),
|
||||
use_fuzzy_pacer_(use_fuzzy_pacer),
|
||||
use_fuzzy_importance_(use_fuzzy_importance),
|
||||
direct_construction_(direct_construction),
|
||||
use_banach_fission_(use_banach_fission) {
|
||||
|
||||
if (direct_construction) active_size = 1;
|
||||
if (direct_construction)
|
||||
active_size = 1;
|
||||
|
||||
gladiators_.reserve(active_size);
|
||||
for (int i = 0; i < active_size; ++i) {
|
||||
@@ -28,7 +26,7 @@ Population::Population(
|
||||
repository_.reserve(repo_size);
|
||||
}
|
||||
|
||||
FuzzyController& Population::get_active_controller() {
|
||||
FuzzyController &Population::get_active_controller() {
|
||||
if (active_controller_ == nullptr || steps_active_ >= selection_interval_) {
|
||||
active_controller_ = &select_weighted();
|
||||
steps_active_ = 0;
|
||||
@@ -37,37 +35,42 @@ FuzzyController& Population::get_active_controller() {
|
||||
return *active_controller_;
|
||||
}
|
||||
|
||||
FuzzyController& Population::select_weighted() {
|
||||
FuzzyController &Population::select_weighted() {
|
||||
static thread_local std::mt19937 rng{std::random_device{}()};
|
||||
if (gladiators_.empty()) {
|
||||
throw std::runtime_error("Empty gladiators population");
|
||||
}
|
||||
|
||||
float sum_fit = 0.0f;
|
||||
for (const auto& g : gladiators_) {
|
||||
for (const auto &g : gladiators_) {
|
||||
sum_fit += std::max(0.0f, g.fitness);
|
||||
}
|
||||
if (sum_fit == 0.0f) {
|
||||
std::uniform_int_distribution<int> dist(0, static_cast<int>(gladiators_.size()) - 1);
|
||||
std::uniform_int_distribution<int> dist(
|
||||
0, static_cast<int>(gladiators_.size()) - 1);
|
||||
return gladiators_[dist(rng)];
|
||||
}
|
||||
|
||||
// Select 3 random candidates for tournament
|
||||
std::uniform_int_distribution<int> dist(0, static_cast<int>(gladiators_.size()) - 1);
|
||||
std::uniform_int_distribution<int> dist(
|
||||
0, static_cast<int>(gladiators_.size()) - 1);
|
||||
int idx1 = dist(rng);
|
||||
int idx2 = dist(rng);
|
||||
int idx3 = dist(rng);
|
||||
|
||||
auto get_score = [this](const FuzzyController& c) {
|
||||
auto get_score = [this](const FuzzyController &c) {
|
||||
float base_score = c.fitness + (0.01f * static_cast<float>(c.age));
|
||||
// Add novelty score if archive has enough entries
|
||||
if (behavioral_archive_.size() >= 5) {
|
||||
float novelty = 0.0f;
|
||||
// Get behavioral vector: first 20 weights
|
||||
std::vector<float> behavior(c.genome.weights.begin(), c.genome.weights.begin() + std::min(20, static_cast<int>(c.genome.weights.size())));
|
||||
std::vector<float> behavior(
|
||||
c.genome.weights.begin(),
|
||||
c.genome.weights.begin() +
|
||||
std::min(20, static_cast<int>(c.genome.weights.size())));
|
||||
std::vector<float> distances;
|
||||
distances.reserve(behavioral_archive_.size());
|
||||
for (const auto& archived : behavioral_archive_) {
|
||||
for (const auto &archived : behavioral_archive_) {
|
||||
float dist_sum = 0.0f;
|
||||
for (size_t i = 0; i < behavior.size() && i < archived.size(); ++i) {
|
||||
float diff = behavior[i] - archived[i];
|
||||
@@ -81,23 +84,24 @@ FuzzyController& Population::select_weighted() {
|
||||
for (int i = 0; i < k; ++i) {
|
||||
avg_dist += distances[i];
|
||||
}
|
||||
if (k > 0) avg_dist /= static_cast<float>(k);
|
||||
if (k > 0)
|
||||
avg_dist /= static_cast<float>(k);
|
||||
base_score += NOVELTY_WEIGHT * avg_dist;
|
||||
}
|
||||
return base_score;
|
||||
};
|
||||
|
||||
FuzzyController* best = &gladiators_[idx1];
|
||||
FuzzyController *best = &gladiators_[idx1];
|
||||
float best_score = get_score(*best);
|
||||
|
||||
FuzzyController* cand2 = &gladiators_[idx2];
|
||||
FuzzyController *cand2 = &gladiators_[idx2];
|
||||
float score2 = get_score(*cand2);
|
||||
if (score2 > best_score) {
|
||||
best = cand2;
|
||||
best_score = score2;
|
||||
}
|
||||
|
||||
FuzzyController* cand3 = &gladiators_[idx3];
|
||||
FuzzyController *cand3 = &gladiators_[idx3];
|
||||
float score3 = get_score(*cand3);
|
||||
if (score3 > best_score) {
|
||||
best = cand3;
|
||||
@@ -107,19 +111,20 @@ FuzzyController& Population::select_weighted() {
|
||||
return *best;
|
||||
}
|
||||
|
||||
FuzzyController& Population::get_best_active() {
|
||||
return *std::max_element(gladiators_.begin(), gladiators_.end(),
|
||||
[](const FuzzyController& a, const FuzzyController& b) {
|
||||
FuzzyController &Population::get_best_active() {
|
||||
return *std::max_element(
|
||||
gladiators_.begin(), gladiators_.end(),
|
||||
[](const FuzzyController &a, const FuzzyController &b) {
|
||||
return a.fitness < b.fitness;
|
||||
});
|
||||
}
|
||||
|
||||
FuzzyController& Population::get_worst_active() {
|
||||
FuzzyController &Population::get_worst_active() {
|
||||
auto elites = get_elites();
|
||||
std::vector<FuzzyController*> non_elites;
|
||||
for (auto& g : gladiators_) {
|
||||
std::vector<FuzzyController *> non_elites;
|
||||
for (auto &g : gladiators_) {
|
||||
bool is_elite = false;
|
||||
for (auto* e : elites) {
|
||||
for (auto *e : elites) {
|
||||
if (e->id == g.id) {
|
||||
is_elite = true;
|
||||
break;
|
||||
@@ -131,28 +136,33 @@ FuzzyController& Population::get_worst_active() {
|
||||
}
|
||||
|
||||
if (non_elites.empty()) {
|
||||
return *std::min_element(gladiators_.begin(), gladiators_.end(),
|
||||
[](const FuzzyController& a, const FuzzyController& b) {
|
||||
return *std::min_element(
|
||||
gladiators_.begin(), gladiators_.end(),
|
||||
[](const FuzzyController &a, const FuzzyController &b) {
|
||||
return a.fitness < b.fitness;
|
||||
});
|
||||
}
|
||||
|
||||
return **std::min_element(non_elites.begin(), non_elites.end(),
|
||||
[](const FuzzyController* a, const FuzzyController* b) {
|
||||
// cppcheck-suppress returnReference; False Positive: elements of non_elites
|
||||
// point to members of gladiators_
|
||||
return **std::min_element(
|
||||
non_elites.begin(), non_elites.end(),
|
||||
[](const FuzzyController *a, const FuzzyController *b) {
|
||||
return a->fitness < b->fitness;
|
||||
});
|
||||
}
|
||||
|
||||
void Population::kill(FuzzyController& controller) {
|
||||
void Population::kill(FuzzyController &controller) {
|
||||
auto elites = get_elites();
|
||||
for (auto* e : elites) {
|
||||
for (auto *e : elites) {
|
||||
if (e->id == controller.id) {
|
||||
return; // Elite protection
|
||||
}
|
||||
}
|
||||
|
||||
auto it = std::find_if(gladiators_.begin(), gladiators_.end(),
|
||||
[&](const FuzzyController& c) { return c.id == controller.id; });
|
||||
auto it = std::find_if(
|
||||
gladiators_.begin(), gladiators_.end(),
|
||||
[&](const FuzzyController &c) { return c.id == controller.id; });
|
||||
if (it != gladiators_.end()) {
|
||||
gladiators_.erase(it);
|
||||
if (gladiators_.empty()) {
|
||||
@@ -161,7 +171,8 @@ void Population::kill(FuzzyController& controller) {
|
||||
}
|
||||
}
|
||||
|
||||
void Population::update_controller_fitness(FuzzyController& controller, float reward, bool increment_eval) {
|
||||
void Population::update_controller_fitness(FuzzyController &controller,
|
||||
float reward, bool increment_eval) {
|
||||
if (increment_eval) {
|
||||
controller.age++;
|
||||
controller.evaluation_count++;
|
||||
@@ -177,7 +188,8 @@ void Population::update_controller_fitness(FuzzyController& controller, float re
|
||||
|
||||
if (elite_strategy_ == EliteStrategy::EMA) {
|
||||
constexpr float EMA_ALPHA = 0.1f;
|
||||
controller.ema_fitness = (1.0f - EMA_ALPHA) * controller.ema_fitness + EMA_ALPHA * reward;
|
||||
controller.ema_fitness =
|
||||
(1.0f - EMA_ALPHA) * controller.ema_fitness + EMA_ALPHA * reward;
|
||||
controller.fitness = reward;
|
||||
} else if (elite_strategy_ == EliteStrategy::Rolling) {
|
||||
controller.fitness = reward;
|
||||
@@ -186,44 +198,53 @@ void Population::update_controller_fitness(FuzzyController& controller, float re
|
||||
}
|
||||
}
|
||||
|
||||
void Population::mark_violated(FuzzyController& controller) {
|
||||
auto it = std::find_if(violated_controllers_.begin(), violated_controllers_.end(),
|
||||
[&](const FuzzyController& c) { return c.id == controller.id; });
|
||||
void Population::mark_violated(FuzzyController &controller) {
|
||||
auto it = std::find_if(
|
||||
violated_controllers_.begin(), violated_controllers_.end(),
|
||||
[&](const FuzzyController &c) { return c.id == controller.id; });
|
||||
if (it == violated_controllers_.end()) {
|
||||
violated_controllers_.push_back(controller);
|
||||
}
|
||||
}
|
||||
|
||||
float Population::get_effective_fitness(const FuzzyController& controller, float training_progress) const {
|
||||
float Population::get_effective_fitness(const FuzzyController &controller,
|
||||
float training_progress) const {
|
||||
float recent_avg = 0.0f;
|
||||
if (!controller.fitness_history.empty()) {
|
||||
float sum = 0.0f;
|
||||
for (float f : controller.fitness_history) sum += f;
|
||||
for (float f : controller.fitness_history)
|
||||
sum += f;
|
||||
recent_avg = sum / controller.fitness_history.size();
|
||||
}
|
||||
|
||||
float lifetime_avg = 0.0f;
|
||||
if (controller.evaluation_count > 0) {
|
||||
lifetime_avg = controller.lifetime_fitness / static_cast<float>(controller.evaluation_count);
|
||||
lifetime_avg = controller.lifetime_fitness /
|
||||
static_cast<float>(controller.evaluation_count);
|
||||
}
|
||||
|
||||
float alpha = 0.2f + 0.6f * training_progress;
|
||||
return alpha * recent_avg + (1.0f - alpha) * lifetime_avg;
|
||||
}
|
||||
|
||||
void Population::evolve(float current_loss, float velocity, float training_progress) {
|
||||
void Population::evolve(float current_loss, float velocity,
|
||||
float training_progress) {
|
||||
static thread_local std::mt19937 rng{std::random_device{}()};
|
||||
std::uniform_real_distribution<float> coin(0.0f, 1.0f);
|
||||
|
||||
if (gladiators_.empty()) return;
|
||||
if (gladiators_.empty())
|
||||
return;
|
||||
|
||||
FuzzyController& worst = get_worst_active();
|
||||
FuzzyController& best_active = get_best_active();
|
||||
FuzzyController &worst = get_worst_active();
|
||||
FuzzyController &best_active = get_best_active();
|
||||
auto elites = get_elites();
|
||||
|
||||
// Update behavioral archive for novelty search
|
||||
if (best_active.fitness > -999.0f) {
|
||||
std::vector<float> behavior(best_active.genome.weights.begin(), best_active.genome.weights.begin() + std::min(20, static_cast<int>(best_active.genome.weights.size())));
|
||||
std::vector<float> behavior(
|
||||
best_active.genome.weights.begin(),
|
||||
best_active.genome.weights.begin() +
|
||||
std::min(20, static_cast<int>(best_active.genome.weights.size())));
|
||||
behavioral_archive_.push_back(behavior);
|
||||
if (behavioral_archive_.size() > BEHAVIORAL_ARCHIVE_SIZE) {
|
||||
behavioral_archive_.erase(behavioral_archive_.begin());
|
||||
@@ -256,23 +277,26 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
}
|
||||
float violator_prob = 0.1f;
|
||||
if (link_violator_) {
|
||||
violator_prob = std::max(0.0f, std::min(0.5f, (current_loss - 1.0f) / 4.0f));
|
||||
violator_prob =
|
||||
std::max(0.0f, std::min(0.5f, (current_loss - 1.0f) / 4.0f));
|
||||
}
|
||||
|
||||
// Select parent
|
||||
FuzzyController* parent = &best_active;
|
||||
std::vector<FuzzyController*> partner_pool;
|
||||
FuzzyController *parent = &best_active;
|
||||
std::vector<FuzzyController *> partner_pool;
|
||||
|
||||
float roll = coin(rng);
|
||||
if (roll < elite_prob && !elites.empty()) {
|
||||
std::uniform_int_distribution<int> elite_dist(0, static_cast<int>(elites.size()) - 1);
|
||||
std::uniform_int_distribution<int> elite_dist(
|
||||
0, static_cast<int>(elites.size()) - 1);
|
||||
parent = elites[elite_dist(rng)];
|
||||
partner_pool = elites;
|
||||
} else if (roll < elite_prob + violator_prob && !violated_controllers_.empty()) {
|
||||
} else if (roll < elite_prob + violator_prob &&
|
||||
!violated_controllers_.empty()) {
|
||||
parent = &best_active;
|
||||
// Filter living violators
|
||||
for (auto& v : violated_controllers_) {
|
||||
for (auto& g : gladiators_) {
|
||||
for (auto &v : violated_controllers_) {
|
||||
for (auto &g : gladiators_) {
|
||||
if (g.id == v.id) {
|
||||
partner_pool.push_back(&g);
|
||||
break;
|
||||
@@ -281,13 +305,15 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
}
|
||||
if (partner_pool.empty()) {
|
||||
// Fallback
|
||||
for (size_t i = 0; i < std::min(static_cast<size_t>(10), gladiators_.size()); ++i) {
|
||||
for (size_t i = 0;
|
||||
i < std::min(static_cast<size_t>(10), gladiators_.size()); ++i) {
|
||||
partner_pool.push_back(&gladiators_[i]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parent = &best_active;
|
||||
for (size_t i = 0; i < std::min(static_cast<size_t>(10), gladiators_.size()); ++i) {
|
||||
for (size_t i = 0;
|
||||
i < std::min(static_cast<size_t>(10), gladiators_.size()); ++i) {
|
||||
partner_pool.push_back(&gladiators_[i]);
|
||||
}
|
||||
}
|
||||
@@ -295,11 +321,12 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
// Crossover or mutation
|
||||
FuzzyController child;
|
||||
if (coin(rng) < 0.7f && partner_pool.size() > 1) {
|
||||
std::uniform_int_distribution<int> pool_dist(0, static_cast<int>(partner_pool.size()) - 1);
|
||||
FuzzyController* partner = partner_pool[pool_dist(rng)];
|
||||
std::uniform_int_distribution<int> pool_dist(
|
||||
0, static_cast<int>(partner_pool.size()) - 1);
|
||||
FuzzyController *partner = partner_pool[pool_dist(rng)];
|
||||
if (partner->id == parent->id) {
|
||||
// Pick another if possible
|
||||
for (auto* p : partner_pool) {
|
||||
for (auto *p : partner_pool) {
|
||||
if (p->id != parent->id) {
|
||||
partner = p;
|
||||
break;
|
||||
@@ -345,12 +372,13 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
|
||||
// Banach-Tarski Fission
|
||||
if (use_banach_fission_ && coin(rng) < 0.2f && !elites.empty()) {
|
||||
auto* prime_elite = elites[0];
|
||||
auto fission_pair = prime_elite->banach_tarski_fission(phase_phoenix_intensity);
|
||||
auto *prime_elite = elites[0];
|
||||
auto fission_pair =
|
||||
prime_elite->banach_tarski_fission(phase_phoenix_intensity);
|
||||
|
||||
// Find second worst
|
||||
FuzzyController* second_worst = nullptr;
|
||||
for (auto& g : gladiators_) {
|
||||
FuzzyController *second_worst = nullptr;
|
||||
for (auto &g : gladiators_) {
|
||||
if (g.id != worst.id) {
|
||||
if (second_worst == nullptr || g.fitness < second_worst->fitness) {
|
||||
second_worst = &g;
|
||||
@@ -361,7 +389,9 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
// Replace worst and second_worst with plus and minus child
|
||||
if (second_worst) {
|
||||
uint64_t sw_id = second_worst->id;
|
||||
auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == sw_id; });
|
||||
auto it =
|
||||
std::find_if(gladiators_.begin(), gladiators_.end(),
|
||||
[&](const FuzzyController &c) { return c.id == sw_id; });
|
||||
if (it != gladiators_.end()) {
|
||||
gladiators_.erase(it);
|
||||
}
|
||||
@@ -369,7 +399,9 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
}
|
||||
|
||||
uint64_t w_id = worst.id;
|
||||
auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == w_id; });
|
||||
auto it =
|
||||
std::find_if(gladiators_.begin(), gladiators_.end(),
|
||||
[&](const FuzzyController &c) { return c.id == w_id; });
|
||||
if (it != gladiators_.end()) {
|
||||
gladiators_.erase(it);
|
||||
}
|
||||
@@ -377,14 +409,17 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
} else {
|
||||
// Phoenix Rebirth or Standard replacement
|
||||
uint64_t w_id = worst.id;
|
||||
auto it = std::find_if(gladiators_.begin(), gladiators_.end(), [&](const FuzzyController& c) { return c.id == w_id; });
|
||||
auto it =
|
||||
std::find_if(gladiators_.begin(), gladiators_.end(),
|
||||
[&](const FuzzyController &c) { return c.id == w_id; });
|
||||
if (it != gladiators_.end()) {
|
||||
gladiators_.erase(it);
|
||||
}
|
||||
|
||||
if (coin(rng) < 0.1f && !elites.empty()) {
|
||||
auto* prime_elite = elites[0];
|
||||
gladiators_.push_back(prime_elite->create_orthogonal_child(phase_phoenix_intensity));
|
||||
auto *prime_elite = elites[0];
|
||||
gladiators_.push_back(
|
||||
prime_elite->create_orthogonal_child(phase_phoenix_intensity));
|
||||
} else {
|
||||
gladiators_.push_back(child);
|
||||
}
|
||||
@@ -395,7 +430,7 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
reset_step_counter_++;
|
||||
if (reset_step_counter_ >= 500) {
|
||||
reset_step_counter_ = 0;
|
||||
for (auto& g : gladiators_) {
|
||||
for (auto &g : gladiators_) {
|
||||
g.fitness = 0.0f;
|
||||
g.ema_fitness = 0.0f;
|
||||
g.fitness_history.clear();
|
||||
@@ -411,33 +446,35 @@ void Population::evolve(float current_loss, float velocity, float training_progr
|
||||
|
||||
void Population::resize(int target_size, float training_progress) {
|
||||
int current_size = static_cast<int>(gladiators_.size());
|
||||
if (current_size == target_size) return;
|
||||
if (current_size == target_size)
|
||||
return;
|
||||
|
||||
static thread_local std::mt19937 rng{std::random_device{}()};
|
||||
|
||||
if (current_size < target_size) {
|
||||
int needed = target_size - current_size;
|
||||
bool has_eval = false;
|
||||
for (const auto& g : gladiators_) {
|
||||
for (const auto &g : gladiators_) {
|
||||
if (g.evaluation_count > 0) {
|
||||
has_eval = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (has_eval) {
|
||||
std::vector<std::pair<float, FuzzyController*>> candidates;
|
||||
for (auto& g : gladiators_) {
|
||||
std::vector<std::pair<float, FuzzyController *>> candidates;
|
||||
for (auto &g : gladiators_) {
|
||||
candidates.push_back({get_effective_fitness(g, training_progress), &g});
|
||||
}
|
||||
std::sort(candidates.begin(), candidates.end(),
|
||||
[](const std::pair<float, FuzzyController*>& a, const std::pair<float, FuzzyController*>& b) {
|
||||
[](const std::pair<float, FuzzyController *> &a,
|
||||
const std::pair<float, FuzzyController *> &b) {
|
||||
return a.first > b.first;
|
||||
});
|
||||
|
||||
int limit = std::min(10, static_cast<int>(candidates.size()));
|
||||
std::uniform_int_distribution<int> cand_dist(0, limit - 1);
|
||||
for (int i = 0; i < needed; ++i) {
|
||||
FuzzyController* parent = candidates[cand_dist(rng)].second;
|
||||
FuzzyController *parent = candidates[cand_dist(rng)].second;
|
||||
float mutation_str = 0.1f;
|
||||
auto child = parent->mutate(mutation_str, 1.0f);
|
||||
|
||||
@@ -454,9 +491,9 @@ void Population::resize(int target_size, float training_progress) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::vector<FuzzyController*> evaluated;
|
||||
std::vector<FuzzyController*> unevaluated;
|
||||
for (auto& g : gladiators_) {
|
||||
std::vector<FuzzyController *> evaluated;
|
||||
std::vector<FuzzyController *> unevaluated;
|
||||
for (auto &g : gladiators_) {
|
||||
if (g.evaluation_count > 0) {
|
||||
evaluated.push_back(&g);
|
||||
} else {
|
||||
@@ -465,17 +502,21 @@ void Population::resize(int target_size, float training_progress) {
|
||||
}
|
||||
|
||||
std::sort(evaluated.begin(), evaluated.end(),
|
||||
[this, training_progress](const FuzzyController* a, const FuzzyController* b) {
|
||||
return get_effective_fitness(*a, training_progress) > get_effective_fitness(*b, training_progress);
|
||||
[this, training_progress](const FuzzyController *a,
|
||||
const FuzzyController *b) {
|
||||
return get_effective_fitness(*a, training_progress) >
|
||||
get_effective_fitness(*b, training_progress);
|
||||
});
|
||||
|
||||
std::vector<FuzzyController> new_pop;
|
||||
new_pop.reserve(target_size);
|
||||
for (int i = 0; i < std::min(target_size, static_cast<int>(evaluated.size())); ++i) {
|
||||
for (int i = 0;
|
||||
i < std::min(target_size, static_cast<int>(evaluated.size())); ++i) {
|
||||
new_pop.push_back(*evaluated[i]);
|
||||
}
|
||||
int remaining = target_size - static_cast<int>(new_pop.size());
|
||||
for (int i = 0; i < std::min(remaining, static_cast<int>(unevaluated.size())); ++i) {
|
||||
for (int i = 0;
|
||||
i < std::min(remaining, static_cast<int>(unevaluated.size())); ++i) {
|
||||
new_pop.push_back(*unevaluated[i]);
|
||||
}
|
||||
|
||||
@@ -489,14 +530,16 @@ void Population::calm_down() {
|
||||
}
|
||||
|
||||
float Population::get_diversity_index() const {
|
||||
if (gladiators_.size() < 2) return 0.0f;
|
||||
if (gladiators_.size() < 2)
|
||||
return 0.0f;
|
||||
float sum_dist = 0.0f;
|
||||
int count = 0;
|
||||
for (size_t i = 0; i < gladiators_.size(); ++i) {
|
||||
for (size_t j = i + 1; j < gladiators_.size(); ++j) {
|
||||
float dist_sq = 0.0f;
|
||||
for (size_t w = 0; w < GENOME_SIZE; ++w) {
|
||||
float diff = gladiators_[i].genome.weights[w] - gladiators_[j].genome.weights[w];
|
||||
float diff =
|
||||
gladiators_[i].genome.weights[w] - gladiators_[j].genome.weights[w];
|
||||
dist_sq += diff * diff;
|
||||
}
|
||||
sum_dist += std::sqrt(dist_sq);
|
||||
@@ -506,28 +549,30 @@ float Population::get_diversity_index() const {
|
||||
return sum_dist / static_cast<float>(count);
|
||||
}
|
||||
|
||||
std::vector<FuzzyController*> Population::get_elites() {
|
||||
std::vector<FuzzyController *> Population::get_elites() {
|
||||
if (gladiators_.size() <= static_cast<size_t>(ELITE_COUNT)) {
|
||||
std::vector<FuzzyController*> ptrs;
|
||||
std::vector<FuzzyController *> ptrs;
|
||||
ptrs.reserve(gladiators_.size());
|
||||
for (auto& g : gladiators_) {
|
||||
for (auto &g : gladiators_) {
|
||||
ptrs.push_back(&g);
|
||||
}
|
||||
return ptrs;
|
||||
}
|
||||
|
||||
std::vector<std::pair<float, FuzzyController*>> candidates;
|
||||
std::vector<std::pair<float, FuzzyController *>> candidates;
|
||||
candidates.reserve(gladiators_.size());
|
||||
for (auto& g : gladiators_) {
|
||||
for (auto &g : gladiators_) {
|
||||
float effective_fitness = 0.0f;
|
||||
if (elite_strategy_ == EliteStrategy::AgePenalty) {
|
||||
effective_fitness = g.fitness / std::log(static_cast<float>(g.age) + 2.0f);
|
||||
effective_fitness =
|
||||
g.fitness / std::log(static_cast<float>(g.age) + 2.0f);
|
||||
} else if (elite_strategy_ == EliteStrategy::EMA) {
|
||||
effective_fitness = g.ema_fitness;
|
||||
} else if (elite_strategy_ == EliteStrategy::Rolling) {
|
||||
if (!g.fitness_history.empty()) {
|
||||
float sum = 0.0f;
|
||||
for (float f : g.fitness_history) sum += f;
|
||||
for (float f : g.fitness_history)
|
||||
sum += f;
|
||||
effective_fitness = sum / g.fitness_history.size();
|
||||
} else {
|
||||
effective_fitness = g.fitness;
|
||||
@@ -539,11 +584,12 @@ std::vector<FuzzyController*> Population::get_elites() {
|
||||
}
|
||||
|
||||
std::sort(candidates.begin(), candidates.end(),
|
||||
[](const std::pair<float, FuzzyController*>& a, const std::pair<float, FuzzyController*>& b) {
|
||||
[](const std::pair<float, FuzzyController *> &a,
|
||||
const std::pair<float, FuzzyController *> &b) {
|
||||
return a.first > b.first;
|
||||
});
|
||||
|
||||
std::vector<FuzzyController*> elites;
|
||||
std::vector<FuzzyController *> elites;
|
||||
elites.reserve(ELITE_COUNT);
|
||||
for (int i = 0; i < ELITE_COUNT; ++i) {
|
||||
elites.push_back(candidates[i].second);
|
||||
@@ -551,9 +597,10 @@ std::vector<FuzzyController*> Population::get_elites() {
|
||||
return elites;
|
||||
}
|
||||
|
||||
void Population::add_to_repository(const FuzzyController& controller) {
|
||||
auto it = std::lower_bound(repository_.begin(), repository_.end(), controller,
|
||||
[](const FuzzyController& a, const FuzzyController& b) {
|
||||
void Population::add_to_repository(const FuzzyController &controller) {
|
||||
auto it =
|
||||
std::lower_bound(repository_.begin(), repository_.end(), controller,
|
||||
[](const FuzzyController &a, const FuzzyController &b) {
|
||||
return a.fitness > b.fitness;
|
||||
});
|
||||
repository_.insert(it, controller);
|
||||
|
||||
@@ -3,28 +3,28 @@
|
||||
|
||||
namespace fces {
|
||||
|
||||
SpectralSensor::SpectralSensor(torch::nn::Module& /*model*/) {}
|
||||
SpectralSensor::SpectralSensor(torch::nn::Module & /*model*/) {}
|
||||
|
||||
void SpectralSensor::track_layer(const std::string& name, const torch::Tensor& weight) {
|
||||
void SpectralSensor::track_layer(const std::string &name,
|
||||
const torch::Tensor &weight) {
|
||||
if (weight.dim() >= 2) {
|
||||
layer_ranks_[name] = compute_effective_rank(weight);
|
||||
}
|
||||
}
|
||||
|
||||
float SpectralSensor::get_global_rank() const {
|
||||
if (layer_ranks_.empty()) return 0.0f;
|
||||
if (layer_ranks_.empty())
|
||||
return 0.0f;
|
||||
float sum = 0.0f;
|
||||
for (const auto& [_, rank] : layer_ranks_) {
|
||||
for (const auto &[_, rank] : layer_ranks_) {
|
||||
sum += rank;
|
||||
}
|
||||
return sum / static_cast<float>(layer_ranks_.size());
|
||||
}
|
||||
|
||||
void SpectralSensor::reset() {
|
||||
layer_ranks_.clear();
|
||||
}
|
||||
void SpectralSensor::reset() { layer_ranks_.clear(); }
|
||||
|
||||
float SpectralSensor::compute_effective_rank(const torch::Tensor& weight) {
|
||||
float SpectralSensor::compute_effective_rank(const torch::Tensor &weight) {
|
||||
// SVD-based effective rank (Shannon entropy of normalized singular values)
|
||||
auto svd_result = torch::svd(weight.to(torch::kFloat32));
|
||||
auto svd = std::get<1>(svd_result);
|
||||
@@ -34,7 +34,8 @@ float SpectralSensor::compute_effective_rank(const torch::Tensor& weight) {
|
||||
return std::exp(entropy);
|
||||
}
|
||||
|
||||
float SpectralController::compute_alpha(float global_rank, float grokking_coefficient) const {
|
||||
float SpectralController::compute_alpha(float global_rank,
|
||||
float grokking_coefficient) const {
|
||||
return global_rank * grokking_coefficient;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,29 +1,32 @@
|
||||
#include "fces/telemetry.hpp"
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
|
||||
namespace fces {
|
||||
|
||||
Telemetry& Telemetry::get() {
|
||||
Telemetry &Telemetry::get() {
|
||||
static Telemetry instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void Telemetry::info(const std::string& event, const std::string& detail) {
|
||||
void Telemetry::info(const std::string &event, const std::string &detail) {
|
||||
std::cout << "[INFO] " << event;
|
||||
if (!detail.empty()) std::cout << " | " << detail;
|
||||
if (!detail.empty())
|
||||
std::cout << " | " << detail;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void Telemetry::warning(const std::string& event, const std::string& detail) {
|
||||
void Telemetry::warning(const std::string &event, const std::string &detail) {
|
||||
std::cerr << "[WARN] " << event;
|
||||
if (!detail.empty()) std::cerr << " | " << detail;
|
||||
if (!detail.empty())
|
||||
std::cerr << " | " << detail;
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
void Telemetry::error(const std::string& event, const std::string& detail) {
|
||||
void Telemetry::error(const std::string &event, const std::string &detail) {
|
||||
std::cerr << "[ERROR] " << event;
|
||||
if (!detail.empty()) std::cerr << " | " << detail;
|
||||
if (!detail.empty())
|
||||
std::cerr << " | " << detail;
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "fces/controller.hpp"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace fces;
|
||||
|
||||
@@ -40,7 +40,8 @@ TEST(ControllerTest, Crossover) {
|
||||
TEST(ControllerTest, DecideUpdate) {
|
||||
FuzzyController ctrl;
|
||||
std::vector<std::vector<float>> stats = {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f}};
|
||||
auto actions = ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f);
|
||||
auto actions =
|
||||
ctrl.decide_update(stats, 0.0f, 0.5f, 0.0f, 0.1f, 0.0f, 0.0f, 1.0f, 0.0f);
|
||||
EXPECT_EQ(actions.size(0), 1);
|
||||
EXPECT_EQ(actions.size(1), GENOME_OUTPUT_DIM);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "fces/fitness.hpp"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace fces;
|
||||
|
||||
@@ -14,7 +14,8 @@ TEST(RunningStatsTest, BasicUpdate) {
|
||||
|
||||
TEST(RunningStatsTest, ZScore) {
|
||||
RunningStats stats;
|
||||
for (int i = 0; i < 100; ++i) stats.update(static_cast<float>(i));
|
||||
for (int i = 0; i < 100; ++i)
|
||||
stats.update(static_cast<float>(i));
|
||||
float z = stats.z_score(50.0f);
|
||||
EXPECT_NEAR(z, 0.0f, 0.1f);
|
||||
}
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
#include "fces/optimizer.hpp"
|
||||
#include <gtest/gtest.h>
|
||||
#include <torch/torch.h>
|
||||
#include "fces/optimizer.hpp"
|
||||
|
||||
using namespace fces;
|
||||
|
||||
TEST(OptimizerTest, Construction) {
|
||||
auto model = torch::nn::Linear(10, 5);
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) params.push_back(p);
|
||||
for (auto &p : model->parameters())
|
||||
params.push_back(p);
|
||||
|
||||
FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f));
|
||||
EXPECT_EQ(opt.step_count(), 0);
|
||||
@@ -16,7 +17,8 @@ TEST(OptimizerTest, Construction) {
|
||||
TEST(OptimizerTest, StepUpdatesCounter) {
|
||||
auto model = torch::nn::Linear(10, 5);
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) params.push_back(p);
|
||||
for (auto &p : model->parameters())
|
||||
params.push_back(p);
|
||||
|
||||
FCESOptimizer opt(params, FCESConfig{}.set_lr(1e-3f));
|
||||
|
||||
@@ -33,7 +35,8 @@ TEST(OptimizerTest, StepUpdatesCounter) {
|
||||
TEST(OptimizerTest, UpdateFitness) {
|
||||
auto model = torch::nn::Linear(10, 5);
|
||||
std::vector<torch::Tensor> params;
|
||||
for (auto& p : model->parameters()) params.push_back(p);
|
||||
for (auto &p : model->parameters())
|
||||
params.push_back(p);
|
||||
|
||||
FCESOptimizer opt(params);
|
||||
opt.update_fitness(3.0f);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "fces/population.hpp"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace fces;
|
||||
|
||||
@@ -9,14 +9,14 @@ TEST(PopulationTest, Construction) {
|
||||
}
|
||||
|
||||
TEST(PopulationTest, DirectConstruction) {
|
||||
Population pop(200, 10000, EliteStrategy::Cumulative,
|
||||
false, false, false, false, false, true);
|
||||
Population pop(200, 10000, EliteStrategy::Cumulative, false, false, false,
|
||||
false, false, true);
|
||||
EXPECT_EQ(pop.size(), 1);
|
||||
}
|
||||
|
||||
TEST(PopulationTest, GetBestActive) {
|
||||
Population pop(10);
|
||||
auto& best = pop.get_best_active();
|
||||
auto &best = pop.get_best_active();
|
||||
// Should not crash
|
||||
EXPECT_GE(best.id, 0u);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user