style: run clang-format and configure pre-commit hooks
This commit is contained in:
@@ -18,65 +18,80 @@ namespace fces {
|
||||
* All fields have sensible defaults matching the Python V49.0 implementation.
|
||||
*/
|
||||
struct FCESConfig {
|
||||
// Learning rate (V49 optimal default)
|
||||
float lr = 1.6e-3f;
|
||||
// Learning rate (V49 optimal default)
|
||||
float lr = 1.6e-3f;
|
||||
|
||||
// Weight decay coefficient
|
||||
float weight_decay = 0.0f;
|
||||
// Weight decay coefficient
|
||||
float weight_decay = 0.0f;
|
||||
|
||||
// Population size for evolutionary search
|
||||
int population_size = 200;
|
||||
// Population size for evolutionary search
|
||||
int population_size = 200;
|
||||
|
||||
// Total training steps (for progress-aware scheduling)
|
||||
int total_steps = 5000;
|
||||
// Total training steps (for progress-aware scheduling)
|
||||
int total_steps = 5000;
|
||||
|
||||
// Signal mode for loss velocity calculation
|
||||
std::string signal_mode = "relative";
|
||||
// Signal mode for loss velocity calculation
|
||||
std::string signal_mode = "relative";
|
||||
|
||||
// Grokking awareness coefficient (0.0 = disabled)
|
||||
float grokking_coefficient = 0.1f;
|
||||
// Grokking awareness coefficient (0.0 = disabled)
|
||||
float grokking_coefficient = 0.1f;
|
||||
|
||||
// Spectral sensing frequency (every N steps)
|
||||
int spectral_frequency = 10;
|
||||
// Spectral sensing frequency (every N steps)
|
||||
int spectral_frequency = 10;
|
||||
|
||||
// Curriculum Spectral Regularization
|
||||
bool csr_enabled = false;
|
||||
int csr_warmup_steps = 500;
|
||||
int csr_ramp_steps = 1000;
|
||||
// Curriculum Spectral Regularization
|
||||
bool csr_enabled = false;
|
||||
int csr_warmup_steps = 500;
|
||||
int csr_ramp_steps = 1000;
|
||||
|
||||
// Trust region clipping
|
||||
float trust_region_clip = 0.01f;
|
||||
// Trust region clipping
|
||||
float trust_region_clip = 0.01f;
|
||||
|
||||
// Rollback threshold
|
||||
float rollback_threshold = 1.5f;
|
||||
// Rollback threshold
|
||||
float rollback_threshold = 1.5f;
|
||||
|
||||
// Adaptive weight decay
|
||||
bool adaptive_wd = false;
|
||||
// Adaptive weight decay
|
||||
bool adaptive_wd = false;
|
||||
|
||||
// Parasitic mode (gradient alignment reward)
|
||||
bool parasitic_mode = false;
|
||||
// Parasitic mode (gradient alignment reward)
|
||||
bool parasitic_mode = false;
|
||||
|
||||
// Ablation mode: "", "force_sign", "force_grad"
|
||||
std::string ablation_mode = "";
|
||||
// Ablation mode: "", "force_sign", "force_grad"
|
||||
std::string ablation_mode = "";
|
||||
|
||||
// Fractional factorial scoring (CRO trick)
|
||||
bool use_fractional_scoring = false;
|
||||
// Fractional factorial scoring (CRO trick)
|
||||
bool use_fractional_scoring = false;
|
||||
|
||||
// Direct construction mode (pop_size=1)
|
||||
bool direct_construction = false;
|
||||
// Direct construction mode (pop_size=1)
|
||||
bool direct_construction = false;
|
||||
|
||||
// Banach-Tarski fission
|
||||
bool use_banach_fission = false;
|
||||
// Banach-Tarski fission
|
||||
bool use_banach_fission = false;
|
||||
|
||||
// Auto-population (stabilize on divergence)
|
||||
bool auto_population = false;
|
||||
// Auto-population (stabilize on divergence)
|
||||
bool auto_population = false;
|
||||
|
||||
// Builder pattern
|
||||
FCESConfig& set_lr(float v) { lr = v; return *this; }
|
||||
FCESConfig& set_population_size(int v) { population_size = v; return *this; }
|
||||
FCESConfig& set_total_steps(int v) { total_steps = v; return *this; }
|
||||
FCESConfig& set_grokking_coefficient(float v) { grokking_coefficient = v; return *this; }
|
||||
FCESConfig& set_direct_construction(bool v) { direct_construction = v; return *this; }
|
||||
// Builder pattern
|
||||
FCESConfig &set_lr(float v) {
|
||||
lr = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_population_size(int v) {
|
||||
population_size = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_total_steps(int v) {
|
||||
total_steps = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_grokking_coefficient(float v) {
|
||||
grokking_coefficient = v;
|
||||
return *this;
|
||||
}
|
||||
FCESConfig &set_direct_construction(bool v) {
|
||||
direct_construction = v;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -27,28 +27,29 @@ constexpr int GENOME_INPUT_DIM = 14;
|
||||
constexpr int GENOME_HIDDEN_DIM = 8;
|
||||
// Controller output dimension: [multiplier, sign_gate, wd_mult]
|
||||
constexpr int GENOME_OUTPUT_DIM = 3;
|
||||
// Total genome size: input->hidden weights + hidden biases + hidden->output weights + output biases
|
||||
// Total genome size: input->hidden weights + hidden biases + hidden->output
|
||||
// weights + output biases
|
||||
constexpr int GENOME_SIZE =
|
||||
(GENOME_INPUT_DIM * GENOME_HIDDEN_DIM) + // input -> hidden weights
|
||||
GENOME_HIDDEN_DIM + // hidden biases
|
||||
(GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM) + // hidden -> output weights
|
||||
GENOME_OUTPUT_DIM; // output biases
|
||||
(GENOME_INPUT_DIM * GENOME_HIDDEN_DIM) + // input -> hidden weights
|
||||
GENOME_HIDDEN_DIM + // hidden biases
|
||||
(GENOME_HIDDEN_DIM * GENOME_OUTPUT_DIM) + // hidden -> output weights
|
||||
GENOME_OUTPUT_DIM; // output biases
|
||||
|
||||
/**
|
||||
* Genome — the "DNA" of a fuzzy controller.
|
||||
* A flat array of floats encoding a micro-MLP.
|
||||
*/
|
||||
struct Genome {
|
||||
std::array<float, GENOME_SIZE> weights{};
|
||||
std::array<float, GENOME_SIZE> gene_success{};
|
||||
float sigma_gene = 0.1f;
|
||||
float plasticity = 1.0f;
|
||||
std::array<float, GENOME_SIZE> weights{};
|
||||
std::array<float, GENOME_SIZE> gene_success{};
|
||||
float sigma_gene = 0.1f;
|
||||
float plasticity = 1.0f;
|
||||
|
||||
/// Initialize with random weights from a normal distribution
|
||||
void randomize(std::mt19937& rng);
|
||||
/// Initialize with random weights from a normal distribution
|
||||
void randomize(std::mt19937 &rng);
|
||||
|
||||
/// Deep copy
|
||||
Genome clone() const;
|
||||
/// Deep copy
|
||||
Genome clone() const;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -62,84 +63,81 @@ struct Genome {
|
||||
*/
|
||||
class FuzzyController {
|
||||
public:
|
||||
/// Unique identifier
|
||||
uint64_t id;
|
||||
/// Unique identifier
|
||||
uint64_t id;
|
||||
|
||||
/// The neural genome
|
||||
Genome genome;
|
||||
/// The neural genome
|
||||
Genome genome;
|
||||
|
||||
/// Fitness scores
|
||||
float fitness = 0.0f;
|
||||
float lifetime_fitness = 0.0f;
|
||||
float ema_fitness = 0.0f;
|
||||
int evaluation_count = 0;
|
||||
int age = 0;
|
||||
/// Fitness scores
|
||||
float fitness = 0.0f;
|
||||
float lifetime_fitness = 0.0f;
|
||||
float ema_fitness = 0.0f;
|
||||
int evaluation_count = 0;
|
||||
int age = 0;
|
||||
|
||||
/// Origin tracking
|
||||
std::string origin = "random";
|
||||
/// Origin tracking
|
||||
std::string origin = "random";
|
||||
|
||||
/// Trust region violation counter
|
||||
int trust_violations = 0;
|
||||
/// Trust region violation counter
|
||||
int trust_violations = 0;
|
||||
|
||||
/// Rolling fitness history (for Phase 23 strategies)
|
||||
std::vector<float> fitness_history;
|
||||
/// Rolling fitness history (for Phase 23 strategies)
|
||||
std::vector<float> fitness_history;
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Construction
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Construction
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
FuzzyController();
|
||||
explicit FuzzyController(Genome genome);
|
||||
FuzzyController();
|
||||
explicit FuzzyController(Genome genome);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Core Operations
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Core Operations
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Forward pass through the micro-MLP to produce update decisions.
|
||||
*
|
||||
* @param layer_stats Vector of per-layer feature maps
|
||||
* @param loss_trend Current loss velocity
|
||||
* @param step_pct Training progress [0, 1]
|
||||
* @param rollback_rate Rolling average rollback frequency
|
||||
* @param grad_stability Gradient coefficient of variation
|
||||
* @param spectral_alpha Log spectral rank
|
||||
* @param stagnation_intensity Stagnation counter / 500
|
||||
* @param kzm_damping Kibble-Zurek damping factor
|
||||
* @param projected_drift Projected loss drift
|
||||
* @return Tensor of shape [num_groups, 3] — (mult, sign_gate, wd_mult)
|
||||
*/
|
||||
torch::Tensor decide_update(
|
||||
const std::vector<std::vector<float>>& layer_stats,
|
||||
float loss_trend,
|
||||
float step_pct,
|
||||
float rollback_rate,
|
||||
float grad_stability,
|
||||
float spectral_alpha,
|
||||
float stagnation_intensity,
|
||||
float kzm_damping,
|
||||
float projected_drift
|
||||
);
|
||||
/**
|
||||
* Forward pass through the micro-MLP to produce update decisions.
|
||||
*
|
||||
* @param layer_stats Vector of per-layer feature maps
|
||||
* @param loss_trend Current loss velocity
|
||||
* @param step_pct Training progress [0, 1]
|
||||
* @param rollback_rate Rolling average rollback frequency
|
||||
* @param grad_stability Gradient coefficient of variation
|
||||
* @param spectral_alpha Log spectral rank
|
||||
* @param stagnation_intensity Stagnation counter / 500
|
||||
* @param kzm_damping Kibble-Zurek damping factor
|
||||
* @param projected_drift Projected loss drift
|
||||
* @return Tensor of shape [num_groups, 3] — (mult, sign_gate, wd_mult)
|
||||
*/
|
||||
torch::Tensor
|
||||
decide_update(const std::vector<std::vector<float>> &layer_stats,
|
||||
float loss_trend, float step_pct, float rollback_rate,
|
||||
float grad_stability, float spectral_alpha,
|
||||
float stagnation_intensity, float kzm_damping,
|
||||
float projected_drift);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Evolutionary Operators
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Evolutionary Operators
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
/// Create a mutated child
|
||||
FuzzyController mutate(float current_loss, float sigma_scale = 1.0f) const;
|
||||
/// Create a mutated child
|
||||
FuzzyController mutate(float current_loss, float sigma_scale = 1.0f) const;
|
||||
|
||||
/// Crossover with another controller
|
||||
FuzzyController crossover(const FuzzyController& partner, bool use_alignment = true) const;
|
||||
/// Crossover with another controller
|
||||
FuzzyController crossover(const FuzzyController &partner,
|
||||
bool use_alignment = true) const;
|
||||
|
||||
/// Create an orthogonal counter-strategy (Phoenix Rebirth)
|
||||
FuzzyController create_orthogonal_child(float intensity = 1.0f) const;
|
||||
/// Create an orthogonal counter-strategy (Phoenix Rebirth)
|
||||
FuzzyController create_orthogonal_child(float intensity = 1.0f) const;
|
||||
|
||||
/// Banach-Tarski fission: split into two complementary children
|
||||
std::pair<FuzzyController, FuzzyController> banach_tarski_fission(float intensity = 1.0f) const;
|
||||
/// Banach-Tarski fission: split into two complementary children
|
||||
std::pair<FuzzyController, FuzzyController>
|
||||
banach_tarski_fission(float intensity = 1.0f) const;
|
||||
|
||||
private:
|
||||
static std::atomic<uint64_t> next_id_;
|
||||
static thread_local std::mt19937 rng_;
|
||||
static std::atomic<uint64_t> next_id_;
|
||||
static thread_local std::mt19937 rng_;
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -21,34 +21,27 @@ namespace fces {
|
||||
*/
|
||||
class EvolutionManager {
|
||||
public:
|
||||
explicit EvolutionManager(
|
||||
Population& population,
|
||||
int selection_interval = 50,
|
||||
bool auto_population = false,
|
||||
bool direct_construction = false
|
||||
);
|
||||
explicit EvolutionManager(Population &population, int selection_interval = 50,
|
||||
bool auto_population = false,
|
||||
bool direct_construction = false);
|
||||
|
||||
/// Get the currently active controller
|
||||
FuzzyController& get_active_controller();
|
||||
/// Get the currently active controller
|
||||
FuzzyController &get_active_controller();
|
||||
|
||||
/// Update population dynamics based on current training state
|
||||
void update_population_dynamics(
|
||||
float loss_velocity,
|
||||
float ema_loss,
|
||||
int step_counter,
|
||||
int total_steps
|
||||
);
|
||||
/// Update population dynamics based on current training state
|
||||
void update_population_dynamics(float loss_velocity, float ema_loss,
|
||||
int step_counter, int total_steps);
|
||||
|
||||
/// Steps the active controller has been in control
|
||||
int steps_active = 0;
|
||||
/// Steps the active controller has been in control
|
||||
int steps_active = 0;
|
||||
|
||||
/// Selection interval (how long a controller stays active)
|
||||
int selection_interval;
|
||||
/// Selection interval (how long a controller stays active)
|
||||
int selection_interval;
|
||||
|
||||
private:
|
||||
Population& population_;
|
||||
bool auto_population_;
|
||||
bool direct_construction_;
|
||||
Population &population_;
|
||||
bool auto_population_;
|
||||
bool direct_construction_;
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -2,14 +2,15 @@
|
||||
|
||||
/**
|
||||
* @file fitness.hpp
|
||||
* @brief Fitness evaluation — loss signal processing and multi-objective evaluation.
|
||||
* @brief Fitness evaluation — loss signal processing and multi-objective
|
||||
* evaluation.
|
||||
*
|
||||
* Port of: packages/fces/core/fitness_engine.py + fitness.py
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace fces {
|
||||
|
||||
@@ -19,18 +20,18 @@ namespace fces {
|
||||
*/
|
||||
class RunningStats {
|
||||
public:
|
||||
void update(float value);
|
||||
float z_score(float value) const;
|
||||
float get_mean() const { return mean_; }
|
||||
float get_std() const;
|
||||
int get_count() const { return count_; }
|
||||
void update(float value);
|
||||
float z_score(float value) const;
|
||||
float get_mean() const { return mean_; }
|
||||
float get_std() const;
|
||||
int get_count() const { return count_; }
|
||||
|
||||
void reset();
|
||||
void reset();
|
||||
|
||||
private:
|
||||
int count_ = 0;
|
||||
float mean_ = 0.0f;
|
||||
float m2_ = 0.0f;
|
||||
int count_ = 0;
|
||||
float mean_ = 0.0f;
|
||||
float m2_ = 0.0f;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -38,26 +39,27 @@ private:
|
||||
*/
|
||||
class FitnessEngine {
|
||||
public:
|
||||
explicit FitnessEngine(float grokking_coefficient = 0.1f);
|
||||
explicit FitnessEngine(float grokking_coefficient = 0.1f);
|
||||
|
||||
/**
|
||||
* Calculate loss velocity signal.
|
||||
*
|
||||
* @param current_loss Current step loss
|
||||
* @param ema_loss Exponential moving average loss
|
||||
* @param mode "relative" or "absolute"
|
||||
* @return Velocity signal (negative = improving)
|
||||
*/
|
||||
float calculate_loss_signal(float current_loss, float ema_loss, const std::string& mode = "relative") const;
|
||||
/**
|
||||
* Calculate loss velocity signal.
|
||||
*
|
||||
* @param current_loss Current step loss
|
||||
* @param ema_loss Exponential moving average loss
|
||||
* @param mode "relative" or "absolute"
|
||||
* @return Velocity signal (negative = improving)
|
||||
*/
|
||||
float calculate_loss_signal(float current_loss, float ema_loss,
|
||||
const std::string &mode = "relative") const;
|
||||
|
||||
/**
|
||||
* Compute Kibble-Zurek Mechanism damping factor.
|
||||
* Prevents topological defects during phase transitions.
|
||||
*/
|
||||
float compute_kzm_damping(float spectral_alpha) const;
|
||||
/**
|
||||
* Compute Kibble-Zurek Mechanism damping factor.
|
||||
* Prevents topological defects during phase transitions.
|
||||
*/
|
||||
float compute_kzm_damping(float spectral_alpha) const;
|
||||
|
||||
private:
|
||||
float grokking_coefficient_;
|
||||
float grokking_coefficient_;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -65,75 +67,76 @@ private:
|
||||
*/
|
||||
class FuzzySet {
|
||||
public:
|
||||
FuzzySet(std::string name, float a, float b, float c, float d) noexcept
|
||||
: name_(std::move(name)), a_(a), b_(b), c_(c), d_(d) {}
|
||||
FuzzySet(std::string name, float a, float b, float c, float d) noexcept
|
||||
: name_(std::move(name)), a_(a), b_(b), c_(c), d_(d) {}
|
||||
|
||||
float membership(float x) const noexcept {
|
||||
if (!std::isfinite(x)) {
|
||||
return 0.0f;
|
||||
}
|
||||
if (x <= a_ || x >= d_) {
|
||||
return 0.0f;
|
||||
}
|
||||
if (x >= b_ && x <= c_) {
|
||||
return 1.0f;
|
||||
}
|
||||
if (x > a_ && x < b_) {
|
||||
float range = b_ - a_;
|
||||
return (x - a_) / (range > 0.0f ? range : 1e-9f);
|
||||
}
|
||||
if (x > c_ && x < d_) {
|
||||
float range = d_ - c_;
|
||||
return (d_ - x) / (range > 0.0f ? range : 1e-9f);
|
||||
}
|
||||
return 0.0f;
|
||||
float membership(float x) const noexcept {
|
||||
if (!std::isfinite(x)) {
|
||||
return 0.0f;
|
||||
}
|
||||
if (x <= a_ || x >= d_) {
|
||||
return 0.0f;
|
||||
}
|
||||
if (x >= b_ && x <= c_) {
|
||||
return 1.0f;
|
||||
}
|
||||
if (x > a_ && x < b_) {
|
||||
float range = b_ - a_;
|
||||
return (x - a_) / (range > 0.0f ? range : 1e-9f);
|
||||
}
|
||||
if (x > c_ && x < d_) {
|
||||
float range = d_ - c_;
|
||||
return (d_ - x) / (range > 0.0f ? range : 1e-9f);
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
const std::string& name() const noexcept { return name_; }
|
||||
const std::string &name() const noexcept { return name_; }
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
float a_;
|
||||
float b_;
|
||||
float c_;
|
||||
float d_;
|
||||
std::string name_;
|
||||
float a_;
|
||||
float b_;
|
||||
float c_;
|
||||
float d_;
|
||||
};
|
||||
|
||||
/**
|
||||
* Fitness metrics for multi-objective evaluation.
|
||||
*/
|
||||
struct FitnessMetrics {
|
||||
float training_advantage = 0.0f;
|
||||
float validation_advantage = 0.0f;
|
||||
float grad_cv = 0.0f;
|
||||
float sparsity_delta = 0.0f;
|
||||
float consistency_gap = 0.0f;
|
||||
float stable_rank = 0.0f;
|
||||
float training_advantage = 0.0f;
|
||||
float validation_advantage = 0.0f;
|
||||
float grad_cv = 0.0f;
|
||||
float sparsity_delta = 0.0f;
|
||||
float consistency_gap = 0.0f;
|
||||
float stable_rank = 0.0f;
|
||||
};
|
||||
|
||||
/**
|
||||
* FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy weighting.
|
||||
* FuzzyFitnessEvaluator — multi-objective fitness evaluation with fuzzy
|
||||
* weighting.
|
||||
*/
|
||||
class FuzzyFitnessEvaluator {
|
||||
public:
|
||||
FuzzyFitnessEvaluator() noexcept;
|
||||
FuzzyFitnessEvaluator() noexcept;
|
||||
|
||||
float evaluate(const FitnessMetrics& metrics) const noexcept;
|
||||
float evaluate(const FitnessMetrics &metrics) const noexcept;
|
||||
|
||||
private:
|
||||
FuzzySet stability_set_;
|
||||
FuzzySet train_set_;
|
||||
FuzzySet val_set_;
|
||||
FuzzySet sparsity_set_;
|
||||
FuzzySet consistency_set_;
|
||||
FuzzySet rank_set_;
|
||||
FuzzySet stability_set_;
|
||||
FuzzySet train_set_;
|
||||
FuzzySet val_set_;
|
||||
FuzzySet sparsity_set_;
|
||||
FuzzySet consistency_set_;
|
||||
FuzzySet rank_set_;
|
||||
|
||||
float w_stability_ = 0.2f;
|
||||
float w_train_ = 0.2f;
|
||||
float w_val_ = 0.3f;
|
||||
float w_sparsity_ = 0.1f;
|
||||
float w_consistency_ = 0.2f;
|
||||
float w_rank_ = 0.1f;
|
||||
float w_stability_ = 0.2f;
|
||||
float w_train_ = 0.2f;
|
||||
float w_val_ = 0.3f;
|
||||
float w_sparsity_ = 0.1f;
|
||||
float w_consistency_ = 0.2f;
|
||||
float w_rank_ = 0.1f;
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -5,17 +5,17 @@
|
||||
* @brief FCESOptimizer — the main entry point. libtorch-compatible optimizer.
|
||||
*/
|
||||
|
||||
#include <torch/torch.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "population.hpp"
|
||||
#include "fitness.hpp"
|
||||
#include "evolution.hpp"
|
||||
#include "spectral.hpp"
|
||||
#include "fitness.hpp"
|
||||
#include "oscillation.hpp"
|
||||
#include "population.hpp"
|
||||
#include "spectral.hpp"
|
||||
#include "telemetry.hpp"
|
||||
|
||||
namespace fces {
|
||||
@@ -24,7 +24,8 @@ namespace fces {
|
||||
* FCESOptimizer — Fuzzy Controlled Evolutionary Search V49.0 (C++ Port).
|
||||
*
|
||||
* Usage:
|
||||
* auto optimizer = FCESOptimizer(model->parameters(), FCESConfig{}.set_lr(1.6e-3));
|
||||
* auto optimizer = FCESOptimizer(model->parameters(),
|
||||
* FCESConfig{}.set_lr(1.6e-3));
|
||||
* // In training loop:
|
||||
* optimizer.zero_grad();
|
||||
* auto loss = model->forward(input);
|
||||
@@ -32,73 +33,72 @@ namespace fces {
|
||||
* optimizer.step();
|
||||
* optimizer.update_fitness(loss.item<float>());
|
||||
*/
|
||||
struct FCESOptimizerOptions : public torch::optim::OptimizerCloneableOptions<FCESOptimizerOptions> {
|
||||
explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {}
|
||||
struct FCESOptimizerOptions
|
||||
: public torch::optim::OptimizerCloneableOptions<FCESOptimizerOptions> {
|
||||
explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {}
|
||||
|
||||
double get_lr() const override { return lr_; }
|
||||
void set_lr(const double lr) override { lr_ = lr; }
|
||||
double get_lr() const override { return lr_; }
|
||||
void set_lr(const double lr) override { lr_ = lr; }
|
||||
|
||||
double lr_;
|
||||
double lr_;
|
||||
};
|
||||
|
||||
class FCESOptimizer : public torch::optim::Optimizer {
|
||||
public:
|
||||
explicit FCESOptimizer(
|
||||
std::vector<torch::Tensor> params,
|
||||
FCESConfig config = FCESConfig{}
|
||||
);
|
||||
explicit FCESOptimizer(std::vector<torch::Tensor> params,
|
||||
FCESConfig config = FCESConfig{});
|
||||
|
||||
/// Perform a single optimization step
|
||||
torch::Tensor step(LossClosure closure = nullptr) override;
|
||||
/// Perform a single optimization step
|
||||
torch::Tensor step(LossClosure closure = nullptr) override;
|
||||
|
||||
/// Update evolutionary fitness with current loss
|
||||
void update_fitness(float loss);
|
||||
/// Update evolutionary fitness with current loss
|
||||
void update_fitness(float loss);
|
||||
|
||||
/// Backup model weights to CPU RAM
|
||||
void backup_to_ram();
|
||||
/// Backup model weights to CPU RAM
|
||||
void backup_to_ram();
|
||||
|
||||
/// Restore model weights from CPU RAM backup
|
||||
void restore_from_ram();
|
||||
/// Restore model weights from CPU RAM backup
|
||||
void restore_from_ram();
|
||||
|
||||
/// Get current step count
|
||||
int step_count() const { return step_counter_; }
|
||||
/// Get current step count
|
||||
int step_count() const { return step_counter_; }
|
||||
|
||||
/// Calculate model sparsity
|
||||
float calculate_sparsity() const;
|
||||
/// Calculate model sparsity
|
||||
float calculate_sparsity() const;
|
||||
|
||||
private:
|
||||
FCESConfig config_;
|
||||
Population population_;
|
||||
FitnessEngine fitness_engine_;
|
||||
FuzzyFitnessEvaluator fitness_evaluator_;
|
||||
std::unique_ptr<EvolutionManager> evolution_manager_;
|
||||
OscillationDetector oscillation_detector_;
|
||||
RunningStats grad_norm_tracker_;
|
||||
FCESConfig config_;
|
||||
Population population_;
|
||||
FitnessEngine fitness_engine_;
|
||||
FuzzyFitnessEvaluator fitness_evaluator_;
|
||||
std::unique_ptr<EvolutionManager> evolution_manager_;
|
||||
OscillationDetector oscillation_detector_;
|
||||
RunningStats grad_norm_tracker_;
|
||||
|
||||
// State
|
||||
int step_counter_ = 0;
|
||||
float ema_loss_ = 0.0f;
|
||||
float last_step_loss_ = 0.0f;
|
||||
float best_loss_window_ = std::numeric_limits<float>::infinity();
|
||||
float rollback_ema_ = 0.0f;
|
||||
int stagnation_counter_ = 0;
|
||||
float last_loss_velocity_ = 0.0f;
|
||||
float last_sparsity_ = 0.0f;
|
||||
// State
|
||||
int step_counter_ = 0;
|
||||
float ema_loss_ = 0.0f;
|
||||
float last_step_loss_ = 0.0f;
|
||||
float best_loss_window_ = std::numeric_limits<float>::infinity();
|
||||
float rollback_ema_ = 0.0f;
|
||||
int stagnation_counter_ = 0;
|
||||
float last_loss_velocity_ = 0.0f;
|
||||
float last_sparsity_ = 0.0f;
|
||||
|
||||
// RAM backup
|
||||
std::vector<torch::Tensor> ram_backup_;
|
||||
// RAM backup
|
||||
std::vector<torch::Tensor> ram_backup_;
|
||||
|
||||
// Layer stats and group mappings
|
||||
std::vector<std::vector<float>> layer_stats_;
|
||||
std::vector<int> param_group_mapping_;
|
||||
std::unique_ptr<SpectralSensor> spectral_sensor_;
|
||||
SpectralController spectral_controller_;
|
||||
float last_spectral_rank_ = 0.0f;
|
||||
// Layer stats and group mappings
|
||||
std::vector<std::vector<float>> layer_stats_;
|
||||
std::vector<int> param_group_mapping_;
|
||||
std::unique_ptr<SpectralSensor> spectral_sensor_;
|
||||
SpectralController spectral_controller_;
|
||||
float last_spectral_rank_ = 0.0f;
|
||||
|
||||
// Internal methods
|
||||
void gather_stats();
|
||||
void apply_parameter_updates(const torch::Tensor& actions);
|
||||
void handle_rollback();
|
||||
// Internal methods
|
||||
void gather_stats();
|
||||
void apply_parameter_updates(const torch::Tensor &actions);
|
||||
void handle_rollback();
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -11,21 +11,22 @@ namespace fces {
|
||||
|
||||
class OscillationDetector {
|
||||
public:
|
||||
static constexpr int WINDOW_SIZE = 64;
|
||||
static constexpr float POWER_THRESHOLD = 0.5f;
|
||||
static constexpr int MIN_PERIOD = 4;
|
||||
static constexpr int MAX_PERIOD = 16;
|
||||
static constexpr int WINDOW_SIZE = 64;
|
||||
static constexpr float POWER_THRESHOLD = 0.5f;
|
||||
static constexpr int MIN_PERIOD = 4;
|
||||
static constexpr int MAX_PERIOD = 16;
|
||||
|
||||
void update(float loss);
|
||||
bool detect() const;
|
||||
float get_score() const;
|
||||
float get_variance_50() const;
|
||||
void reset();
|
||||
void update(float loss);
|
||||
bool detect() const;
|
||||
float get_score() const;
|
||||
float get_variance_50() const;
|
||||
void reset();
|
||||
|
||||
private:
|
||||
std::vector<float> loss_history_;
|
||||
static std::vector<float> detrend(const std::vector<float>& signal);
|
||||
static std::vector<float> compute_power_spectrum(const std::vector<float>& signal);
|
||||
std::vector<float> loss_history_;
|
||||
static std::vector<float> detrend(const std::vector<float> &signal);
|
||||
static std::vector<float>
|
||||
compute_power_spectrum(const std::vector<float> &signal);
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
* Port of: packages/fces/core/population.py (~1260 LOC)
|
||||
*/
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
#include "controller.hpp"
|
||||
|
||||
@@ -28,11 +28,11 @@ namespace fces {
|
||||
* Elite selection strategy for stale elite mitigation (Phase 23).
|
||||
*/
|
||||
enum class EliteStrategy {
|
||||
Cumulative, // Raw cumulative fitness
|
||||
EMA, // Exponential moving average
|
||||
Rolling, // Rolling window average
|
||||
Reset, // Periodic reset every 500 steps
|
||||
AgePenalty // fitness / log(age + 2)
|
||||
Cumulative, // Raw cumulative fitness
|
||||
EMA, // Exponential moving average
|
||||
Rolling, // Rolling window average
|
||||
Reset, // Periodic reset every 500 steps
|
||||
AgePenalty // fitness / log(age + 2)
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -40,126 +40,124 @@ enum class EliteStrategy {
|
||||
*/
|
||||
class Population {
|
||||
public:
|
||||
// Configuration constants
|
||||
static constexpr int ELITE_COUNT = 2;
|
||||
static constexpr float NOVELTY_WEIGHT = 0.1f;
|
||||
static constexpr float ISLAND_MIGRATION_RATE = 0.05f;
|
||||
static constexpr int BEHAVIORAL_ARCHIVE_SIZE = 100;
|
||||
// Configuration constants
|
||||
static constexpr int ELITE_COUNT = 2;
|
||||
static constexpr float NOVELTY_WEIGHT = 0.1f;
|
||||
static constexpr float ISLAND_MIGRATION_RATE = 0.05f;
|
||||
static constexpr int BEHAVIORAL_ARCHIVE_SIZE = 100;
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Construction
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Construction
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
explicit Population(
|
||||
int active_size = 75,
|
||||
int repo_size = 10000,
|
||||
EliteStrategy elite_strategy = EliteStrategy::Cumulative,
|
||||
bool link_mutation = false,
|
||||
bool link_elite = false,
|
||||
bool link_violator = false,
|
||||
bool use_fuzzy_pacer = false,
|
||||
bool use_fuzzy_importance = false,
|
||||
bool direct_construction = false,
|
||||
bool use_banach_fission = false
|
||||
);
|
||||
explicit Population(int active_size = 75, int repo_size = 10000,
|
||||
EliteStrategy elite_strategy = EliteStrategy::Cumulative,
|
||||
bool link_mutation = false, bool link_elite = false,
|
||||
bool link_violator = false, bool use_fuzzy_pacer = false,
|
||||
bool use_fuzzy_importance = false,
|
||||
bool direct_construction = false,
|
||||
bool use_banach_fission = false);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Core API
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Core API
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
/// Get the currently active controller (sticky selection)
|
||||
FuzzyController& get_active_controller();
|
||||
/// Get the currently active controller (sticky selection)
|
||||
FuzzyController &get_active_controller();
|
||||
|
||||
/// Select a controller via fitness-weighted tournament
|
||||
FuzzyController& select_weighted();
|
||||
/// Select a controller via fitness-weighted tournament
|
||||
FuzzyController &select_weighted();
|
||||
|
||||
/// Get the best controller in the active population
|
||||
FuzzyController& get_best_active();
|
||||
/// Get the best controller in the active population
|
||||
FuzzyController &get_best_active();
|
||||
|
||||
/// Get the worst non-elite controller
|
||||
FuzzyController& get_worst_active();
|
||||
/// Get the worst non-elite controller
|
||||
FuzzyController &get_worst_active();
|
||||
|
||||
/// Remove a specific controller (unless elite)
|
||||
void kill(FuzzyController& controller);
|
||||
/// Remove a specific controller (unless elite)
|
||||
void kill(FuzzyController &controller);
|
||||
|
||||
/// Update a controller's fitness
|
||||
void update_controller_fitness(FuzzyController& controller, float reward, bool increment_eval = true);
|
||||
/// Update a controller's fitness
|
||||
void update_controller_fitness(FuzzyController &controller, float reward,
|
||||
bool increment_eval = true);
|
||||
|
||||
/// Mark a controller as a violator (rollback)
|
||||
void mark_violated(FuzzyController& controller);
|
||||
/// Mark a controller as a violator (rollback)
|
||||
void mark_violated(FuzzyController &controller);
|
||||
|
||||
/// Get the effective fitness considering elite strategy and training progress
|
||||
float get_effective_fitness(const FuzzyController& controller, float training_progress) const;
|
||||
/// Get the effective fitness considering elite strategy and training progress
|
||||
float get_effective_fitness(const FuzzyController &controller,
|
||||
float training_progress) const;
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Evolution
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Evolution
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Evolve the population: select parents, crossover/mutate, replace worst.
|
||||
*
|
||||
* @param current_loss Current training loss
|
||||
* @param velocity Loss velocity
|
||||
* @param training_progress Training progress [0, 1]
|
||||
*/
|
||||
void evolve(float current_loss, float velocity = 0.0f, float training_progress = 0.0f);
|
||||
/**
|
||||
* Evolve the population: select parents, crossover/mutate, replace worst.
|
||||
*
|
||||
* @param current_loss Current training loss
|
||||
* @param velocity Loss velocity
|
||||
* @param training_progress Training progress [0, 1]
|
||||
*/
|
||||
void evolve(float current_loss, float velocity = 0.0f,
|
||||
float training_progress = 0.0f);
|
||||
|
||||
/// Resize the population (dynamic expansion/contraction)
|
||||
void resize(int target_size, float training_progress = 0.5f);
|
||||
/// Resize the population (dynamic expansion/contraction)
|
||||
void resize(int target_size, float training_progress = 0.5f);
|
||||
|
||||
/// Reduce mutation variance after rollback
|
||||
void calm_down();
|
||||
/// Reduce mutation variance after rollback
|
||||
void calm_down();
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Accessors
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Accessors
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
int size() const { return static_cast<int>(gladiators_.size()); }
|
||||
float global_sigma_modifier() const { return global_sigma_modifier_; }
|
||||
int size() const { return static_cast<int>(gladiators_.size()); }
|
||||
float global_sigma_modifier() const { return global_sigma_modifier_; }
|
||||
|
||||
/// Compute diversity index (behavioral spread)
|
||||
float get_diversity_index() const;
|
||||
/// Compute diversity index (behavioral spread)
|
||||
float get_diversity_index() const;
|
||||
|
||||
/// Serialization
|
||||
// TODO: state_dict / load_state_dict
|
||||
/// Serialization
|
||||
// TODO: state_dict / load_state_dict
|
||||
|
||||
private:
|
||||
std::vector<FuzzyController> gladiators_;
|
||||
std::vector<FuzzyController> repository_;
|
||||
std::vector<FuzzyController> violated_controllers_;
|
||||
std::vector<FuzzyController> gladiators_;
|
||||
std::vector<FuzzyController> repository_;
|
||||
std::vector<FuzzyController> violated_controllers_;
|
||||
|
||||
float global_sigma_modifier_ = 1.0f;
|
||||
float global_sigma_modifier_ = 1.0f;
|
||||
|
||||
// Sticky controller selection
|
||||
FuzzyController* active_controller_ = nullptr;
|
||||
int steps_active_ = 0;
|
||||
int selection_interval_ = 20;
|
||||
// Sticky controller selection
|
||||
FuzzyController *active_controller_ = nullptr;
|
||||
int steps_active_ = 0;
|
||||
int selection_interval_ = 20;
|
||||
|
||||
// Configuration
|
||||
EliteStrategy elite_strategy_;
|
||||
bool link_mutation_;
|
||||
bool link_elite_;
|
||||
bool link_violator_;
|
||||
bool use_fuzzy_pacer_;
|
||||
bool use_fuzzy_importance_;
|
||||
bool direct_construction_;
|
||||
bool use_banach_fission_;
|
||||
// Configuration
|
||||
EliteStrategy elite_strategy_;
|
||||
bool link_mutation_;
|
||||
bool link_elite_;
|
||||
bool link_violator_;
|
||||
bool use_fuzzy_pacer_;
|
||||
bool use_fuzzy_importance_;
|
||||
bool direct_construction_;
|
||||
bool use_banach_fission_;
|
||||
|
||||
// Novelty search
|
||||
std::vector<std::vector<float>> behavioral_archive_;
|
||||
// Novelty search
|
||||
std::vector<std::vector<float>> behavioral_archive_;
|
||||
|
||||
// Fitness history for fuzzy pacer
|
||||
std::vector<float> fitness_history_;
|
||||
// Fitness history for fuzzy pacer
|
||||
std::vector<float> fitness_history_;
|
||||
|
||||
// Phase 23: periodic reset counter
|
||||
int reset_step_counter_ = 0;
|
||||
// Phase 23: periodic reset counter
|
||||
int reset_step_counter_ = 0;
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Internal
|
||||
// ---------------------------------------------------------------
|
||||
// ---------------------------------------------------------------
|
||||
// Internal
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
std::vector<FuzzyController*> get_elites();
|
||||
void add_to_repository(const FuzzyController& controller);
|
||||
std::vector<FuzzyController *> get_elites();
|
||||
void add_to_repository(const FuzzyController &controller);
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -23,23 +23,23 @@ namespace fces {
|
||||
*/
|
||||
class SpectralSensor {
|
||||
public:
|
||||
SpectralSensor() = default;
|
||||
explicit SpectralSensor(torch::nn::Module& model);
|
||||
SpectralSensor() = default;
|
||||
explicit SpectralSensor(torch::nn::Module &model);
|
||||
|
||||
/// Track a layer's weight tensor
|
||||
void track_layer(const std::string& name, const torch::Tensor& weight);
|
||||
/// Track a layer's weight tensor
|
||||
void track_layer(const std::string &name, const torch::Tensor &weight);
|
||||
|
||||
/// Get the global (average) effective rank
|
||||
float get_global_rank() const;
|
||||
/// Get the global (average) effective rank
|
||||
float get_global_rank() const;
|
||||
|
||||
/// Reset all tracked layers
|
||||
void reset();
|
||||
/// Reset all tracked layers
|
||||
void reset();
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, float> layer_ranks_;
|
||||
std::unordered_map<std::string, float> layer_ranks_;
|
||||
|
||||
/// Compute effective rank via SVD
|
||||
static float compute_effective_rank(const torch::Tensor& weight);
|
||||
/// Compute effective rank via SVD
|
||||
static float compute_effective_rank(const torch::Tensor &weight);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -47,8 +47,8 @@ private:
|
||||
*/
|
||||
class SpectralController {
|
||||
public:
|
||||
/// Compute the spectral alpha (gating factor for rank-aware updates)
|
||||
float compute_alpha(float global_rank, float grokking_coefficient) const;
|
||||
/// Compute the spectral alpha (gating factor for rank-aware updates)
|
||||
float compute_alpha(float global_rank, float grokking_coefficient) const;
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
@@ -11,16 +11,16 @@ namespace fces {
|
||||
|
||||
class Telemetry {
|
||||
public:
|
||||
static Telemetry& get();
|
||||
static Telemetry &get();
|
||||
|
||||
void info(const std::string& event, const std::string& detail = "");
|
||||
void warning(const std::string& event, const std::string& detail = "");
|
||||
void error(const std::string& event, const std::string& detail = "");
|
||||
void info(const std::string &event, const std::string &detail = "");
|
||||
void warning(const std::string &event, const std::string &detail = "");
|
||||
void error(const std::string &event, const std::string &detail = "");
|
||||
|
||||
void push_to_remote();
|
||||
void push_to_remote();
|
||||
|
||||
private:
|
||||
Telemetry() = default;
|
||||
Telemetry() = default;
|
||||
};
|
||||
|
||||
} // namespace fces
|
||||
} // namespace fces
|
||||
|
||||
Reference in New Issue
Block a user