diff --git a/CMakeLists.txt b/CMakeLists.txt index 424619b..3d4d36d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,9 @@ if(FCES_BUILD_EXAMPLES) add_executable(pytorch_integration examples/pytorch_integration.cpp) target_link_libraries(pytorch_integration PRIVATE fces) + + add_executable(telemetry_and_inference examples/telemetry_and_inference.cpp) + target_link_libraries(telemetry_and_inference PRIVATE fces) endif() # ============================================================================ diff --git a/examples/telemetry_and_inference.cpp b/examples/telemetry_and_inference.cpp new file mode 100644 index 0000000..d7d6c0c --- /dev/null +++ b/examples/telemetry_and_inference.cpp @@ -0,0 +1,114 @@ +/** + * @file telemetry_and_inference.cpp + * @brief Example showcasing telemetry instrumentation and model inference. + */ + +#include +#include +#include +#include "fces/optimizer.hpp" +#include "fces/telemetry.hpp" + +// Define a simple neural network for nonlinear regression: y = x^2 +struct RegressionNet : torch::nn::Module { + torch::nn::Linear fc1{nullptr}, fc2{nullptr}; + + RegressionNet() { + fc1 = register_module("fc1", torch::nn::Linear(1, 16)); + fc2 = register_module("fc2", torch::nn::Linear(16, 1)); + } + + torch::Tensor forward(torch::Tensor x) { + x = torch::tanh(fc1->forward(x)); + return fc2->forward(x); + } +}; + +int main() { + fces::Telemetry::get().info("app_start", "Telemetry and Inference demo initialized."); + + // 1. Create Model and Data + auto model = std::make_shared(); + + // Generate training data: x in [-2, 2], y = x^2 + noise + auto x_train = torch::linspace(-2.0, 2.0, 100).unsqueeze(1); + auto y_train = x_train.pow(2) + 0.1 * torch::randn({100, 1}); + + // 2. Configure Optimizer + std::vector params; + for (auto& p : model->parameters()) { + params.push_back(p); + } + + fces::FCESOptimizer optimizer( + params, + fces::FCESConfig{} + .set_lr(2e-3f) + .set_population_size(150) + .set_total_steps(100) + ); + + fces::Telemetry::get().info("training_start", "Beginning neural net optimization with FCES."); + + auto start_train = std::chrono::high_resolution_clock::now(); + + // 3. Optimization Loop + for (int epoch = 0; epoch <= 100; ++epoch) { + optimizer.zero_grad(); + auto pred = model->forward(x_train); + auto loss = torch::mse_loss(pred, y_train); + loss.backward(); + optimizer.step(); + optimizer.update_fitness(loss.item()); + + if (epoch % 20 == 0) { + fces::Telemetry::get().info("epoch_checkpoint", + "Epoch " + std::to_string(epoch) + " | Loss: " + std::to_string(loss.item())); + } + } + + auto end_train = std::chrono::high_resolution_clock::now(); + double train_duration = std::chrono::duration(end_train - start_train).count(); + + fces::Telemetry::get().info("training_complete", + "Duration: " + std::to_string(train_duration) + " ms"); + + // 4. Inference Phase + fces::Telemetry::get().info("inference_phase_start", "Evaluating model on new test inputs."); + + // Generate test inputs + auto x_test = torch::tensor({-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}).unsqueeze(1); + auto y_expected = x_test.pow(2); + + // Switch model to evaluation mode + model->eval(); + + // Run inference and measure latency + auto start_inf = std::chrono::high_resolution_clock::now(); + torch::Tensor y_pred; + { + torch::NoGradGuard no_grad; + y_pred = model->forward(x_test); + } + auto end_inf = std::chrono::high_resolution_clock::now(); + double inf_duration = std::chrono::duration(end_inf - start_inf).count(); + + // Log telemetry for inference performance + fces::Telemetry::get().info("inference_perf", + "Inputs: " + std::to_string(x_test.size(0)) + " | Latency: " + std::to_string(inf_duration) + " ms"); + + // Print predictions and expected values side-by-side + std::cout << "\n================ INFERENCE RESULTS ================" << std::endl; + std::cout << "Input (x) | Predicted (y_pred) | Expected (y_expected)" << std::endl; + std::cout << "----------------------------------------------------" << std::endl; + for (int i = 0; i < x_test.size(0); ++i) { + float x_val = x_test[i][0].item(); + float pred_val = y_pred[i][0].item(); + float exp_val = y_expected[i][0].item(); + std::printf(" %7.2f | %7.4f | %7.4f\n", x_val, pred_val, exp_val); + } + std::cout << "====================================================\n" << std::endl; + + fces::Telemetry::get().info("app_finish", "Exiting demo successfully."); + return 0; +} diff --git a/include/fces/fitness.hpp b/include/fces/fitness.hpp index 7a7c258..f529c6a 100644 --- a/include/fces/fitness.hpp +++ b/include/fces/fitness.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace fces { diff --git a/include/fces/optimizer.hpp b/include/fces/optimizer.hpp index 6ea07d1..6d3e818 100644 --- a/include/fces/optimizer.hpp +++ b/include/fces/optimizer.hpp @@ -32,6 +32,15 @@ namespace fces { * optimizer.step(); * optimizer.update_fitness(loss.item()); */ +struct FCESOptimizerOptions : public torch::optim::OptimizerCloneableOptions { + explicit FCESOptimizerOptions(double lr = 0.01) : lr_(lr) {} + + double get_lr() const override { return lr_; } + void set_lr(const double lr) override { lr_ = lr; } + + double lr_; +}; + class FCESOptimizer : public torch::optim::Optimizer { public: explicit FCESOptimizer( diff --git a/include/fces/oscillation.hpp b/include/fces/oscillation.hpp index 061f848..49dc532 100644 --- a/include/fces/oscillation.hpp +++ b/include/fces/oscillation.hpp @@ -13,6 +13,8 @@ class OscillationDetector { public: static constexpr int WINDOW_SIZE = 64; static constexpr float POWER_THRESHOLD = 0.5f; + static constexpr int MIN_PERIOD = 4; + static constexpr int MAX_PERIOD = 16; void update(float loss); bool detect() const; diff --git a/src/controller.cpp b/src/controller.cpp index 3d4fbb4..60108b1 100644 --- a/src/controller.cpp +++ b/src/controller.cpp @@ -6,7 +6,7 @@ namespace fces { // Static members -std::atomic FuzzyController::next_id_{0}; +std::atomic FuzzyController::next_id_{1}; thread_local std::mt19937 FuzzyController::rng_{std::random_device{}()}; // --------------------------------------------------------------- @@ -30,7 +30,7 @@ Genome Genome::clone() const { // --------------------------------------------------------------- FuzzyController::FuzzyController() - : id(next_id_++), origin("genesis") { + : id(next_id_++), origin("random") { genome.randomize(rng_); // Bias output toward acceleration (V2.1 insight) // Set output biases (last GENOME_OUTPUT_DIM elements) to +2.0, -1.0, 0.0 with noise diff --git a/src/fitness.cpp b/src/fitness.cpp index 29861b8..e7635eb 100644 --- a/src/fitness.cpp +++ b/src/fitness.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace fces { diff --git a/src/optimizer.cpp b/src/optimizer.cpp index e574194..c8643f8 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -1,16 +1,84 @@ #include "fces/optimizer.hpp" #include #include +#include namespace fces { +namespace { + +int classify_layer_by_shape(const torch::Tensor& p) { + auto dims = p.sizes(); + if (dims.size() == 2) { + int64_t d0 = dims[0]; + int64_t d1 = dims[1]; + if (d0 > 10000 || d1 > 10000) { + return 0; // Embedding + } else if (d0 * 3 == d1 || d0 == d1 * 3) { + return 1; // Attention QKV + } else if (d0 == d1) { + return 3; // MLP/FFN + } else { + return 2; // Attention Proj + } + } else if (dims.size() == 1) { + if (dims[0] < 128) { + return 4; // LayerNorm + } else { + return 5; // Other / bias + } + } + return 5; // Other +} + +torch::Tensor apply_trust_clipping(const torch::Tensor& p, torch::Tensor update, float trust_region_clip) { + if (torch::isnan(update).any().item() || torch::isinf(update).any().item()) { + return torch::zeros_like(update); + } + + float p_norm = p.norm().item(); + if (p_norm > 1e-6f) { + float update_mag = update.norm().item(); + if (!std::isfinite(update_mag)) { + return torch::zeros_like(update); + } + + float max_update = trust_region_clip * p_norm; + if (update_mag > max_update) { + float correction = max_update / (update_mag + 1e-8f); + update.mul_(correction); + } + } + + if (torch::isnan(update).any().item() || torch::isinf(update).any().item()) { + return torch::zeros_like(update); + } + + return update; +} + +float calculate_parasitic_reward(const torch::Tensor& p, float mult, const RunningStats& grad_norm_tracker) { + if (!p.grad().defined()) { + return 0.0f; + } + float g_norm = p.grad().abs().mean().item(); + float z_g = grad_norm_tracker.z_score(g_norm); + return z_g * (mult - 1.0f); +} + +std::unique_ptr make_optimizer_options(double lr) { + return std::make_unique(lr); +} + +} // namespace + FCESOptimizer::FCESOptimizer( std::vector params, FCESConfig config ) : torch::optim::Optimizer( {torch::optim::OptimizerParamGroup(std::move(params))}, - std::make_unique(config.lr) + make_optimizer_options(config.lr) ), config_(std::move(config)), population_(config_.population_size, 10000, @@ -24,6 +92,8 @@ FCESOptimizer::FCESOptimizer( population_, 50, config_.auto_population, config_.direct_construction ); + spectral_sensor_ = std::make_unique(); + // Initial RAM backup backup_to_ram(); @@ -37,26 +107,109 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) { torch::Tensor loss = {}; if (closure) { + torch::AutoGradMode grad_mode(true); loss = closure(); } - // TODO: Port full step logic from Python: - // 1. _gather_stats() - // 2. get_active_controller() - // 3. _get_actions() - // 4. _apply_parameter_updates() - // 5. Evolution & maintenance + // 1. Gather Statistics + gather_stats(); - // Minimal stub: apply sign-SGD update - for (auto& group : param_groups()) { - for (auto& p : group.params()) { - if (!p.grad().defined()) continue; + // 2. Strategy: Population Selection & Dynamics + auto& active_controller = evolution_manager_->get_active_controller(); - auto update = torch::sign(p.grad()); - p.data().add_(update, -config_.lr); + // 3. Decision: Neural Decisions from Controllers + float current_loss_val = (loss.defined()) ? loss.item() : last_step_loss_; + + // Emergency Brake - NaN/Inf Detection + if (std::isnan(current_loss_val) || !std::isfinite(current_loss_val)) { + Telemetry::get().error("emergency_brake_nan", "NaN/Inf loss detected in step " + std::to_string(step_counter_)); + handle_rollback(); + return loss; + } + + float loss_velocity = fitness_engine_.calculate_loss_signal(current_loss_val, ema_loss_, config_.signal_mode); + last_loss_velocity_ = loss_velocity; + + float progress = std::min(1.0f, static_cast(step_counter_) / std::max(1, config_.total_steps)); + float grad_cv = grad_norm_tracker_.get_std() / (grad_norm_tracker_.get_mean() + 1e-8f); + + float csr_factor = 1.0f; + if (config_.csr_enabled) { + if (step_counter_ < config_.csr_warmup_steps) { + csr_factor = 0.0f; + } else { + float steps_since_warmup = static_cast(step_counter_ - config_.csr_warmup_steps); + csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast(config_.csr_ramp_steps))); } } + // Update spectral sensing rank + float spectral_alpha = 0.0f; + if (config_.grokking_coefficient > 0.0f && spectral_sensor_) { + if (step_counter_ % config_.spectral_frequency == 0 || last_spectral_rank_ == 0.0f) { + int param_idx = 0; + for (auto& group : param_groups()) { + for (auto& p : group.params()) { + if (p.dim() >= 2) { + std::string name = "layer_" + std::to_string(param_idx); + spectral_sensor_->track_layer(name, p); + } + param_idx++; + } + } + last_spectral_rank_ = spectral_sensor_->get_global_rank(); + } + spectral_alpha = last_spectral_rank_; + } + + float effective_alpha = spectral_alpha * csr_factor; + float kzm_damping = fitness_engine_.compute_kzm_damping(effective_alpha); + float stagnation_intensity = std::min(1.0f, static_cast(stagnation_counter_) / 500.0f); + float log_spectral_alpha = std::log(effective_alpha + 1e-6f); + + // Call decide_update + auto actions = active_controller.decide_update( + layer_stats_, + loss_velocity, + progress, + rollback_ema_, + grad_cv, + log_spectral_alpha, + stagnation_intensity, + kzm_damping, + loss_velocity + ); + + // Bandit-style Early Stopping + if (step_counter_ % 5 == 0 && loss_velocity > 0.05f) { + Telemetry::get().warning("early_stopping_poor_controller", + "controller_id=" + std::to_string(active_controller.id) + " velocity=" + std::to_string(loss_velocity)); + evolution_manager_->steps_active = evolution_manager_->selection_interval; + } + + if (torch::isnan(actions).any().item()) { + Telemetry::get().error("controller_nan_actions", "NaN actions returned by controller ID " + std::to_string(active_controller.id)); + population_.kill(active_controller); + auto& new_controller = evolution_manager_->get_active_controller(); + actions = torch::zeros_like(actions); + for (int i = 0; i < actions.size(0); ++i) { + actions[i][0] = 0.5f; // log_mult default + } + } + + // 4. Action: Apply Updates + apply_parameter_updates(actions); + + // 5. Evolution & Maintenance + if (current_loss_val > 0.0f) { + evolution_manager_->update_population_dynamics( + loss_velocity, + ema_loss_, + step_counter_, + config_.total_steps + ); + } + if (step_counter_ % 50 == 0) { backup_to_ram(); } @@ -65,21 +218,77 @@ torch::Tensor FCESOptimizer::step(LossClosure closure) { } void FCESOptimizer::update_fitness(float loss) { - // EMA loss tracking - if (step_counter_ == 1) { - ema_loss_ = loss; - } else { - ema_loss_ = 0.95f * ema_loss_ + 0.05f * loss; + // 1. Divergence Safety + bool is_nan = std::isnan(loss) || !std::isfinite(loss); + bool is_spike = (step_counter_ > 1) && (ema_loss_ > 0.0f) && (loss > config_.rollback_threshold * ema_loss_) && (ema_loss_ > 0.1f); + if (is_nan || is_spike) { + Telemetry::get().warning("divergence_detected", "loss=" + std::to_string(loss) + " ema=" + std::to_string(ema_loss_)); + handle_rollback(); + return; } - last_step_loss_ = loss; - // Update best loss window - if (loss < best_loss_window_) { + if (step_counter_ == 1 || ema_loss_ == 0.0f) { + ema_loss_ = loss; + last_step_loss_ = loss; + last_sparsity_ = calculate_sparsity(); + return; + } + + // 2. Metric Calculation + float train_adv = ema_loss_ - loss; + float val_adv = 0.0f; + float current_sparsity = calculate_sparsity(); + float sparsity_delta = current_sparsity - last_sparsity_; + float consistency_gap = std::max(0.0f, train_adv - val_adv); + + float grad_std = grad_norm_tracker_.get_std(); + float grad_mean = grad_norm_tracker_.get_mean(); + float grad_cv = grad_std / (grad_mean + 1e-8f); + + float raw_rank = (spectral_sensor_) ? spectral_sensor_->get_global_rank() : 0.0f; + float csr_factor = 1.0f; + if (config_.csr_enabled) { + if (step_counter_ < config_.csr_warmup_steps) { + csr_factor = 0.0f; + } else { + float steps_since_warmup = static_cast(step_counter_ - config_.csr_warmup_steps); + csr_factor = std::min(1.0f, steps_since_warmup / std::max(1.0f, static_cast(config_.csr_ramp_steps))); + } + } + float effective_rank = config_.csr_enabled ? raw_rank * csr_factor : raw_rank; + + FitnessMetrics metrics; + metrics.training_advantage = train_adv; + metrics.validation_advantage = val_adv; + metrics.grad_cv = grad_cv; + metrics.sparsity_delta = sparsity_delta; + metrics.consistency_gap = consistency_gap; + metrics.stable_rank = effective_rank; + + // 3. Fuzzy Evaluation + float final_fitness = fitness_evaluator_.evaluate(metrics); + + // 4. State Update + ema_loss_ = 0.95f * ema_loss_ + 0.05f * loss; + last_step_loss_ = loss; + last_sparsity_ = current_sparsity; + + // Stagnation logic + if (loss < best_loss_window_ * 0.995f) { best_loss_window_ = loss; stagnation_counter_ = 0; } else { stagnation_counter_++; } + + // 5. Apply to Population + auto& active_controller = evolution_manager_->get_active_controller(); + population_.update_controller_fitness(active_controller, final_fitness); + + Telemetry::get().info("fitness_calculated", + "loss=" + std::to_string(loss) + + " ema_loss=" + std::to_string(ema_loss_) + + " fitness=" + std::to_string(final_fitness)); } void FCESOptimizer::backup_to_ram() { @@ -115,17 +324,152 @@ float FCESOptimizer::calculate_sparsity() const { } void FCESOptimizer::gather_stats() { - // TODO: Port _gather_stats from Python + layer_stats_.clear(); + param_group_mapping_.clear(); + + int param_idx = 0; + bool has_nan_or_inf = false; + float max_grad_norm = 0.0f; + + for (auto& group : param_groups()) { + for (auto& p : group.params()) { + if (!p.grad().defined()) { + param_group_mapping_.push_back(-1); + continue; + } + + auto grad = p.grad(); + if (torch::isnan(grad).any().item() || torch::isinf(grad).any().item()) { + has_nan_or_inf = true; + } + + float grad_norm = grad.norm().item(); + if (std::isnan(grad_norm) || !std::isfinite(grad_norm)) { + has_nan_or_inf = true; + grad_norm = 0.0f; + } + + if (grad_norm > max_grad_norm) { + max_grad_norm = grad_norm; + } + + int64_t total_elements = grad.numel(); + int64_t zeros = (grad.abs() < 1e-5f).sum().item(); + float sparsity = (total_elements > 0) ? static_cast(zeros) / total_elements : 0.0f; + + int layer_type = classify_layer_by_shape(p); + int group_idx = static_cast(layer_stats_.size()); + layer_stats_.push_back({grad_norm, sparsity, static_cast(layer_type)}); + param_group_mapping_.push_back(group_idx); + + if (spectral_sensor_ && p.dim() >= 2) { + std::string name = "layer_" + std::to_string(param_idx); + spectral_sensor_->track_layer(name, p); + } + + param_idx++; + } + } + + if (has_nan_or_inf) { + Telemetry::get().error("poisoned_gradients_detected", + "NaN/Inf detected in gradients during step " + std::to_string(step_counter_)); + handle_rollback(); + return; + } + + if (step_counter_ == 1 && max_grad_norm > 1.0f) { + float safe_lr = 0.01f / (max_grad_norm + 1e-8f); + for (auto& group : param_groups()) { + if (group.options().get_lr() > safe_lr) { + Telemetry::get().info("auto_calibration_throttled_lr", + "old=" + std::to_string(group.options().get_lr()) + " new=" + std::to_string(safe_lr)); + group.options().set_lr(safe_lr); + config_.lr = safe_lr; + } + } + } + + if (!layer_stats_.empty()) { + float first_grad_norm = layer_stats_[0][0]; + grad_norm_tracker_.update(first_grad_norm); + } } -void FCESOptimizer::apply_parameter_updates(const torch::Tensor& /*actions*/) { - // TODO: Port _apply_parameter_updates from Python +void FCESOptimizer::apply_parameter_updates(const torch::Tensor& actions) { + int param_idx = 0; + float parasitic_accum = 0.0f; + int count_updated = 0; + + auto& active_controller = evolution_manager_->get_active_controller(); + + for (auto& group : param_groups()) { + float lr = static_cast(group.options().get_lr()); + float wd = config_.weight_decay; + + for (auto& p : group.params()) { + if (!p.grad().defined()) { + param_idx++; + continue; + } + + int g_idx = param_group_mapping_[param_idx]; + if (g_idx < 0 || g_idx >= actions.size(0)) { + param_idx++; + continue; + } + + float mult = actions[g_idx][0].item(); + float sign_gate = actions[g_idx][1].item(); + float wd_mult = (actions.size(1) > 2) ? actions[g_idx][2].item() : 1.0f; + + bool use_sign = sign_gate > 0.0f; + if (config_.ablation_mode == "force_sign") { + use_sign = true; + } else if (config_.ablation_mode == "force_grad") { + use_sign = false; + } + + if (wd > 0.0f) { + float effective_wd = wd; + if (config_.adaptive_wd) { + effective_wd *= wd_mult; + } + p.data().mul_(1.0f - lr * effective_wd); + } + + torch::Tensor update_vec = use_sign ? torch::sign(p.grad()) : p.grad(); + torch::Tensor update = -lr * mult * update_vec; + + update = apply_trust_clipping(p, update, config_.trust_region_clip); + p.data().add_(update); + + if (config_.parasitic_mode) { + parasitic_accum += calculate_parasitic_reward(p, mult, grad_norm_tracker_); + } + + param_idx++; + count_updated++; + } + } + + if (config_.parasitic_mode && count_updated > 0) { + float reward = parasitic_accum / static_cast(count_updated); + population_.update_controller_fitness(active_controller, reward * 10.0f, false); + } } void FCESOptimizer::handle_rollback() { restore_from_ram(); population_.calm_down(); rollback_ema_ = 0.9f * rollback_ema_ + 0.1f; + + ema_loss_ = 0.0f; + last_step_loss_ = 0.0f; + grad_norm_tracker_.reset(); + zero_grad(); + + Telemetry::get().warning("hard_reset_executed", "rollback_sanitization"); } -} // namespace fces +} // namespace fces diff --git a/src/spectral.cpp b/src/spectral.cpp index 8ea9943..7e339fd 100644 --- a/src/spectral.cpp +++ b/src/spectral.cpp @@ -26,10 +26,11 @@ void SpectralSensor::reset() { float SpectralSensor::compute_effective_rank(const torch::Tensor& weight) { // SVD-based effective rank (Shannon entropy of normalized singular values) - auto svd = torch::linalg::svdvals(weight.to(torch::kFloat32)); + auto svd_result = torch::svd(weight.to(torch::kFloat32)); + auto svd = std::get<1>(svd_result); auto s = svd / svd.sum(); - auto log_s = torch::log(s + 1e-10f); - float entropy = -(s * log_s).sum().item(); + auto log_s = (s + 1e-10f).log(); + float entropy = -s.mul(log_s).sum().item(); return std::exp(entropy); }