
I implemented a MLP Neural Network in C++, but if i normally compile it doesn't work, if i use the debugger with VS Code it works

I tried to implemt a MLP Neural Network for digit recognition using Eigen3, but if i normally compile and execute it at some point all the parameters (weights, biases, activations) become NaN. Instead if i debug it with VS Code it works.

This is the network

#include "..\Headers\Network.h"
#include <cmath>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>

using std::vector;
using std::string;
using Eigen::VectorXd;
using Eigen::MatrixXd;

double sigmoide (double x);
double sigmoide_derivative(double x);
VectorXd sigmoide_derivative(VectorXd vec);
void load_val (string pi, VectorXd& pixels);
std::ofstream _log ("log.txt");

template <typename T>
void print(T& mat)
    for (int r = 0; r < mat.rows(); r++)
        for (int c = 0; c < mat.cols(); c++)

Network::Network(vector<string> _data, int _l_rate, vector<int> dim) : data{_data}, l_rate{_l_rate}
    layers = dim.size();
    for (int i = 0; i < layers - 1; i++)
        MatrixXd m = MatrixXd::Random(dim[i + 1], dim[i]);
        VectorXd b = VectorXd::Random(dim[i + 1]);
        VectorXd _z (dim[i + 1]);
        VectorXd n (dim[i]);
    VectorXd n_f (dim[layers - 1]);

void Network::learn(int epoch, int mini_batch)

    for(int e = 0; e < epoch; e++)
        std::cout << "Epoch: " << e + 1 << "\n\n";
        double e_cost = 0;
        shuffle(begin(data), end(data), rng);
        for(unsigned long long int n = 0; n < data.size();)
            e_cost += SGD(mini_batch, n);

double Network::SGD (int mini_batch, unsigned long long int& n_data)
    vector<VectorXd> p_d_biases = vector<VectorXd>();
    vector<MatrixXd> p_d_weights = vector<MatrixXd>();
    double b_cost = 0;
    for(int m = 0; m < mini_batch && n_data != data.size(); m++, n_data++)
        SGD_b(p_d_biases, data.at(n_data));
        SGD_w(p_d_weights, p_d_biases);
        b_cost += cost(data.at(n_data));
    step(p_d_weights, p_d_biases, mini_batch);
    return b_cost / mini_batch;

double Network::cost(string sample)
    VectorXd e_values = VectorXd();
    exp_values(sample, e_values);
    double s_cost = 0;
    for(int i = 0; i < neurons[layers - 1].size(); i++)
        if(e_values[i] == 1)
            s_cost += std::pow(neurons.at(layers - 1)(i) - 1, 2);
            s_cost += std::pow(neurons.at(layers - 1)(i), 2);
    return s_cost;

void Network::step(const vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases, int mini_batch)
    for(int l = layers - 2, n = 0; l >= 0; l--, n++)
        VectorXd b_tmp (biases.at(l).rows());
        MatrixXd w_tmp (weights.at(l).rows(), weights.at(l).cols());
        for(int i = 0; i < layers - 1; i++)
            b_tmp += p_d_biases.at((i * (layers - 1)) + n);
            w_tmp += p_d_weights.at((i * (layers - 1)) + n);
        biases.at(l) -= l_rate * (b_tmp / mini_batch);
        weights.at(l) -= l_rate * (w_tmp / mini_batch);

void Network::SGD_w (vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases)
    for(int l = layers - 2; l >= 0; l--)
        p_d_weights.push_back(p_d_biases.at(p_d_biases.size() - (l + 1)) * neurons.at(l).transpose());

void Network::SGD_b (vector<VectorXd>& p_d_biases, string sample)
    VectorXd e_values = VectorXd();
    exp_values(sample, e_values);
    for(int l = layers - 1; l > 0; l--)
        if(l == (layers - 1))
            p_d_biases.push_back((2*(neurons.at(l) - e_values)).cwiseProduct(sigmoide_derivative(z.at(l-1))));
            VectorXd b =(weights.at(l).transpose() * p_d_biases.at(p_d_biases.size() - 1)).cwiseProduct(sigmoide_derivative(z.at(l - 1)));

void Network::feed_forward(string sample)
    load_val(sample, neurons[0]);
    for(int l = 0; l < layers - 1; l++)
        z.at(l) = weights.at(l) * neurons.at(l) + biases.at(l);
        for (int i = 0; i < biases.at(l).size(); i++)
            neurons.at(l + 1)(i) = sigmoide(z.at(l)(i));

void Network::exp_values (string sample, VectorXd& e_values)
    e_values.resize(neurons[layers - 1].size());
    short digit = std::stoi(string(sample.begin(), sample.begin() + 1));
    for(int i = 0;  i < e_values.size(); i++)
        if(i - digit == 0)
            e_values(i) = 1;
            e_values(i) = 0;

void load_val (string pi, VectorXd& pixels)
    int i = 0;
    for(auto s = pi.begin() + 3, p = pi.begin() + 1; s != pi.end(); s++)
        if(*s == ',')
            double t = std::stoi(std::string(p + 1, s)) / 255.;
            pixels(i) = t;
            p = s;
        if(s == (pi.end() - 1))
            double t = std::stoi(std::string(s, pi.end())) / 255.;
            pixels(i) = t;

VectorXd sigmoide_derivative(VectorXd vec)
    VectorXd result = VectorXd();
    for(int r = 0; r < vec.rows(); r++)
        result(r) = sigmoide_derivative(vec(r));
    return result;

double sigmoide_derivative(double x)
    return std::exp(x) / std::pow(1 + std::exp(x), 2);

double sigmoide (double x)
    return 1. / (1 + (1. / std::exp(x)));

This is the main

int main()
    ifstream f_data ("..\\csv_files\\mnist_train.csv");
    vector<string> data;
            string tmp;
            f_data >> tmp;
        vector<int> dim {784, 16, 10};
        Network n (data, 3, dim);
        n.learn(20, 10);
        ifstream t_data ("..\\csv_files\\mnist_test.csv");
        string s;
        t_data >> s;
        cout << string(s.begin(), s.begin() + 1) << endl;
    return 0;

I tried to disable the compiler optimizations, i controlled that everything was initiliazed, but other than this i don't know where to put my hands. I use as a compiler GCC from MinGW.


  • I found the solution, in the sigmoide and sigmoide_derivative function there are exp() functions that for values to high of x returns a value too large, so instead they give NaN. I added a control that in case of NaN value returns 0. Probably in the debug mode this cases were automatically managed.