c++neural-networkartificial-intelligencegenetic

Why is my genetic AI unable to solve the XOR problem?


I'm new to AI, but I've already written genetic AIs in Python and therefore wanted to challenge myself to program an AI from scratch in C. In order to take advantage of the object orientation, I am currently still writing it in C++, but if it really works then I want to rewrite all classes in structures, but that's why a lot is written in C and not in C++ style (output, random numbers ...). I wanted to start with the XOR problem but I have great difficulty getting a correct result but I don't know why. The output is the same for all 4 options.

E.g.

0 0 -> 0.34

0 1 -> 0.34

1 0 -> 0.34

1 1 -> 0.34

Can anyone find a solution?

I would also appreciate resources on genetic neural networks in C/C++, if anyone can find them.

#include <vector>
#include <cstdlib>
#include <ctime>
#include <cstdio>
#include <cmath>

using std::vector;

double random_double(double min, double max) {
    double f = (double) rand() / RAND_MAX;
    return min + f * (max - min);
}

int random_int(int min, int max) {
    return (int) random_double((float) min, (float) max + 1);
}

double sig(double x) {
    return 1 / (1 + exp(-x));
}

class Settings {
public:
    int n_pop = 100;
    int n_iter = 500;
    int n_selection = 5;
    double r_cross = 0.9;
    double r_mut = 0.2;

    int inputs;
    int outputs;
    int hidden_layers;
    int *n_hidden;

    Settings(int inputs, int outputs, int hidden_layers, int *n_hidden) : inputs(inputs), outputs(outputs),
                                                                          hidden_layers(hidden_layers),
                                                                          n_hidden(n_hidden) {}
};

class Network {
    vector<double> _nodes;

    int inputs, hidden_layers, outputs, n_nodes;
    int *hidden_nodes;
    vector<int> layers;

public:
    double fitness = 0;
    vector<double> edges;
    vector<double> biases;

    Network(int inputs, int hidden_layers, int *hidden_nodes, int outputs) : inputs(inputs),
                                                                             hidden_layers(hidden_layers),
                                                                             hidden_nodes(hidden_nodes),
                                                                             outputs(outputs) {
        n_nodes = inputs + outputs;
        for (int i = 0; i < hidden_layers; i++) {
            n_nodes += hidden_nodes[i];
        }
    }

    void print() {
        printf("Nodes:\n");
        for (int i = 0; i < n_nodes; i++) {
            printf("%.2lf ", _nodes[i]);
        }
        printf("\n\nEdges:\n");
        for (double edge : edges) {
            printf("%.2lf ", edge);
        }
        printf("\n\nBiases:\n");
        for (double bias : biases) {
            printf("%.2lf ", bias);
        }
        printf("\n");
    }

    void output() {
        double in[2] = {0, 0};
        printf("\n0 : 0 -> %.2lf\n", forward(in)[0]);
        in[1] = 1;
        printf("0 : 1 -> %.2lf\n", forward(in)[0]);
        in[0] = 1;
        in[1] = 0;
        printf("1 : 0 -> %.2lf\n", forward(in)[0]);
        in[1] = 1;
        printf("1 : 1 -> %.2lf\n", forward(in)[0]);
    }

    void generate() {
        layers.push_back(0);
        for (int i = 0; i < inputs; i++) {
            layers[0]++;
            _nodes.emplace_back(0);
        }

        for (int l = 0; l < hidden_layers; l++) {
            layers.push_back(0);
            for (int i = 0; i < hidden_nodes[l]; i++) {
                layers[l + 1]++;
                _nodes.emplace_back(0);
                biases.push_back(0.1);
            }
        }

        layers.push_back(0);
        for (int i = 0; i < outputs; i++) {
            _nodes.emplace_back(0);
            biases.push_back(0.1);
            layers[hidden_layers + 1]++;
        }

        for (int layer = 0; layer < layers.size() - 1; layer++) {
            for (int l = 0; l < layers[layer]; l++) {
                for (int r = 0; r < layers[layer + 1]; r++) {
                    edges.push_back(random_double(-1, 1));
                }
            }
        }
    }

    vector<double> forward(const double *in) {
        for (int i = 0; i < inputs; i++)
            _nodes[i] = in[i];

        double x;
        int edge = 0, node = 0;

        for (int l = 0; l < layers.size() - 1; l++) {
            for (int i = 0; i < layers[l + 1]; i++) {
                x = 0;
                for (int j = 0; j < layers[l]; j++) {
                    x += edges[edge] * _nodes[node];
                    edge++;
                }
                _nodes[node + inputs] = sig(x + biases[node]);
                node++;
            }
        }

        vector<double> ret = {};
        for (int i = n_nodes - outputs - 1; i < n_nodes; i++)
            ret.push_back(_nodes[i]);
        return ret;
    }

    void change_weight() {
        int type = random_int(0, 1);
        if (type == 0) {
            for (int e = 0; e < edges.size(); e++) {
                if (random_int(0, 1) == 1)
                    edges[e] += random_double(-1, 1);
            }
        } else {
            for (int e = 0; e < edges.size(); e++) {
                if (random_int(0, 1) == 1)
                    edges[e] = random_double(-1, 1);
            }
        }
    }

    void change_bias() {
        int type = random_int(0, 1);
        if (type == 0) {
            for (int b = 0; b < biases.size(); b++) {
                if (random_int(0, 1) == 1)
                    biases[b] += random_double(-1, 1);
            }
        } else {
            for (int b = 0; b < biases.size(); b++) {
                if (random_int(0, 1) == 1)
                    biases[b] = random_double(-1, 1);
            }
        }
    }
};

Network selection(vector<Network> pop, Settings *settings) {
    int x = rand() % (settings->n_pop - settings->n_selection);
    int selected = x;
    for (int i = x + 1; i < x + settings->n_selection; i++) {
        if (pop[i].fitness < pop[selected].fitness)
            selected = i;
    }
    return pop[selected];
}

vector<Network> crossover(Network *p1, Network *p2, Settings *settings) {
    Network c1(*p1);
    Network c2(*p2);

    double x = random_double(0, 1);
    if (x <= settings->r_cross) {
        int edge_size = (int) p1->edges.size();
        int s = random_int(0, edge_size - 1);
        if (random_int(1, 2) == 1) {
            for (int i = 0; i < s; i++) {
                c1.edges[i] = p1->edges[i];
                c2.edges[i] = p2->edges[i];
            }
            for (int i = s; i < edge_size; i++) {
                c1.edges[i] = p2->edges[i];
                c2.edges[i] = p1->edges[i];
            }
        } else {
            for (int i = 0; i < s; i++) {
                c1.edges[i] = p2->edges[i];
                c2.edges[i] = p1->edges[i];
            }
            for (int i = s; i < edge_size; i++) {
                c1.edges[i] = p1->edges[i];
                c2.edges[i] = p2->edges[i];
            }
        }
    }
    vector<Network> ret;
    ret.emplace_back(c1);
    ret.emplace_back(c2);
    return ret;
}

void mutate(Network *n, Settings *settings) {
    double x = random_double(0, 1);
    if (x <= settings->r_mut) {
        switch (random_int(1, 2)) {
            case 1:
                n->change_weight();
                break;
            case 2:
                n->change_bias();
        }
    }
}

double distance(double x1, double x2) {
    if (x1 > x2)
        return x1 - x2;
    else
        return x2 - x1;
}

double x_or(double r1, double r2, double r3, double r4) {
    double ret = 0;
    ret += distance(0, r1);
    ret += distance(1, r2);
    ret += distance(1, r3);
    ret += distance(0, r4);
    return ret;
}

void evolve(Network *n) {
    double in[2] = {0.0, 1.0};
    double r1 = n->forward(in)[0]; //0,1
    in[0] = 1.0;
    double r2 = n->forward(in)[0]; //1,1
    in[1] = 0.0;
    double r3 = n->forward(in)[0]; //1,0
    in[0] = 0.0;
    double r4 = n->forward(in)[0]; //0,0
    n->fitness = x_or(r4, r1, r3, r2);
}

Network genetic_algorithm(Settings *settings) {
    vector<Network> pop;
    vector<Network> selected;
    vector<Network> children;
    for (int i = 0; i < settings->n_pop; i++) {
        Network n = Network(settings->inputs, settings->hidden_layers, settings->n_hidden, settings->outputs);
        n.generate();
        pop.emplace_back(n);
        selected.emplace_back(n);
        children.emplace_back(n);
    }

    Network best = pop[0];
    evolve(&best);

    for (int gen = 0; gen < settings->n_iter; gen++) {
        printf("GEN: %d - %0.3lf\n", gen, best.fitness);
        for (int i = 0; i < settings->n_pop; i++) {
            evolve(&pop[i]);
            if (pop[i].fitness < best.fitness)
                best = pop[i];
        }

        for (int i = 0; i < settings->n_pop; i++) {
            selected[i] = selection(pop, settings);
        }

        for (int i = 0; i < settings->n_pop; i += 2) {
            auto ch = crossover(&selected[i], &selected[i + 1], settings);
            for (int c = 0; c < 2; c++) {
                mutate(&ch[c], settings);
                children[i + c] = ch[c];
            }
            children[i] = ch[0];
            children[i + 1] = ch[1];
        }

        pop = children;
    }
    return best;
}

int main() {
    srand(time(nullptr));
    int hidden[2] = {5, 5};
    Settings s = Settings(2, 1, 2, hidden);
    auto best = genetic_algorithm(&s);
    best.print();
    best.output();
}

Solution

  • The forward function seems wrong. Check this part out:

    double x;
    int edge = 0, node = 0;
    
    for (int l = 0; l < layers.size() - 1; l++) {
        for (int i = 0; i < layers[l + 1]; i++) {
            x = 0;
            for (int j = 0; j < layers[l]; j++) {
                x += edges[edge] * _nodes[node];
                edge++;
            }
            _nodes[node + inputs] = sig(x + biases[node]);
            node++;
        }
    }
    

    It uses only _nodes[node] (and some edge weights) to calculate x. Then the result is assigned to _nodes[node + inputs]. So, the value of _nodes[node + inputs] comes only from the _nodes[node] value, not from all the nodes in the previous layer.

    It looks like there is a confusion about what node variable holds. If it holds the index of the node whose new value is being calculated (node + inputs), then you should change the

    x += edges[edge] * _nodes[node];
    

    line to use all values from last layer.

    You can do this, for example, by keeping the index of the first node of previous layer and adding j to it inside the loop.