I tried to build a neural network with two neurons, as described in the book Why Machines Learn on page 330. This is my code, and I don't know why it doesn't work. I tried something like this before with just one neuron, but now with two I can't explain why it won't work. All formulas I used are described in the book, so I think they are correct. I can list them as well if needed.
def training(x_data, labels, w11=0, w12=0, w2=0, b1=0, b2=0, alpha=0.1):
    w11_list = []
    w12_list = []
    b1_list = []
    w2_list = []
    b2_list = []
    L_list = []
    for i in range(len(x_data)):
        z1 = neuron1(x_data[i][0], x_data[i][1], w11=w11, w12=w12, b1=b1)
        z2 = neuron2(z1, w2=w2, b2=b2)
        yhat = a(z2)  # calculate the output of the neural net
        e = labels[i] - yhat  # calculate the error (labels[i] = true value)
        L_list.append(e**2)
        w_n2 = -2*e*a(z1)*(yhat*(1-yhat))  # formula to update the weight
        w2_list.append(w_n2)  # added to list to take the mean
        b_n2 = -2*e*(yhat*(1-yhat))  # formula to update the bias
        b2_list.append(b_n2)  # added to list to take the mean
        w1_n1 = -2*e*x_data[i][0]*w2*(yhat*(1-yhat))*(a(z1)*(1-a(z1)))  # same as above, just for the other weights and biases, so the formula changes
        w11_list.append(w1_n1)
        w2_n1 = -2*e*x_data[i][1]*w2*(yhat*(1-yhat))*a(z1)*(1-a(z1))
        w12_list.append(w2_n1)
        b_n1 = -2*e*w2*(yhat*(1-yhat))*(a(z1)*(1-a(z1)))
        b1_list.append(b_n1)
    w11_sum = sum(w11_list)/len(w11_list)  # average gradient for w11
    delta_w11 = -alpha*w11_sum  # multiplied by the learning rate alpha
    w11 = w11 + delta_w11  # added to the old w11 value
    w12_sum = sum(w12_list)/len(w12_list)  # same as above, but for the other weights and biases
    delta_w12 = -alpha*w12_sum
    w12 = w12 + delta_w12
    b1_sum = sum(b1_list)/len(b1_list)
    delta_b1 = -alpha*b1_sum
    b1 = b1 + delta_b1
    w2_sum = sum(w2_list)/len(w2_list)
    delta_w2 = -alpha*w2_sum
    w2 = w2 + delta_w2
    b2_sum = sum(b2_list)/len(b2_list)
    delta_b2 = -alpha*b2_sum
    b2 = b2 + delta_b2
    L_mean = sum(L_list)/len(L_list)  # calculate the mean loss over the epoch
    print(L_mean)
    return w11, w12, w2, b1, b2
Here are the functions I used in training().
import math

def neuron1(x1: input, x2: input, w11, w12, b1):
    z = w11*x1 + w12*x2 + b1
    y = a(z)
    return y

def neuron2(y, w2, b2):
    z = w2*y + b2
    y = a(z)
    return y

def a(z):
    return 1/(1 + math.e**-z)
I printed out the loss (L_mean in the code), but it didn't change. When I created the 1-neuron network, the loss went down and the model started to learn. That hasn't happened here; the loss stays the same. I also changed the data I am training with and used the data I created for the 1-neuron network, but my 2-neuron network still doesn't learn at all.
I tried changing a few things with ChatGPT, but that wasn't much help.
There are a couple of issues with your code.
First off, don't initialize all weights to the same constant, and especially not to zero: every gradient term for w11, w12 and b1 contains the factor w2, so with w2 = 0 the first neuron gets no gradient signal at the start and learning stalls.
Second, you're mixing up the weighted sum and the activation of each neuron: neuron1() and neuron2() already return a(z), yet your gradient formulas apply a() to those values again (e.g. a(z1)*(1-a(z1)), where z1 is already an activation). As a consequence, your backpropagation derivatives are evaluated at the wrong point.
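To see this concretely, here is a tiny self-contained sketch (the numeric values are made up, only the structure matters): because neuron1() returns a(z), the z1 in your loop is already an activation, so a(z1)*(1-a(z1)) computes the sigmoid derivative of the wrong quantity.

import math

def a(z):
    return 1 / (1 + math.e**-z)

# made-up example values, just to show the double application of a()
w11, w12, b1 = 0.5, -0.3, 0.1
x1, x2 = 1, 0

z1 = w11*x1 + w12*x2 + b1   # weighted sum (pre-activation)
a1 = a(z1)                  # what neuron1() actually returns

print(a1 * (1 - a1))        # sigmoid derivative at z1 -- what backprop needs
print(a(a1) * (1 - a(a1)))  # what your formula computes: a() applied twice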
The partial derivatives with respect to the weights in both layers go as follows:
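Writing the loss for one sample as L = (y - ŷ)², with ŷ = a(z2), z2 = w2·a1 + b2, a1 = a(z1) and z1 = w11·x1 + w12·x2 + b1, the chain rule gives (these are exactly the gradient terms used in the corrected code below):

$$\frac{\partial L}{\partial w_2} = -2\,(y-\hat{y})\,\hat{y}(1-\hat{y})\,a_1$$

$$\frac{\partial L}{\partial w_{11}} = -2\,(y-\hat{y})\,\hat{y}(1-\hat{y})\,w_2\,a_1(1-a_1)\,x_1$$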
The remaining partial derivatives are left as an exercise for you to derive/recheck.
I've corrected your code accordingly and observe a loss drop when training a simple AND function:
import math
import random


def training(x_data, labels, w11, w12, w2, b1, b2, alpha=0.1):
    w11_list = []
    w12_list = []
    b1_list = []
    w2_list = []
    b2_list = []
    L_list = []
    for i in range(len(x_data)):
        x1, x2 = x_data[i][0], x_data[i][1]

        # Forward pass
        z1 = neuron1(x1, x2, w11=w11, w12=w12, b1=b1)
        a1 = a(z1)
        z2 = neuron2(a1, w2=w2, b2=b2)
        yhat = a2 = a(z2)

        # Compute error (loss)
        e = labels[i] - yhat
        L_list.append(e**2)

        # Backward pass
        w_n2 = -2 * e * yhat * (1 - yhat) * a1
        w2_list.append(w_n2)
        b_n2 = -2 * e * yhat * (1 - yhat)
        b2_list.append(b_n2)
        w1_n1 = -2 * e * yhat * (1 - yhat) * w2 * a1 * (1 - a1) * x1
        w11_list.append(w1_n1)
        w2_n1 = -2 * e * yhat * (1 - yhat) * w2 * a1 * (1 - a1) * x2
        w12_list.append(w2_n1)
        b_n1 = -2 * e * yhat * (1 - yhat) * w2 * a1 * (1 - a1)
        b1_list.append(b_n1)

    # Update weights & biases with the averaged gradients (once per epoch)
    w11_sum = sum(w11_list) / len(w11_list)
    delta_w11 = -alpha * w11_sum
    w11 = w11 + delta_w11
    w12_sum = sum(w12_list) / len(w12_list)
    delta_w12 = -alpha * w12_sum
    w12 = w12 + delta_w12
    b1_sum = sum(b1_list) / len(b1_list)
    delta_b1 = -alpha * b1_sum
    b1 = b1 + delta_b1
    w2_sum = sum(w2_list) / len(w2_list)
    delta_w2 = -alpha * w2_sum
    w2 = w2 + delta_w2
    b2_sum = sum(b2_list) / len(b2_list)
    delta_b2 = -alpha * b2_sum
    b2 = b2 + delta_b2

    L_mean = sum(L_list) / len(L_list)
    print(L_mean)
    return w11, w12, w2, b1, b2


def neuron1(x1, x2, w11, w12, b1):
    z = w11 * x1 + w12 * x2 + b1
    return z


def neuron2(y, w2, b2):
    z = w2 * y + b2
    return z


# Sigmoid activation function
def a(z):
    return 1 / (1 + math.e**-z)


##
# Simple AND example
##
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
y = [0, 0, 0, 1]

w11 = random.uniform(0, 2)
w12 = random.uniform(0, 2)
w2 = random.uniform(0, 2)
b1 = b2 = 0

for i in range(20):
    w11, w12, w2, b1, b2 = training(data, y, w11, w12, w2, b1, b2)
    # print(w11, w12, b1, w2, b2)
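If you want to see the actual predictions after training, a quick check like this can be appended (not part of the code above, it just reuses neuron1/neuron2/a); with only 20 epochs and alpha=0.1 the outputs will still be far from 0 and 1, so train longer if you want them closer to the targets.

# predictions of the trained network on the AND inputs
for x1, x2 in data:
    yhat = a(neuron2(a(neuron1(x1, x2, w11=w11, w12=w12, b1=b1)), w2=w2, b2=b2))
    print(x1, x2, yhat)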