pythondeep-learningpytorchneural-networkmlp

Pytorch Neural Networks Multilayer Perceptron Binary Classification i got always same accuracy


I'm trying to multilayer perceptrone binary classification my own datasets. but i always got same accuracy when i change epoch number and learning rate.

My Multilayer Perceptron class

class MyMLP(nn.Module):
    def __init__(self, num_input_features, num_hidden_neuron1, num_hidden_neuron2, num_output_neurons):
        super(MyMLP, self).__init__()
        self.hidden_layer1 = nn.Linear(num_input_features, num_hidden_neuron1)
        self.hidden_layer2 = nn.Linear(num_hidden_neuron1, num_hidden_neuron2)
        self.output_layer = nn.Linear(num_hidden_neuron2, num_output_neurons)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        X = torch.tensor(X, dtype=torch.float)
        hidden_res1 = self.relu(self.hidden_layer1(X))
        hidden_res2 = self.relu(self.hidden_layer2(hidden_res1))
        output = self.sigmoid(self.output_layer(hidden_res2))
        return output

My Dataset class

class PrincessDataset(Dataset):
    def __init__(self,dataName):
        #dataloading
        xy = np.loadtxt(dataName, delimiter=',', dtype=np.float32, skiprows=1)
        self.x = torch.from_numpy(xy[0:, :-1])
        self.y = torch.from_numpy(xy[:,-1])
        self.n_samples = xy.shape[0]
    def __getitem__(self, index):
        return self.x[index] , self.y[index]
    def __len__(self):
        return self.n_samples

My Code

batch_size = 16
num_workers = 2
test_data = PrincessDataset('cure_the_princess_test.csv')
train_data = PrincessDataset('cure_the_princess_train.csv')
validation_data = PrincessDataset('cure_the_princess_validation.csv')

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# func parameters num_input_features, num_hidden_neuron1, num_hidden_neuron2, num_output_neurons

num_input_features = 13
num_hidden_neuron1 = 100
num_hidden_neuron2 = 50
num_output_neuron = 1 #binary classification
####
num_epochs = 200
learning_rate = 0.001
patience = 5
patience_counter = 0
###
model = MyMLP(num_input_features,num_hidden_neuron1, num_hidden_neuron2,num_output_neuron)

criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
list_train_loss, list_val_loss = [], []
best_val_loss = None

for epoch in range(num_epochs):
    train_loss = 0.0
    train_count = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
        train_count += 1.0
        train_loss += loss.item()

    validation_loss = 0.0
    with torch.no_grad():
        model.eval()
        for inputs, labels in validation_loader:
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            validation_loss += loss.item()

    model.train()
    train_loss /= train_count
    validation_loss /= len(validation_loader)
    print("Epoch", epoch, "Training loss", train_loss,"Validation Loss :",validation_loss)

    list_train_loss.append(train_loss)
    list_val_loss.append(validation_loss)
    
    val_score = validation_loss
    if best_val_loss is None:
        best_val_loss = val_score # hafızada patience boyu tutmaya başla
        torch.save(model.state_dict(), "bestval.pt")
    elif best_val_loss < val_score: # patience counter
        patience_counter += 1
        print("Earlystopping Patience Counter:",patience_counter)
        if patience_counter == patience:
            break
    else:
        best_val_loss = val_score
        torch.save(model.state_dict(), "bestval.pt") # to keep the best model
        patience_counter = 0
                   

sns.set_style("darkgrid")
plt.plot(list_train_loss, label="Training loss")
plt.plot(list_val_loss, label="Validation loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

Accuracy calculating

model = MyMLP(num_input_features,num_hidden_neuron1, num_hidden_neuron2,num_output_neuron)
model.load_state_dict(torch.load('bestval.pt'))
model.eval()
predicts =[]
real_labels = list()

n_correct = 0
n_samples = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _,predict = torch.max(outputs.data,1)
        n_samples += labels.size(0)
        predicts.extend(predict.tolist())
        real_labels.extend(labels.tolist())

from sklearn.metrics import f1_score,accuracy_score,classification_report
print("Accuracy score of this model: {}".format(accuracy_score(real_labels,predicts)))
print(classification_report(real_labels,predicts))

Accuracy Result :

Accuracy score of this model: 0.49740932642487046
              precision    recall  f1-score   support

         0.0       0.50      1.00      0.66       384
         1.0       0.00      0.00      0.00       388

    accuracy                           0.50       772
   macro avg       0.25      0.50      0.33       772
weighted avg       0.25      0.50      0.33       772

i got some accuracy score when i change epoch number, learning rate. im trying 3 days fix that problem. can you help me?

my csv files looks like this

Phoenix Feather,Unicorn Horn,Dragon's Blood,Mermaid Tears,Fairy Dust,Goblin Toes,Witch's Brew,Griffin Claw,Troll Hair,Kraken Ink,Minotaur Horn,Basilisk Scale,Chimera Fang,Cured
10.0,15.3,27.1,13.3,18.1,12.3,4.8,24.0,10.0,17.5,5.9,27.6,8.6,0
31.6,1.9,25.2,17.9,16.4,2.4,4.2,6.4,32.5,21.9,19.7,12.4,17.4,1
22.4,9.2,23.7,14.9,18.2,10.5,6.8,15.3,21.0,16.8,31.6,19.4,11.6,0
24.5,2.3,2.2,26.2,7.3,2.8,20.6,7.8,23.0,17.0,2.7,7.6,26.0,1
3.2,20.2,12.9,13.3,7.7,29.6,2.6,12.9,12.7,13.8,8.9,6.5,9.1,0
15.7,17.5,14.4,12.2,11.9,4.2,1.7,6.4,20.9,12.5,21.1,15.6,12.4,1
.
.
.

first row is label names, last column is classification 0 or 1, other columns is input values


Solution

  • This is a binary classification( your output is one dim), you should not use torch.max it will always return the same output, which is 0. Instead you should compare the output with threshold as follows:

    threshold = 0.5
    preds = (outputs >threshold).to(labels.dtype)