I'm trying to multilayer perceptrone binary classification my own datasets. but i always got same accuracy when i change epoch number and learning rate.
My Multilayer Perceptron class
class MyMLP(nn.Module):
def __init__(self, num_input_features, num_hidden_neuron1, num_hidden_neuron2, num_output_neurons):
super(MyMLP, self).__init__()
self.hidden_layer1 = nn.Linear(num_input_features, num_hidden_neuron1)
self.hidden_layer2 = nn.Linear(num_hidden_neuron1, num_hidden_neuron2)
self.output_layer = nn.Linear(num_hidden_neuron2, num_output_neurons)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, X):
X = torch.tensor(X, dtype=torch.float)
hidden_res1 = self.relu(self.hidden_layer1(X))
hidden_res2 = self.relu(self.hidden_layer2(hidden_res1))
output = self.sigmoid(self.output_layer(hidden_res2))
return output
My Dataset class
class PrincessDataset(Dataset):
def __init__(self,dataName):
#dataloading
xy = np.loadtxt(dataName, delimiter=',', dtype=np.float32, skiprows=1)
self.x = torch.from_numpy(xy[0:, :-1])
self.y = torch.from_numpy(xy[:,-1])
self.n_samples = xy.shape[0]
def __getitem__(self, index):
return self.x[index] , self.y[index]
def __len__(self):
return self.n_samples
My Code
batch_size = 16
num_workers = 2
test_data = PrincessDataset('cure_the_princess_test.csv')
train_data = PrincessDataset('cure_the_princess_train.csv')
validation_data = PrincessDataset('cure_the_princess_validation.csv')
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# func parameters num_input_features, num_hidden_neuron1, num_hidden_neuron2, num_output_neurons
num_input_features = 13
num_hidden_neuron1 = 100
num_hidden_neuron2 = 50
num_output_neuron = 1 #binary classification
####
num_epochs = 200
learning_rate = 0.001
patience = 5
patience_counter = 0
###
model = MyMLP(num_input_features,num_hidden_neuron1, num_hidden_neuron2,num_output_neuron)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
list_train_loss, list_val_loss = [], []
best_val_loss = None
for epoch in range(num_epochs):
train_loss = 0.0
train_count = 0.0
for inputs, labels in train_loader:
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs.squeeze(), labels)
loss.backward()
optimizer.step()
train_count += 1.0
train_loss += loss.item()
validation_loss = 0.0
with torch.no_grad():
model.eval()
for inputs, labels in validation_loader:
outputs = model(inputs)
loss = criterion(outputs.squeeze(), labels)
validation_loss += loss.item()
model.train()
train_loss /= train_count
validation_loss /= len(validation_loader)
print("Epoch", epoch, "Training loss", train_loss,"Validation Loss :",validation_loss)
list_train_loss.append(train_loss)
list_val_loss.append(validation_loss)
val_score = validation_loss
if best_val_loss is None:
best_val_loss = val_score # hafızada patience boyu tutmaya başla
torch.save(model.state_dict(), "bestval.pt")
elif best_val_loss < val_score: # patience counter
patience_counter += 1
print("Earlystopping Patience Counter:",patience_counter)
if patience_counter == patience:
break
else:
best_val_loss = val_score
torch.save(model.state_dict(), "bestval.pt") # to keep the best model
patience_counter = 0
sns.set_style("darkgrid")
plt.plot(list_train_loss, label="Training loss")
plt.plot(list_val_loss, label="Validation loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
Accuracy calculating
model = MyMLP(num_input_features,num_hidden_neuron1, num_hidden_neuron2,num_output_neuron)
model.load_state_dict(torch.load('bestval.pt'))
model.eval()
predicts =[]
real_labels = list()
n_correct = 0
n_samples = 0
with torch.no_grad():
for inputs, labels in test_loader:
outputs = model(inputs)
_,predict = torch.max(outputs.data,1)
n_samples += labels.size(0)
predicts.extend(predict.tolist())
real_labels.extend(labels.tolist())
from sklearn.metrics import f1_score,accuracy_score,classification_report
print("Accuracy score of this model: {}".format(accuracy_score(real_labels,predicts)))
print(classification_report(real_labels,predicts))
Accuracy Result :
Accuracy score of this model: 0.49740932642487046
precision recall f1-score support
0.0 0.50 1.00 0.66 384
1.0 0.00 0.00 0.00 388
accuracy 0.50 772
macro avg 0.25 0.50 0.33 772
weighted avg 0.25 0.50 0.33 772
i got some accuracy score when i change epoch number, learning rate. im trying 3 days fix that problem. can you help me?
my csv files looks like this
Phoenix Feather,Unicorn Horn,Dragon's Blood,Mermaid Tears,Fairy Dust,Goblin Toes,Witch's Brew,Griffin Claw,Troll Hair,Kraken Ink,Minotaur Horn,Basilisk Scale,Chimera Fang,Cured
10.0,15.3,27.1,13.3,18.1,12.3,4.8,24.0,10.0,17.5,5.9,27.6,8.6,0
31.6,1.9,25.2,17.9,16.4,2.4,4.2,6.4,32.5,21.9,19.7,12.4,17.4,1
22.4,9.2,23.7,14.9,18.2,10.5,6.8,15.3,21.0,16.8,31.6,19.4,11.6,0
24.5,2.3,2.2,26.2,7.3,2.8,20.6,7.8,23.0,17.0,2.7,7.6,26.0,1
3.2,20.2,12.9,13.3,7.7,29.6,2.6,12.9,12.7,13.8,8.9,6.5,9.1,0
15.7,17.5,14.4,12.2,11.9,4.2,1.7,6.4,20.9,12.5,21.1,15.6,12.4,1
.
.
.
first row is label names, last column is classification 0 or 1, other columns is input values
This is a binary classification( your output is one dim), you should not use torch.max
it will always return the same output, which is 0. Instead you should compare the output with threshold as follows:
threshold = 0.5
preds = (outputs >threshold).to(labels.dtype)