I'm working on a text classification task using the Hugging Face Transformers library in Python. My code is set up to use ROC AUC as the evaluation metric, but I need to change it to accuracy. I've attempted to modify the code, but I'm running into issues.
Here's a simplified version of the code I'm working with:
# install packages
#!pip install torch transformers memory_profiler datasets accelerate
import time
import datetime
tic = time.time()
# import modules
import torch
import random
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_metric
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
# Function to define datasets
def create_datasets(X, y):
    # Split the data into train and validation sets
    X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y, test_size=0.2)
    # Call the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=True)
    # Tokenize the text
    train_tokens = tokenizer(X_train, truncation=True, padding=True, max_length=512)
    valid_tokens = tokenizer(X_test, truncation=True, padding=True, max_length=512)

    class MakeTorchData(torch.utils.data.Dataset):
        def __init__(self, tokens, labels):
            self.tokens = tokens
            self.labels = labels

        def __getitem__(self, idx):
            item = {k: torch.tensor(v[idx]) for k, v in self.tokens.items()}
            item["labels"] = torch.tensor([self.labels[idx]])
            return item

        def __len__(self):
            return len(self.labels)

    # Convert our tokenized data into torch Datasets
    train_dataset = MakeTorchData(train_tokens, y_train.ravel())
    valid_dataset = MakeTorchData(valid_tokens, y_test.ravel())
    return train_dataset, valid_dataset
# Define the evaluation metric
metric_name = "roc_auc"
metric = load_metric(metric_name)
# Define metrics
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    # 'micro', 'macro', etc. are for multi-label classification. For binary classification,
    # leave `average` at its default or specify "binary" where the metric supports it
    return metric.compute(prediction_scores=predictions, references=labels, average="macro")
# Create trainer
# Specify the arguments for the trainer
def create_trainer(model, train_dataset, valid_dataset, num_epochs=5):
    training_args = TrainingArguments(
        output_dir='./results',              # output directory
        num_train_epochs=num_epochs,         # total number of training epochs
        per_device_train_batch_size=8,       # batch size per device during training
        per_device_eval_batch_size=20,       # batch size for evaluation
        warmup_steps=500,                    # number of warmup steps for the learning rate scheduler
        weight_decay=0.01,                   # strength of weight decay
        logging_dir='./logs',                # directory for storing logs
        load_best_model_at_end=True,         # load the best model when finished training (default metric is loss)
        metric_for_best_model=metric_name,   # metric used to select the best checkpoint
        logging_steps=200,                   # log & save weights every `logging_steps`
        save_steps=200,
        evaluation_strategy="steps",         # evaluate every `logging_steps`
    )
    trainer = Trainer(
        model=model,                         # the instantiated Transformers model to be trained
        args=training_args,                  # training arguments, defined above
        train_dataset=train_dataset,         # training dataset
        eval_dataset=valid_dataset,          # evaluation dataset
        compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    )
    return trainer
# Define model name
model_name = "sentence-transformers/all-distilroberta-v1"
# Load the data into a pandas DataFrame (keep the same folder structure in your Drive so the file can be read properly)
df = pd.read_csv(r"C:\path\essays.csv", encoding = "latin-1")
df = df.replace({'y': 1, 'n': 0})
# Define X and y
X = df['TEXT']
y = df['cEXT']
# Create datasets
train_dataset, valid_dataset = create_datasets(X, y)
# Define a list of dropout probabilities to iterate through
dropout_probs = [0.1, 0.2, 0.3, 0.4, 0.5]
# Double loop to iterate through all combinations
for hidden_dropout_prob in dropout_probs:
    for attention_probs_dropout_prob in dropout_probs:
        # Define the model with the current dropout probabilities
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_probs_dropout_prob,
        ).to("cpu")

        # Rest of your code for this specific model configuration
        # Train the model
        trainer = create_trainer(model, train_dataset, valid_dataset, num_epochs=3)
        training_results = trainer.train()  # Capture the training results

        # Evaluate the model
        results = trainer.evaluate()

        # Print or save the results for this combination
        print(f"Model with hidden_dropout_prob={hidden_dropout_prob} and attention_probs_dropout_prob={attention_probs_dropout_prob}:")
        print(f"Training Results: {training_results}")
        print(f"Results: {results}")
In this code, the compute_metrics function and the evaluation metric are currently set up for ROC AUC, and that works well. However, I would like to evaluate my models with accuracy instead.
I would appreciate any guidance on adjusting this code to use accuracy as the evaluation metric instead of ROC AUC. What modifications should I make to the compute_metrics function and to other relevant parts of the code?
My dataset looks like this:
#AUTHID TEXT cEXT \
0 1997_504851.txt Well, right now I just woke up from a mid-day ... 0
1 1997_605191.txt Well, here we go with the stream of consciousn... 0
2 1997_687252.txt An open keyboard and buttons to push. The thin... 0
3 1997_568848.txt I can't believe it! It's really happening! M... 1
4 1997_688160.txt Well, here I go with the good old stream of co... 1
You can modify the compute_metrics function to calculate prediction accuracy:
from sklearn.metrics import accuracy_score
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    accuracy = accuracy_score(y_true=labels, y_pred=predictions)
    return {"accuracy": accuracy}