I have the following code that I am using to identify the most influential words in the texts that the model predicts correctly on the test dataset.
import pandas as pd
import torch
from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score
from captum.attr import IntegratedGradients
# Loading data
train_df = pd.read_csv('train_dataset.csv')
test_df = pd.read_csv('test_dataset.csv')
# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def preprocess_data(df, tokenizer, max_len=128):
    inputs = tokenizer(list(df['text']), padding=True, truncation=True, max_length=max_len, return_tensors="pt")
    labels = torch.tensor(df['label'].values)
    return inputs, labels
train_inputs, train_labels = preprocess_data(train_df, tokenizer)
test_inputs, test_labels = preprocess_data(test_df, tokenizer)
# DataLoader
train_dataset = torch.utils.data.TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataset = torch.utils.data.TensorDataset(test_inputs['input_ids'], test_inputs['attention_mask'], test_labels)
test_loader = DataLoader(test_dataset, batch_size=16)
# Model setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2).to(device)
# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)
# Training Loop
model.train()
for epoch in range(3):  # Train for 3 epochs
    for batch in train_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} loss: {loss.item()}")
# Evaluation
model.eval()
correct_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        outputs = model(input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=1)
        correct_predictions.extend(
            (preds == labels).cpu().numpy().tolist()
        )
accuracy = accuracy_score(test_labels.numpy(), correct_predictions)
print(f"Test Accuracy: {accuracy:.2f}")
# Integrated Gradients
ig = IntegratedGradients(model)
def get_influential_words(input_text, model, tokenizer, ig, device):
    model.eval()
    # Tokenizing the input text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    input_ids = inputs['input_ids'].to(device, dtype=torch.long)  # Explicitly convert to LongTensor
    attention_mask = inputs['attention_mask'].to(device, dtype=torch.long)  # Explicitly convert to LongTensor
    print("Input IDs shape:", input_ids.shape, "dtype:", input_ids.dtype)
    print("Attention mask shape:", attention_mask.shape, "dtype:", attention_mask.dtype)
    # forward function for IG
    def forward_func(input_ids):
        outputs = model(input_ids, attention_mask=attention_mask)
        return outputs.logits
    # Applying Integrated Gradients
    attributions, delta = ig.attribute(input_ids, target=1, return_convergence_delta=True)
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
    token_importances = attributions.sum(dim=2).squeeze(0).detach().cpu().numpy()
    return list(zip(tokens, token_importances))
# Analysing influential words for correctly predicted texts
for idx, correct in enumerate(correct_predictions):
    if correct:
        influential_words = get_influential_words(test_df['text'].iloc[idx], model, tokenizer, ig, device)
        print(f"Influential words for text: {test_df['text'].iloc[idx]}")
        print(influential_words)
But I am getting the following error when running the above.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:591: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
Epoch 1 loss: 0.4719192385673523
Epoch 2 loss: 0.39585667848587036
Epoch 3 loss: 0.14659778773784637
Test Accuracy: 0.70
Input IDs shape: torch.Size([1, 8]) dtype: torch.int64
Attention mask shape: torch.Size([1, 8]) dtype: torch.int64
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-9-f047b509c98d> in <cell line: 90>()
90 for idx, correct in enumerate(correct_predictions):
91 if correct:
---> 92 influential_words = get_influential_words(test_df['text'].iloc[idx], model, tokenizer, ig, device)
93 print(f"Influential words for text: {test_df['text'].iloc[idx]}")
94 print(influential_words)
18 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2549 # remove once script supports set_grad_enabled
2550 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2551 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
2552
2553
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
You need to change the attribution class slightly. Also, you never passed forward_func into the constructor of the attribution class, so the attribute method could not run the forward pass properly.
I think LayerIntegratedGradients is better for interpreting BERT, in line with this tutorial: https://captum.ai/tutorials/Bert_SQUAD_Interpret. It attributes with respect to the embedding layer, so the integer token IDs are never interpolated into floats (that interpolation is what triggers your RuntimeError; see the short demo below).
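To see where the error comes from: plain IntegratedGradients builds interpolated inputs between a baseline and your input, so the token IDs reach BERT's embedding layer as float tensors, which nn.Embedding rejects. A minimal reproduction (the sizes and IDs here are just illustrative):
import torch
import torch.nn as nn
emb = nn.Embedding(num_embeddings=30522, embedding_dim=8)
ids = torch.tensor([[101, 2023, 2003, 102]])
print(emb(ids).shape)    # Long indices work fine
emb(0.5 * ids.float())   # float "indices", as IG produces -> RuntimeError: expected Long/Int indices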
Below is a snippet that works:
from captum.attr import LayerIntegratedGradients
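# Note: `custom_forward` below calls a `predict` helper that isn't shown in the original snippet.
# A minimal sketch of it, assuming it should simply return the model's logits for a batch of input IDs:
def predict(inputs):
    return model(inputs).logits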
def custom_forward(inputs):
    preds = predict(inputs)
    return torch.softmax(preds, dim=1)[0][1].unsqueeze(-1)

lig = LayerIntegratedGradients(custom_forward, model.bert.embeddings)
def get_influential_words(input_text, model, tokenizer, lig, device):
    model.eval()
    # Tokenizing the input text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)  # not strictly needed for a single, unpadded sequence
    # Attributions are computed on the embedding layer, so input_ids can stay integer tensors
    attributions, delta = lig.attribute(input_ids, return_convergence_delta=True)
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
    token_importances = attributions.sum(dim=2).squeeze(0).detach().cpu().numpy()
    return list(zip(tokens, token_importances))
results = []
for idx, correct in enumerate(correct_predictions):
    if correct:
        influential_words = get_influential_words(test_df['text'].iloc[idx], model, tokenizer, lig, device)
        results.append((test_df['text'].iloc[idx], influential_words))
        print(f"Influential words for text: {test_df['text'].iloc[idx]}")
        print(influential_words)
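If you only want the strongest words rather than every token, you can sort each attribution list by magnitude. A small sketch, assuming the `results` list built above and skipping BERT's special tokens:
special_tokens = {'[CLS]', '[SEP]', '[PAD]'}
for text, word_scores in results:
    # keep the 5 tokens with the largest absolute attribution
    top = sorted(
        (pair for pair in word_scores if pair[0] not in special_tokens),
        key=lambda pair: abs(pair[1]),
        reverse=True,
    )[:5]
    print(text)
    print(top)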