How to fix this deprecated AdamW optimizer?
I tried to use a BERT model to perform sentiment analysis on hotel reviews, and when I run this piece of code it prompts a deprecation warning. I am still studying transformers and I don't want the code to become deprecated soon. I searched the web but couldn't find a solution.
I found this piece of information, but I don't know how to apply it to my code:
To switch optimizer, put optim="adamw_torch" in your TrainingArguments (the default is "adamw_hf")
Could anyone kindly help with this?
from transformers import BertTokenizer, BertForSequenceClassification
import torch_optimizer as optim
from torch.utils.data import DataLoader
from transformers import AdamW
import pandas as pd
import torch
import random
import numpy as np
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm.notebook import tqdm
import json
from collections import OrderedDict
import logging
from torch.utils.tensorboard import SummaryWriter
skip some code...
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay_rate': 0.01
    },
    {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay_rate': 0.0
    }
]
#
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5) ##deprecated
#optimizer = optim.AdamW(optimizer_grouped_parameters, lr=1e-5) ##torch.optim.AdamW (not working)
step = 0
best_acc = 0
epoch = 10
writer = SummaryWriter(log_dir='model_best')
for epoch in tqdm(range(epoch)):
    for idx, batch in tqdm(enumerate(train_loader),
                           total=len(train_texts) // batch_size,
                           leave=False):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]  # Calculate Loss
        logging.info(
            f'Epoch-{epoch}, Step-{step}, Loss: {loss.cpu().detach().numpy()}')
        step += 1
        loss.backward()
        optimizer.step()
        writer.add_scalar('train_loss', loss.item(), step)
    logging.info(f'Epoch {epoch}, present best acc: {best_acc}, start evaluating.')
    accuracy, precision, recall, f1 = eval_model(model, eval_loader)  # Evaluate Model
    writer.add_scalar('dev_accuracy', accuracy, step)
    writer.add_scalar('dev_precision', precision, step)
    writer.add_scalar('dev_recall', recall, step)
    writer.add_scalar('dev_f1', f1, step)
    if accuracy > best_acc:
        model.save_pretrained('model_best')  # Save Model
        tokenizer.save_pretrained('model_best')
        best_acc = accuracy
If you comment out both these lines:
import torch_optimizer as optim
from transformers import AdamW
and then use:
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=1e-5)
does it work? If not, what is the error?
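For reference, here is a minimal sketch of that change against your snippet. One assumption on my side: torch.optim.AdamW reads the per-group key 'weight_decay', so I have renamed your 'weight_decay_rate' keys accordingly (as far as I can tell, with the old key name the optimizer would just fall back to its default weight_decay for every group).
import torch

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {
        # parameters that should receive weight decay
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    },
    {
        # bias / norm parameters: no weight decay
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }
]

# no `from transformers import AdamW` and no `import torch_optimizer as optim` needed
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=1e-5)
The rest of your training loop (zero_grad / backward / step) stays exactly the same, since torch.optim.AdamW exposes the same interface.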
To switch optimizer, put optim="adamw_torch" in your TrainingArguments (the default is "adamw_hf")
This is referring to the Hugging Face Trainer, which is configured with a TrainingArguments instance. But as you are using your own training loop, this is not applicable to you.
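Just to illustrate what that quoted tip refers to (purely hypothetical here, since you are not using Trainer), it would look something like this; train_dataset and eval_dataset are placeholder names for tokenized datasets:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='model_best',
    num_train_epochs=10,
    learning_rate=1e-5,
    optim='adamw_torch',  # use torch.optim.AdamW instead of the deprecated transformers AdamW
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,  # placeholder: your tokenized training dataset
    eval_dataset=eval_dataset,    # placeholder: your tokenized evaluation dataset
)
trainer.train()
For your current code, though, switching to torch.optim.AdamW as shown above should be enough.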