# **1. Installation of Required Libraries**

In [None]:
%pip install transformers datasets torch scikit-learn -q
%pip install transformers[torch] -q
%pip install accelerate -U -q

# **2. Implementation Code**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Checkpoint name shared by the tokenizer here and the classifier loaded later.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the "imdb" dataset; its "train" and "test" splits are used below.
dataset = load_dataset("imdb")

In [None]:
# Select the best available accelerator: CUDA first, then Apple MPS, then CPU.
# `getattr` guards against torch builds that do not ship `torch.backends.mps`.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [None]:
# Show which device was selected (rendered as the cell's output).
device

In [None]:
def tokenize_function(example):
    """Tokenize the "text" field, padding to the maximum length and truncating."""
    texts = example["text"]
    return tokenizer(texts, padding="max_length", truncation=True)


# Run the tokenizer over the dataset in batched mode.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
train_dataset, test_dataset = tokenized_datasets["train"], tokenized_datasets["test"]

# A fixed-seed shuffle followed by taking the first N rows yields a reproducible subset.
shuffled_train = train_dataset.shuffle(seed=42)
small_train_dataset = shuffled_train.select(range(20000))

shuffled_test = test_dataset.shuffle(seed=42)
small_eval_dataset = shuffled_test.select(range(5000))

In [None]:
# Load the checkpoint configured for two output labels, then move it to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
model.to(device)

In [None]:
import inspect

# Transformers renamed `evaluation_strategy` to `eval_strategy` and later dropped the
# old spelling, so use whichever keyword the installed version actually accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    # Checkpoints are saved on the same schedule as evaluation (once per epoch),
    # which `load_best_model_at_end=True` relies on.
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    save_total_limit=2,
    # `save_steps` is intentionally omitted: it only applies when save_strategy="steps".
    **{_eval_strategy_key: "epoch"},
)

In [None]:
def compute_metrics(pred):
    """Compute accuracy, precision, recall and F1 from an evaluation result.

    Reads ``pred.label_ids`` as the reference labels and takes the argmax over the
    last axis of ``pred.predictions`` as the predicted class. Precision, recall and
    F1 use binary averaging.

    Returns:
        A dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    accuracy = accuracy_score(y_true, y_pred)
    # The fourth element of the result is not needed here.
    prf = precision_recall_fscore_support(y_true, y_pred, average="binary")
    precision, recall, f1 = prf[:3]

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

In [None]:
# Wire the model, training configuration, the two data subsets and the metric
# function into a single Trainer instance.
trainer = Trainer(
 model=model,
 args=training_args,
 train_dataset=small_train_dataset,
 eval_dataset=small_eval_dataset,
 compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the Trainer configured above.
trainer.train()

In [None]:
# Evaluate on the full tokenized test split rather than the 5000-example subset
# that was passed to the Trainer as its eval_dataset.
eval_results = trainer.evaluate(eval_dataset=test_dataset)

In [None]:
# Report each test metric to four decimal places.
for label, key in (
    ("Accuracy", "eval_accuracy"),
    ("Precision", "eval_precision"),
    ("Recall", "eval_recall"),
    ("F1-score", "eval_f1"),
):
    print(f"Test {label}: {eval_results[key]:.4f}")

In [None]:
# Save the model and the tokenizer into the same directory.
save_dir = "./fine-tuned-model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# **3. Using the Model for Prediction**


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and the classifier from the local "./fine-tuned-model" directory.
model_name = "./fine-tuned-model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Select the best available accelerator: CUDA first, then Apple MPS, then CPU.
# `getattr` guards against torch builds that do not ship `torch.backends.mps`.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)

In [None]:
def predict_sentiment(review_text):
    """Classify a single review as "Positive" or "Negative".

    Args:
        review_text: The review text to classify.

    Returns:
        A ``(sentiment, probabilities)`` tuple. ``sentiment`` is ``"Positive"`` when
        the highest-scoring class index is 1 and ``"Negative"`` otherwise;
        ``probabilities`` is a NumPy array with the softmax scores of the first row
        of the model output.
    """
    inputs = tokenizer(review_text, padding=True, truncation=True, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    predictions = torch.softmax(logits, dim=-1)

    # `.item()` requires exactly one prediction, i.e. a single input review.
    predicted_label = torch.argmax(predictions, dim=-1).item()
    sentiment = "Positive" if predicted_label == 1 else "Negative"

    return sentiment, predictions[0].cpu().numpy()

In [None]:
# Try the prediction helper on one sample review and print the result.
review_text = "This movie was absolutely wonderful, with brilliant acting and a captivating story!"
sentiment, probs = predict_sentiment(review_text)

print(
    f"Review: {review_text}",
    f"Predicted Sentiment: {sentiment}",
    f"Probabilities: {probs}",
    sep="\n",
)

In [None]:
# Sample reviews used to exercise the prediction helper.
test_reviews = [
    "I loved this movie! The plot was amazing and the characters were so well developed.",
    "This was the worst movie I have ever seen. It was a complete waste of time.",
    "An average film with decent acting but nothing special.",
    "Absolutely fantastic! A must-watch for everyone.",
    "The story was boring and the acting was mediocre at best.",
]

# Each result is framed by a separator line above and below.
separator = "----" * 20
for review in test_reviews:
    sentiment, probs = predict_sentiment(review)
    print(
        separator,
        f"Review: {review}",
        f"Predicted Sentiment: {sentiment}",
        f"Probabilities: {probs}\n",
        separator,
        sep="\n",
    )
