Achar's picture
Duplicate from bright1/sentiment-analysis-app-gradio
2054bfe
raw
history blame contribute delete
No virus
2.88 kB
import gradio as gr
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoConfig,AutoModelForSequenceClassification
from scipy.special import softmax
import os
# Requirements
def load_distilbert():
model_path = "bright1/fine-tuned-distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
return model, tokenizer
def load_roberta():
model_path = "bright1/fine-tuned-twitter-Roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
return model, tokenizer
# def check_csv(csv_file, data):
# if os.path.isfile(csv_file):
# data.to_csv(csv_file, mode='a', header=False, index=False, encoding='utf-8')
# else:
# history = data.copy()
# history.to_csv(csv_file, index=False)
#Preprocess text
def preprocess(text):
new_text = []
for t in text.split(" "):
t = "@user" if t.startswith("@") and len(t) > 1 else t
t = "http" if t.startswith("http") else t
print(t)
new_text.append(t)
print(new_text)
return " ".join(new_text)
#Process the input and return prediction
def sentiment_analysis(model_type, text):
if model_type== 'distilbert':
model, tokenizer = load_distilbert()
else:
model, tokenizer = load_roberta()
save_text = {'tweet': text}
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors = "pt") # for PyTorch-based models
output = model(**encoded_input)
scores_ = output[0][0].detach().numpy()
scores_ = softmax(scores_)
# Format output dict of scores
labels = ["Negative", "Neutral", "Positive"]
scores = {l:float(s) for (l,s) in zip(labels, scores_) }
# save_text.update(scores)
# user_data = {key: [value] for key,value in save_text.items()}
# data = pd.DataFrame(user_data,)
# check_csv('history.csv', data)
# hist_df = pd.read_csv('history.csv')
return scores
# , hist_df.head()
model_type = gr.Radio(choices=['distilbert', 'roberta'], label='Select model type', value='roberta' )
#Gradio app interface
#Gradio app interface
demo = gr.Interface(fn = sentiment_analysis,
inputs = [model_type, gr.TextArea("Write your text or tweet here", label="Analyze your COVID-19 tweets" )],
outputs = ["label"],
title = "COVID-19 Vaccine Tweet Analyzer App",
description = "COVID-19 Tweets Analyzer",
interpretation = "default",
examples = [["roberta", "Being vaccinated is actually awesome :)"]]
).launch()