# SP / app.py
import numpy as np
import csv
from typing import Optional
from urllib.request import urlopen
import gradio as gr
class SentimentTransform:
def __init__(
self,
model_name: str = "cardiffnlp/twitter-roberta-base-sentiment",
highlight: bool = False,
positive_sentiment_name: str = "positive",
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
sensitivity: float = 0,
**kwargs,
):
"""
Sentiment Ops.
Parameters
-------------
model_name: str
The name of the model
sensitivity: float
How confident it is about being `neutral`. If you are dealing with news sources,
you probably want less sensitivity
"""
self.model_name = model_name
self.highlight = highlight
self.positive_sentiment_name = positive_sentiment_name
self.max_number_of_shap_documents = max_number_of_shap_documents
self.min_abs_score = min_abs_score
self.sensitivity = sensitivity
for k, v in kwargs.items():
setattr(self, k, v)
def preprocess(self, text: str):
new_text = []
for t in text.split(" "):
t = "@user" if t.startswith("@") and len(t) > 1 else t
t = "http" if t.startswith("http") else t
new_text.append(t)
return " ".join(new_text)
@property
def classifier(self):
if not hasattr(self, "_classifier"):
import transformers
self._classifier = transformers.pipeline(
return_all_scores=True,
model=self.model_name,
)
return self._classifier
def _get_label_mapping(self, task: str):
# Note: this is specific to the current model
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urlopen(mapping_link) as f:
html = f.read().decode("utf-8").split("\n")
csvreader = csv.reader(html, delimiter="\t")
labels = [row[1] for row in csvreader if len(row) > 1]
return labels
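    # Unused helper: it downloads the tweeteval label mapping for a given task
    # at runtime. The hardcoded `label_mapping` property below corresponds to
    # the "sentiment" task mapping from that repository.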
@property
def label_mapping(self):
return {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}
def analyze_sentiment(
self,
text,
highlight: bool = False,
positive_sentiment_name: str = "positive",
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
):
if text is None:
return None
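        # With return_all_scores=True the pipeline returns, per input, a list of
        # {"label": ..., "score": ...} dicts covering every class, e.g.
        # [{"label": "LABEL_0", "score": 0.02}, {"label": "LABEL_1", ...}, ...]
        # (example values are illustrative).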
labels = self.classifier([str(text)], truncation=True, max_length=512)
ind_max = np.argmax([l["score"] for l in labels[0]])
sentiment = labels[0][ind_max]["label"]
max_score = labels[0][ind_max]["score"]
sentiment = self.label_mapping.get(sentiment, sentiment)
if sentiment.lower() == "neutral" and max_score > self.sensitivity:
overall_sentiment = 1e-5
elif sentiment.lower() == "neutral":
# get the next highest score
new_labels = labels[0][:ind_max] + labels[0][(ind_max + 1):]
new_ind_max = np.argmax([l["score"] for l in new_labels])
new_max_score = new_labels[new_ind_max]["score"]
new_sentiment = new_labels[new_ind_max]["label"]
new_sentiment = self.label_mapping.get(new_sentiment, new_sentiment)
overall_sentiment = self._calculate_overall_sentiment(
new_max_score, new_sentiment
)
else:
overall_sentiment = self._calculate_overall_sentiment(max_score, sentiment)
        # Avoid returning exactly 0 by nudging it to a small epsilon.
if overall_sentiment == 0:
overall_sentiment = 1e-5
if not highlight:
return {
"sentiment": sentiment,
"overall_sentiment_score": overall_sentiment,
}
shap_documents = self.get_shap_values(
text,
sentiment_ind=ind_max,
max_number_of_shap_documents=max_number_of_shap_documents,
min_abs_score=min_abs_score,
)
return {
"sentiment": sentiment,
"score": max_score,
"overall_sentiment": overall_sentiment,
"highlight_chunk_": shap_documents,
}
def _calculate_overall_sentiment(self, score: float, sentiment: str):
if sentiment.lower().strip() == self.positive_sentiment_name:
return score
else:
return -score
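    # For example, _calculate_overall_sentiment(0.97, "positive") -> 0.97 and
    # _calculate_overall_sentiment(0.97, "negative") -> -0.97 (with the default
    # positive_sentiment_name), so the sign encodes polarity and the magnitude
    # encodes confidence.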
    @property
    def explainer(self):
        # Lazily construct a SHAP explainer around the classifier pipeline.
        # Only needed when `highlight=True`.
        if not hasattr(self, "_explainer"):
            try:
                import shap
            except ModuleNotFoundError as e:
                raise ModuleNotFoundError(
                    "The `shap` package is required for highlighting."
                ) from e
            self._explainer = shap.Explainer(self.classifier)
        return self._explainer
def get_shap_values(
self,
text: str,
sentiment_ind: int = 2,
max_number_of_shap_documents: Optional[int] = None,
min_abs_score: float = 0.1,
):
"""Get SHAP values"""
shap_values = self.explainer([text])
cohorts = {"": shap_values}
cohort_labels = list(cohorts.keys())
cohort_exps = list(cohorts.values())
features = cohort_exps[0].data
feature_names = cohort_exps[0].feature_names
values = np.array([cohort_exps[i].values for i in range(len(cohort_exps))])
shap_docs = [
{"text": v, "score": f}
for f, v in zip(
[x[sentiment_ind] for x in values[0][0].tolist()], feature_names[0]
)
]
        # Sort by score (descending) and optionally cap the number of chunks
        # before filtering out low-magnitude scores.
        sorted_scores = sorted(shap_docs, key=lambda x: x["score"], reverse=True)
        if max_number_of_shap_documents is not None:
            sorted_scores = sorted_scores[:max_number_of_shap_documents]
        return [d for d in sorted_scores if abs(d["score"]) > min_abs_score]
def transform(self, text):
sentiment = self.analyze_sentiment(
text,
highlight=self.highlight,
max_number_of_shap_documents=self.max_number_of_shap_documents,
            min_abs_score=self.min_abs_score,
        )
return sentiment
def sentiment_classifier(text, model_type, sensitivity):
if model_type == 'Social Media Model':
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    else:
        # 'Survey Model' and any other selection use the survey checkpoint.
        model_name = "j-hartmann/sentiment-roberta-large-english-3-classes"
model = SentimentTransform(model_name=model_name, sensitivity=sensitivity)
res_dict = model.transform(text)
return res_dict['sentiment'], res_dict['overall_sentiment_score']
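# Quick sanity check outside Gradio (illustrative only; the score shown is made
# up, not actual model output):
#   model = SentimentTransform(sensitivity=0.4)
#   model.transform("I really enjoyed this!")
#   # -> {"sentiment": "positive", "overall_sentiment_score": 0.99}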
demo = gr.Interface(
    fn=sentiment_classifier,
    inputs=[
        gr.Textbox(placeholder="Put the text here and click 'submit' to predict its sentiment", label="Input Text"),
        gr.Dropdown(["Social Media Model", "Survey Model"], value="Survey Model", label="Select the model that you want to use."),
        gr.Slider(0, 1, step=0.01, label="Sensitivity (how confident the model must be to keep a `neutral` prediction; for news sources you probably want less sensitivity)."),
    ],
    outputs=[gr.Textbox(label='Sentiment'), gr.Textbox(label='Sentiment Score')],
)
demo.launch(debug=True)