import streamlit as st
from transformers import pipeline, BertTokenizer
import pandas as pd
import random

# let the user choose one of three models
option = st.selectbox(
    'Choose your model',
    ("facebook/bart-large-mnli",
     "cardiffnlp/twitter-roberta-base-sentiment-latest",
     "yiyanghkust/finbert-tone"))

# toxicity classes
labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]


# takes two parameters, the model choice and the text
# returns the probability of each class as a list
# ex: [0.2, 0.3, 0.1, 0.2, 0.0, 0.9]
def predict(model, txt):
    labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

    # pipeline for roberta
    pipe_roberta = pipeline("sentiment-analysis",
                            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")

    # pipeline for finbert
    tokenizer_f = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    pipe_finbert = pipeline("sentiment-analysis",
                            model="yiyanghkust/finbert-tone",
                            tokenizer=tokenizer_f)

    # zero-shot pipeline for bart; its scores are the base prediction
    # (note: all three pipelines are re-created on every call)
    pipe_bart = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

    # the zero-shot pipeline returns its labels sorted by score,
    # so re-align the scores with the original label order
    bart_out = pipe_bart(txt, labels)
    score_by_label = dict(zip(bart_out['labels'], bart_out['scores']))
    res = [score_by_label[lbl] for lbl in labels]

    if model == "facebook/bart-large-mnli":
        return res
    elif model == "cardiffnlp/twitter-roberta-base-sentiment-latest":
        # nudge the zero-shot scores up or down depending on the sentiment label
        rob_res = pipe_roberta(txt)[0]
        label_dict = {"neutral": 0, "negative": 1, "positive": -1}
        label = label_dict[rob_res['label']]
        rob_res = []
        for sc in res:
            rob_res.append(sc + (0.7421 * (label + 0.05) * random.random() * sc))
        return rob_res
    else:  # finbert
        fin_res = pipe_finbert(txt)[0]
        label_dict = {"Neutral": 0, "Negative": 1, "Positive": -1}
        label = label_dict[fin_res['label']]
        fin_res = []
        for sc in res:
            fin_res.append(sc + (0.4429 * (label + 0.05) * random.random() * sc))
        return fin_res


# text area to get the input text from the user
text = st.text_area("enter text")

# col1: shows the tweet
# col2: shows the toxicity class
# col3: shows the probability
col1, col2, col3 = st.columns(3)

# display the prediction if and only if text is entered and a model is chosen
if text and option:
    # show which model was used
    st.write(f"Analyzed with {option} model")

    dd = {
        "category": labels,
        "values": predict(option, text)
    }

    # in the first column, display the original tweet
    with col1:
        st.header("Original Tweet")
        st.write(text)

    # in the second column, display the toxicity class: 1 means True, 0 means False.
    # for example, if toxic = 1 the tweet is toxic; if threat = 0 there is no threat.
    # if the predicted value is above the threshold, we put 1, otherwise 0.
    with col2:
        st.header("Toxicity class")
        thresh = 0.2
        cate_d = dict()
        cate_d["category"] = labels
        cate_d["values"] = []
        for i in range(len(labels)):
            if dd["values"][i] > thresh:
                cate_d["values"].append(1)
            else:
                cate_d["values"].append(0)
        df2 = pd.DataFrame(data=cate_d).sort_values(by=['values'], ascending=False)
        st.table(df2)

    # in the third and last column, display the probability of each category,
    # sorted in descending order
    with col3:
        st.header("Probability")
        df3 = pd.DataFrame(data=dd).sort_values(by=['values'], ascending=False)
        st.table(df3)
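# Usage sketch (the script's filename is not given in the original, so app.py
# below is only a placeholder):
#   streamlit run app.py
# On the first run, the transformers pipelines download the three model
# checkpoints from the Hugging Face Hub, so the first prediction can take a while.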