# Rob Caamano
# App.py New Model
# 94c9a58 unverified
# raw
# history blame
# 1.96 kB
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer
from transformers import (
TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
)
# Page title and the free-text input box (pre-filled with a sample sentence).
st.title("Detecting Toxic Tweets")
demo = """Your words are like poison. They seep into my mind and make me feel worthless."""
text = st.text_area("Input Text", value=demo, height=250)

# Dropdown display name -> model identifier handed to ``from_pretrained``.
model_options = {
    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
    "Fine-tuned Toxicity Model": "RobCaamano/toxicity",
    "Fine-tuned Toxicity Model - Optimized": "RobCaamano/toxicity_optimized",
}

selected_model = st.selectbox("Select Model", options=list(model_options))
mod_name = model_options[selected_model]

# NOTE(review): tokenizer and model are loaded at module top level with no
# caching; presumably this repeats on every script run -- confirm and
# consider caching if load time matters.
tokenizer = AutoTokenizer.from_pretrained(mod_name)
model = AutoModelForSequenceClassification.from_pretrained(mod_name)

# For the two fine-tuned toxicity models, replace the config's index -> label
# map with the toxicity class names below.
if selected_model in ("Fine-tuned Toxicity Model", "Fine-tuned Toxicity Model - Optimized"):
    toxicity_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
    label_map = {}
    for class_index in range(model.config.num_labels):
        label_map[class_index] = toxicity_classes[class_index]
    model.config.id2label = label_map
def get_toxicity_class(prediction, id2label=None):
    """Return the top-scoring label and its score for one prediction row.

    Args:
        prediction: Array-like exposing ``argmax()`` and integer indexing.
            Expected to be one-dimensional (the caller passes a single row
            of model output as a NumPy array).
        id2label: Optional mapping from class index to label name. When
            omitted, ``model.config.id2label`` of the module-level ``model``
            is used, which is the original behaviour.

    Returns:
        A ``(label, score)`` tuple: the label mapped from the index of the
        largest entry in ``prediction``, and that entry's value.

    Raises:
        KeyError: If the winning index has no entry in the label mapping.
    """
    if id2label is None:
        id2label = model.config.id2label
    # Cast to a plain int so the lookup does not depend on the array
    # library's integer scalar type.
    max_index = int(prediction.argmax())
    return id2label[max_index], prediction[max_index]
# Classify the text and show the result once the user presses "Submit".
if st.button("Submit", type="primary"):
    # Tokenize and run the model only after submission: the prediction is
    # used nowhere else, so computing it earlier is wasted work.
    encoded = tokenizer(text, return_tensors="tf")

    # First element of the model output, first batch row, as a NumPy array.
    # NOTE(review): for Hugging Face sequence-classification models this
    # element is presumably the raw logits, not probabilities, so both the
    # "Probability" column and the 0.1 threshold below operate on
    # unnormalised scores -- confirm and apply softmax/sigmoid if intended.
    prediction = model(encoded)[0].numpy()[0]
    label, probability = get_toxicity_class(prediction)

    # Show at most the first 50 characters of the input in the results table.
    tweet_portion = (text[:50] + "...") if len(text) > 50 else text

    # Both fine-tuned toxicity models report a toxicity class; any other
    # model reports a generic prediction label.
    toxicity_models = (
        "Fine-tuned Toxicity Model",
        "Fine-tuned Toxicity Model - Optimized",
    )
    if selected_model in toxicity_models:
        column_name = "Toxicity Class"
    else:
        column_name = "Prediction"

    if probability < 0.1:
        st.write("This text is not toxic.")

    df = pd.DataFrame(
        {
            "Text (portion)": [tweet_portion],
            column_name: [label],
            "Probability": [probability],
        }
    )
    st.table(df)