Spaces:

RobCaamano
/

Finetuning_Language_Models-Toxic_Tweets

Running

App Files Files Community

Finetuning_Language_Models-Toxic_Tweets / app.py

RobCaamano

Update app.py

4c23b4e over 1 year ago

raw

history blame contribute delete

2.63 kB

	import streamlit as st
	import pandas as pd
	from transformers import AutoTokenizer, pipeline
	from transformers import (
	TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
	)

	# ---- Page header and input widgets -------------------------------------
	st.title("Classifier")

	# Example inputs, keyed by the label shown in the "Demos" dropdown.
	demo_options = {
	    "Non-toxic": "Had a wonderful weekend at the park. Enjoyed the beautiful weather!",
	    "Obscene": "I don't give a fuck about your opinion",
	    "Threat": "I will find and kill you",
	    "Insult": "You are so stupid",
	    "Identity Hate": "I hate gay people. Its just my opinion.",
	}

	# The selected demo supplies the initial content of the text area.
	demo_choice = st.selectbox("Demos", options=[*demo_options])
	text = st.text_area("Input text", demo_options[demo_choice], height=250)

	# Dropdown label -> model identifier used to load the checkpoint.
	model_mapping = {
	    "Toxicity - 1 Epoch": "RobCaamano/toxicity",
	    "Toxicity - 8 Epochs": "RobCaamano/toxicity_update",
	    "Toxicity - Weighted": "RobCaamano/toxicity_weighted",
	    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
	}

	# Defaults; both names are reassigned inside the container below.
	submit, model_name = False, ""

	with st.container():
	    model_label = st.selectbox("Select Model", options=[*model_mapping])
	    model_name = model_mapping[model_label]
	    submit = st.button("Submit", type="primary")

	# Streamlit re-executes this script on every widget interaction, so loading
	# the checkpoint at module level rebuilt the tokenizer, model and pipeline
	# on each rerun. Cache the pipeline per checkpoint instead.
	# st.cache_resource exists in Streamlit >= 1.18; older releases expose the
	# equivalent decorator as st.experimental_singleton.
	_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


	@_cache_resource
	def load_classifier(checkpoint: str):
	    """Return the classification pipeline for ``checkpoint``.

	    The tokenizer and the TensorFlow sequence-classification model are both
	    loaded from ``checkpoint`` and wrapped in a ``sentiment-analysis``
	    pipeline that reports a score for every label. The result is cached by
	    Streamlit, so each distinct checkpoint is loaded once per server process
	    rather than once per rerun.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
	    # NOTE(review): newer transformers releases deprecate return_all_scores
	    # in favour of top_k=None; that switch appears to change the nesting of
	    # the output for a single string, so confirm before migrating.
	    return pipeline(
	        "sentiment-analysis",
	        model=model,
	        tokenizer=tokenizer,
	        return_all_scores=True,
	    )


	# The former module-level `input = tokenizer(text, ...)` was dropped: it
	# shadowed the builtin `input`, was never read, and tokenized on every rerun.
	clf = load_classifier(model_name)

	def summarize_scores(checkpoint, scores):
	    """Build the one-row results table for a classified text.

	    Args:
	        checkpoint: Model identifier that produced ``scores``.
	        scores: Mapping of label name to score for a single input text.

	    Returns:
	        A single-row ``pandas.DataFrame``. For the toxicity checkpoints it
	        has a ``Toxic`` verdict and, when the ``toxic`` score is at least
	        0.5, the highest-scoring other label with its score. For any other
	        checkpoint it has the highest-scoring label and its score.

	    Raises:
	        KeyError: If a toxicity checkpoint did not emit a ``toxic`` label.
	    """
	    toxicity_checkpoints = (
	        "RobCaamano/toxicity",
	        "RobCaamano/toxicity_update",
	        "RobCaamano/toxicity_weighted",
	    )

	    if checkpoint in toxicity_checkpoints:
	        if scores["toxic"] >= 0.5:
	            # "toxic" is the overall verdict; report the strongest of the
	            # remaining labels as the toxicity class.
	            subtypes = {
	                label: score for label, score in scores.items() if label != "toxic"
	            }
	            top_label = max(subtypes, key=subtypes.get)
	            return pd.DataFrame({
	                'Toxic': ['Yes'],
	                'Toxicity Class': [top_label],
	                'Probability': [subtypes[top_label]],
	            })
	        return pd.DataFrame({
	            'Toxic': ['No'],
	            'Toxicity Class': ['This text is not toxic'],
	        })

	    # Generic fallback (covers the SST-2 checkpoint): show the top label.
	    # Using a fallback instead of an exact-name match means an unlisted
	    # checkpoint can no longer leave the table undefined.
	    top_label = max(scores, key=scores.get)
	    return pd.DataFrame({
	        'Result': [top_label],
	        'Probability': [scores[top_label]],
	    })


	if submit:
	    if not text.strip():
	        # Nothing meaningful to classify; tell the user instead of scoring
	        # an empty string.
	        st.warning("Please enter some text to classify.")
	    else:
	        # Read label/score by key rather than relying on the key order of
	        # each result dict. truncation=True is forwarded to the tokenizer so
	        # over-long input is cut to the model's limit instead of raising.
	        results = {
	            entry["label"]: entry["score"]
	            for entry in clf(text, truncation=True)[0]
	        }

	        result_df = summarize_scores(model_name, results)
	        st.table(result_df)

	        expander = st.expander("View Raw output")
	        expander.write(results)