Spaces:
Runtime error
Runtime error
import streamlit as st # Web App | |
from main import classify | |
import pandas as pd | |
# demo_phrases = """ Here are some examples: | |
# this is a phrase | |
# is it neutral | |
# nothing else to say | |
# man I'm so damn angry | |
# sarcasm lol | |
# I love this product | |
# """ | |
#demo_phrases = ( | |
# pd.read_csv("./train.csv")["comment_text"].head(6).astype(str).str.cat(sep="\n") | |
#) | |
# Build the default text shown in the input box: the first three comments
# flagged toxic followed by the first three flagged non-toxic.
df = pd.read_csv("./train.csv")
toxic = df[df["toxic"] == 1]["comment_text"].head(3)
normal = df[df["toxic"] == 0]["comment_text"].head(3)
demo_phrases = (
    pd.concat([toxic, normal])
    .astype(str)
    # The app treats every line of the text box as a separate input, so a
    # comment containing its own line breaks would be split into several
    # fragments. Collapse in-comment line breaks to a single space first.
    .str.replace(r"\s*[\r\n]+\s*", " ", regex=True)
    .str.strip()
    .str.cat(sep="\n")
)
# title
st.title("Sentiment Analysis")
# subtitle
st.markdown("## A selection of popular sentiment analysis models - hosted on 🤗 Spaces")
# Model picker. The last entry is the custom fine-tuned model, which this
# script scores with `infer` instead of `classify`.
model_name = st.selectbox(
    "Select a pre-trained model",
    [
        "finiteautomata/bertweet-base-sentiment-analysis",
        "ahmedrachid/FinancialBERT-Sentiment-Analysis",
        "finiteautomata/beto-sentiment-analysis",
        "NativeVex/custom-fine-tuned",
    ],
)
input_sentences = st.text_area("Sentences", value=demo_phrases, height=200)
# One input per line. Blank or whitespace-only lines (e.g. a trailing newline
# in the text box) are dropped so empty strings are never sent to a model.
data = [line for line in input_sentences.split("\n") if line.strip()]
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# Directory holding the custom fine-tuned checkpoint; the tokenizer and the
# sequence-classification model are both loaded from it.
model_path = "bin/model4"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
from typing import List, Dict | |
import torch | |
import numpy as np | |
import pandas as pd | |
def infer(text: str) -> Dict[str, str]:
    """Score one text with the custom model loaded at module level.

    Every model output is passed through a sigmoid independently, and a
    label counts as predicted when its probability is at least 0.5.

    Args:
        text (str): text to infer

    Returns:
        Dict[str, str]: maps each label name to the string form of a
            ``(predicted, probability)`` tuple, e.g. ``"(True, 0.98)"``.

    Raises:
        ValueError: if the model does not emit exactly one score per label.
    """
    labels = (
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    )
    # truncation=True asks the tokenizer to cut the input at its configured
    # maximum length, so over-long texts are not fed to the model whole.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        logits = model(**encoding).logits
    # Flatten the single-item batch to one score per label.
    probs = torch.sigmoid(logits.reshape(-1).cpu()).tolist()
    if len(probs) != len(labels):
        raise ValueError(
            f"Expected {len(labels)} scores from the model, got {len(probs)}"
        )
    # Values are stringified tuples, matching what the results table shows.
    return {label: str((prob >= 0.5, prob)) for label, prob in zip(labels, probs)}
def wrapper(*args, **kwargs):
    """Dispatch a request to the custom model or to ``classify``.

    The first positional argument is the model name. When it names the
    custom fine-tuned model, the second positional argument is passed to
    ``infer`` as ``text``; otherwise all arguments are forwarded unchanged
    to ``classify``.
    """
    requested_model = args[0]
    if requested_model == "NativeVex/custom-fine-tuned":
        return infer(text=args[1])
    return classify(*args, **kwargs)
# Run the selected model over every input line when the button is pressed.
if st.button("Classify"):
    chosen_model = model_name.strip()
    if chosen_model == "NativeVex/custom-fine-tuned":
        # Custom model: collect the per-label results for all inputs, then
        # show them together as one table.
        st.write(
            "To render the dataframe, all inputs must be sequentially"
            " processed before displaying. Please allow a few minutes for longer"
            " inputs."
        )
        results_table = pd.DataFrame([infer(text=sentence) for sentence in data])
        st.dataframe(data=results_table)
    else:
        # Other models: write one result line per input, using the first
        # entry returned by `classify`.
        st.write("Please allow a few minutes for the model to run/download")
        for position, sentence in enumerate(data):
            top_result = classify(chosen_model, sentence)[0]
            label = top_result["label"]
            score = top_result["score"]
            st.write(
                f"{position}. {sentence} :: Classification - {label} with confidence {score}"
            )
# Footer: markdown link rendered below the app.
st.markdown(
    "Link to the app - [image-to-text-app on 🤗 Spaces]"
    "(https://huggingface.co/spaces/Amrrs/image-to-text-app)"
)