FpOliveira committed on
Commit
095a9fc
1 Parent(s): f4b1b2c

app: app changes

Browse files
Files changed (2) hide show
  1. app.py +63 -30
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,16 +3,21 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  from collections import Counter
5
  from scipy.special import softmax
 
6
 
 
7
  article_string = "Author: <a href=\"https://huggingface.co/FpOliveira\">Felipe Ramos de Oliveira</a>. Read more about our <a href=\"https://github.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset\">The Portuguese hate speech dataset (TuPI) </a>."
8
 
 
9
  app_title = "Portuguese hate speech classifier - Classicador de discurso de ódio em português"
10
 
 
11
  app_description = """
12
  EN: This application employs multiple models to identify hate speech in Portuguese texts. You have the option to enter your own phrases by filling in the "Text" field or choosing one of the examples provided below.
13
  \nPT: Esta aplicativo emprega múltiplos modelos para identificar discurso de ódio em textos portugueses. Você tem a opção de inserir suas próprias frases preenchendo o campo "Texto" ou escolhendo um dos exemplos abaixo
14
  """
15
 
 
16
  app_examples = [
17
  ["bom dia flor do dia!!!"],
18
  ["o ódio é muito grande no coração da ex-deputada federal joise hasselmann contra a família bolsonaro"],
@@ -22,28 +27,27 @@ app_examples = [
22
  ["Pra ser minha inimiga no mínimo tem que ter um rostinho bonito e delicado, não se considere minha rival com essa sua cara de cavalo não, feia, cara de traveco, cabeçuda, queixo quadrado 🤣🤣"]
23
  ]
24
 
 
25
  output_textbox_component_description = """
26
  EN: This box will display hate speech results based on the average score of multiple models.
27
  PT: Esta caixa exibirá resultados da classicação de discurso de ódio com base na pontuação média de vários modelos.
28
  """
29
 
30
- output_json_component_description = { "breakdown": """
 
 
31
  This box presents a detailed breakdown of the evaluation for each model.
32
  """,
33
- "detalhamento": """
34
  (Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
35
- """ }
36
-
37
- short_score_descriptions = {
38
- 0: "Not hate",
39
- 1: "Hate"
40
  }
41
 
42
- # Define hate speech categories
43
  hate_speech_categories = {
44
  0: "ageism",
45
  1: "aporophobia",
46
- 2: "body_shame",
47
  3: "capacitism",
48
  4: "lgbtphobia",
49
  5: "political",
@@ -54,39 +58,52 @@ hate_speech_categories = {
54
  10: "other",
55
  11: "not hate"
56
  }
 
 
57
  model_list = [
58
- "FpOliveira/tupi-bert-large-portuguese-cased",
59
- "FpOliveira/tupi-bert-base-portuguese-cased",
60
  ]
61
 
 
62
  user_friendly_name = {
63
- "FpOliveira/tupi-bert-large-portuguese-cased": "BERTimbau large (TuPi)",
64
- "FpOliveira/tupi-bert-base-portuguese-cased": "BERTimbau base (TuPi)",
65
  }
66
 
67
- reverse_user_friendly_name = { v:k for k,v in user_friendly_name.items() }
 
68
 
 
69
  user_friendly_name_list = list(user_friendly_name.values())
70
 
 
71
  model_array = []
72
 
 
73
  for model_name in model_list:
74
  row = {}
75
  row["name"] = model_name
76
  row["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
77
  row["model"] = AutoModelForSequenceClassification.from_pretrained(model_name)
78
  model_array.append(row)
79
-
 
 
80
def most_frequent(array):
    """Return the element with the highest count in *array*.

    Raises IndexError when *array* is empty (most_common(1) returns []).
    """
    occurence_count = Counter(array)  # sic: name misspells "occurrence"; kept as-is
    return occurence_count.most_common(1)[0][0]
83
 
84
 
 
85
  def predict(s1, chosen_model):
 
 
 
86
  if not chosen_model:
87
  chosen_model = user_friendly_name_list[0]
 
88
  full_chosen_model_name = reverse_user_friendly_name[chosen_model]
89
-
90
  for row in model_array:
91
  name = row["name"]
92
  if name != full_chosen_model_name:
@@ -101,20 +118,35 @@ def predict(s1, chosen_model):
101
  logits = softmax(logits).tolist()
102
  break
103
 
104
- # Get the indices of the top two probabilities
105
- top_two_indices = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)[:2]
 
 
 
 
 
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # Get the categories and probabilities for the top two
108
- top_two_categories = [hate_speech_categories[str(index)] for index in top_two_indices]
109
- top_two_probabilities = [logits[index] for index in top_two_indices]
110
-
111
- result = {
112
- "predicted_categories": top_two_categories,
113
- "probabilities": top_two_probabilities,
114
- }
115
-
116
  return result
117
 
 
118
  inputs = [
119
  gr.Textbox(label="Text", value=app_examples[0][0]),
120
  gr.Dropdown(label="Model", choices=user_friendly_name_list, value=user_friendly_name_list[0])
@@ -122,11 +154,12 @@ inputs = [
122
 
123
  # Output components
124
  outputs = [
125
- gr.Label(label="Top Predicted Categories"),
126
- gr.Label(label="Top Probabilities"),
127
  ]
128
 
129
- gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
 
130
  description=app_description,
131
  examples=app_examples,
132
- article = article_string).launch()
 
3
  import torch
4
  from collections import Counter
5
  from scipy.special import softmax
6
+ import plotly.express as px
7
 
8
+ # Article string
9
  article_string = "Author: <a href=\"https://huggingface.co/FpOliveira\">Felipe Ramos de Oliveira</a>. Read more about our <a href=\"https://github.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset\">The Portuguese hate speech dataset (TuPI) </a>."
10
 
11
# Bilingual (EN / PT) title shown at the top of the Gradio interface.
# Fix: "Classicador" -> "Classificador" (user-facing typo in the PT half).
app_title = "Portuguese hate speech classifier - Classificador de discurso de ódio em português"
13
 
14
# Bilingual usage blurb rendered under the app title.
# Fix: "Esta aplicativo" -> "Este aplicativo" (Portuguese gender agreement;
# "aplicativo" is masculine). All other text reproduced byte-for-byte.
app_description = """
EN: This application employs multiple models to identify hate speech in Portuguese texts. You have the option to enter your own phrases by filling in the "Text" field or choosing one of the examples provided below.
\nPT: Este aplicativo emprega múltiplos modelos para identificar discurso de ódio em textos portugueses. Você tem a opção de inserir suas próprias frases preenchendo o campo "Texto" ou escolhendo um dos exemplos abaixo
"""
19
 
20
+ # App examples
21
  app_examples = [
22
  ["bom dia flor do dia!!!"],
23
  ["o ódio é muito grande no coração da ex-deputada federal joise hasselmann contra a família bolsonaro"],
 
27
  ["Pra ser minha inimiga no mínimo tem que ter um rostinho bonito e delicado, não se considere minha rival com essa sua cara de cavalo não, feia, cara de traveco, cabeçuda, queixo quadrado 🤣🤣"]
28
  ]
29
 
30
# Bilingual description for the textbox that reports the classification result.
# Fix: "classicação" -> "classificação" (user-facing typo in the PT half).
output_textbox_component_description = """
EN: This box will display hate speech results based on the average score of multiple models.
PT: Esta caixa exibirá resultados da classificação de discurso de ódio com base na pontuação média de vários modelos.
"""
35
 
36
# EN ("breakdown") / PT ("detalhamento") descriptions for the JSON component
# that shows each model's per-class evaluation breakdown.
output_json_component_description = {
    "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
    "detalhamento": """
(Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
"""
}
45
 
46
+ # Hate speech categories
47
  hate_speech_categories = {
48
  0: "ageism",
49
  1: "aporophobia",
50
+ 2: "body shame",
51
  3: "capacitism",
52
  4: "lgbtphobia",
53
  5: "political",
 
58
  10: "other",
59
  11: "not hate"
60
  }
61
+
62
# Hugging Face Hub identifiers of the classifiers this app can serve.
model_list = [
    "FpOliveira/tupi-bert-large-portuguese-cased-multiclass-multilabel",
    "FpOliveira/tupi-bert-base-portuguese-cased-multiclass-multilabel",
]
67
 
68
# Maps each Hub model id to the human-readable label shown in the UI dropdown.
user_friendly_name = {
    "FpOliveira/tupi-bert-large-portuguese-cased-multiclass-multilabel": "BERTimbau large (TuPi)",
    "FpOliveira/tupi-bert-base-portuguese-cased-multiclass-multilabel": "BERTimbau base (TuPi)",
}
73
 
74
# Inverse lookup: UI label -> Hub model id (used by predict to resolve the
# dropdown selection back to a loadable model name).
reverse_user_friendly_name = {v: k for k, v in user_friendly_name.items()}
76
 
77
# Dropdown choices, in the same order as model_list (dict preserves insertion order).
user_friendly_name_list = list(user_friendly_name.values())
79
 
80
# Eagerly load tokenizer + classification weights for every served model.
# NOTE(review): from_pretrained fetches from the Hugging Face Hub at import
# time, so app startup requires network access (or a warm local cache).
model_array = []
for model_name in model_list:
    model_array.append(
        {
            "name": model_name,
            "tokenizer": AutoTokenizer.from_pretrained(model_name),
            "model": AutoModelForSequenceClassification.from_pretrained(model_name),
        }
    )
90
+
91
+
92
# Majority-vote helper.
def most_frequent(array):
    """Return the value that occurs most often in *array*.

    Raises IndexError when *array* is empty.
    """
    tally = Counter(array)
    top = tally.most_common(1)
    return top[0][0]
96
 
97
 
98
+ # Prediction function
99
  def predict(s1, chosen_model):
100
+ # Clear previous figure instance
101
+ fig = None
102
+
103
  if not chosen_model:
104
  chosen_model = user_friendly_name_list[0]
105
+ scores = {}
106
  full_chosen_model_name = reverse_user_friendly_name[chosen_model]
 
107
  for row in model_array:
108
  name = row["name"]
109
  if name != full_chosen_model_name:
 
118
  logits = softmax(logits).tolist()
119
  break
120
 
121
+ # Get the indices of all probabilities
122
+ all_indices = range(len(logits))
123
+
124
+ # Get the indices of the top two probabilities
125
+ top_indices = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)
126
+
127
+ # Filter out invalid indices
128
+ valid_indices = [index for index in top_indices if index < len(hate_speech_categories)]
129
 
130
+ # Get the categories and probabilities for all classes
131
+ all_categories = [hate_speech_categories[index] for index in valid_indices]
132
+ all_probabilities = [logits[index] for index in valid_indices]
133
+
134
+ # Create a bar plot using Plotly
135
+ fig = px.bar(x=all_categories, y=all_probabilities, labels={'x': 'Categories', 'y': 'Probabilities'},
136
+ title="Classes Predicted Probabilities", text=all_probabilities)
137
+ fig.update_traces(texttemplate='%{text:.4f}', textposition='outside')
138
+ fig.show()
139
+
140
+ # # Save the figure as HTML
141
+ # html_figure = fig.to_html()
142
+
143
  # Get the categories and probabilities for the top two
144
+ top_category = [hate_speech_categories[index] for index in top_indices]
145
+ top_probability = [logits[index] for index in top_indices]
146
+ result = top_category[0], 1-top_probability[1]
 
 
 
 
 
147
  return result
148
 
149
+ # Input components
150
  inputs = [
151
  gr.Textbox(label="Text", value=app_examples[0][0]),
152
  gr.Dropdown(label="Model", choices=user_friendly_name_list, value=user_friendly_name_list[0])
 
154
 
155
  # Output components
156
  outputs = [
157
+ gr.Label(label="Predominant category"),
158
+ gr.Label(label="Probability"),
159
  ]
160
 
161
+ # Gradio interface
162
+ gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
163
  description=app_description,
164
  examples=app_examples,
165
+ article=article_string).launch()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  torch
2
  gradio
3
  transformers
4
- scipy
 
 
1
  torch
2
  gradio
3
  transformers
4
+ scipy
5
+ plotly