FpOliveira committed on
Commit
095a9fc
1 Parent(s): f4b1b2c

app: app changes

Browse files
Files changed (2) hide show
  1. app.py +63 -30
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,16 +3,21 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  from collections import Counter
5
  from scipy.special import softmax
 
6
 
 
7
  article_string = "Author: <a href=\"https://huggingface.co/FpOliveira\">Felipe Ramos de Oliveira</a>. Read more about our <a href=\"https://github.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset\">The Portuguese hate speech dataset (TuPI) </a>."
8
 
 
9
  app_title = "Portuguese hate speech classifier - Classicador de discurso de ódio em português"
10
 
 
11
  app_description = """
12
  EN: This application employs multiple models to identify hate speech in Portuguese texts. You have the option to enter your own phrases by filling in the "Text" field or choosing one of the examples provided below.
13
  \nPT: Esta aplicativo emprega múltiplos modelos para identificar discurso de ódio em textos portugueses. Você tem a opção de inserir suas próprias frases preenchendo o campo "Texto" ou escolhendo um dos exemplos abaixo
14
  """
15
 
 
16
  app_examples = [
17
  ["bom dia flor do dia!!!"],
18
  ["o ódio é muito grande no coração da ex-deputada federal joise hasselmann contra a família bolsonaro"],
@@ -22,28 +27,27 @@ app_examples = [
22
  ["Pra ser minha inimiga no mínimo tem que ter um rostinho bonito e delicado, não se considere minha rival com essa sua cara de cavalo não, feia, cara de traveco, cabeçuda, queixo quadrado 🤣🤣"]
23
  ]
24
 
 
25
  output_textbox_component_description = """
26
  EN: This box will display hate speech results based on the average score of multiple models.
27
  PT: Esta caixa exibirá resultados da classicação de discurso de ódio com base na pontuação média de vários modelos.
28
  """
29
 
30
- output_json_component_description = { "breakdown": """
 
 
31
  This box presents a detailed breakdown of the evaluation for each model.
32
  """,
33
- "detalhamento": """
34
  (Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
35
- """ }
36
-
37
- short_score_descriptions = {
38
- 0: "Not hate",
39
- 1: "Hate"
40
  }
41
 
42
- # Define hate speech categories
43
  hate_speech_categories = {
44
  0: "ageism",
45
  1: "aporophobia",
46
- 2: "body_shame",
47
  3: "capacitism",
48
  4: "lgbtphobia",
49
  5: "political",
@@ -54,39 +58,52 @@ hate_speech_categories = {
54
  10: "other",
55
  11: "not hate"
56
  }
 
 
57
  model_list = [
58
- "FpOliveira/tupi-bert-large-portuguese-cased",
59
- "FpOliveira/tupi-bert-base-portuguese-cased",
60
  ]
61
 
 
62
  user_friendly_name = {
63
- "FpOliveira/tupi-bert-large-portuguese-cased": "BERTimbau large (TuPi)",
64
- "FpOliveira/tupi-bert-base-portuguese-cased": "BERTimbau base (TuPi)",
65
  }
66
 
67
- reverse_user_friendly_name = { v:k for k,v in user_friendly_name.items() }
 
68
 
 
69
  user_friendly_name_list = list(user_friendly_name.values())
70
 
 
71
  model_array = []
72
 
 
73
  for model_name in model_list:
74
  row = {}
75
  row["name"] = model_name
76
  row["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
77
  row["model"] = AutoModelForSequenceClassification.from_pretrained(model_name)
78
  model_array.append(row)
79
-
 
 
80
def most_frequent(array):
    """Return the element with the highest count in *array*.

    Raises IndexError when *array* is empty (most_common(1) returns []).
    """
    occurence_count = Counter(array)  # sic: name misspells "occurrence"; kept as-is
    return occurence_count.most_common(1)[0][0]
83
 
84
 
 
85
  def predict(s1, chosen_model):
 
 
 
86
  if not chosen_model:
87
  chosen_model = user_friendly_name_list[0]
 
88
  full_chosen_model_name = reverse_user_friendly_name[chosen_model]
89
-
90
  for row in model_array:
91
  name = row["name"]
92
  if name != full_chosen_model_name:
@@ -101,20 +118,35 @@ def predict(s1, chosen_model):
101
  logits = softmax(logits).tolist()
102
  break
103
 
104
- # Get the indices of the top two probabilities
105
- top_two_indices = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)[:2]
 
 
 
 
 
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # Get the categories and probabilities for the top two
108
- top_two_categories = [hate_speech_categories[str(index)] for index in top_two_indices]
109
- top_two_probabilities = [logits[index] for index in top_two_indices]
110
-
111
- result = {
112
- "predicted_categories": top_two_categories,
113
- "probabilities": top_two_probabilities,
114
- }
115
-
116
  return result
117
 
 
118
  inputs = [
119
  gr.Textbox(label="Text", value=app_examples[0][0]),
120
  gr.Dropdown(label="Model", choices=user_friendly_name_list, value=user_friendly_name_list[0])
@@ -122,11 +154,12 @@ inputs = [
122
 
123
  # Output components
124
  outputs = [
125
- gr.Label(label="Top Predicted Categories"),
126
- gr.Label(label="Top Probabilities"),
127
  ]
128
 
129
- gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
 
130
  description=app_description,
131
  examples=app_examples,
132
- article = article_string).launch()
 
3
  import torch
4
  from collections import Counter
5
  from scipy.special import softmax
6
+ import plotly.express as px
7
 
8
+ # Article string
9
  article_string = "Author: <a href=\"https://huggingface.co/FpOliveira\">Felipe Ramos de Oliveira</a>. Read more about our <a href=\"https://github.com/Silly-Machine/TuPi-Portuguese-Hate-Speech-Dataset\">The Portuguese hate speech dataset (TuPI) </a>."
10
 
11
# Bilingual (EN / PT) title shown at the top of the Gradio interface.
# Fix: "Classicador" -> "Classificador" (user-facing typo in the PT half).
app_title = "Portuguese hate speech classifier - Classificador de discurso de ódio em português"
13
 
14
# Bilingual usage blurb rendered under the app title.
# Fix: "Esta aplicativo" -> "Este aplicativo" (Portuguese gender agreement;
# "aplicativo" is masculine). All other text reproduced byte-for-byte.
app_description = """
EN: This application employs multiple models to identify hate speech in Portuguese texts. You have the option to enter your own phrases by filling in the "Text" field or choosing one of the examples provided below.
\nPT: Este aplicativo emprega múltiplos modelos para identificar discurso de ódio em textos portugueses. Você tem a opção de inserir suas próprias frases preenchendo o campo "Texto" ou escolhendo um dos exemplos abaixo
"""
19
 
20
+ # App examples
21
  app_examples = [
22
  ["bom dia flor do dia!!!"],
23
  ["o ódio é muito grande no coração da ex-deputada federal joise hasselmann contra a família bolsonaro"],
 
27
  ["Pra ser minha inimiga no mínimo tem que ter um rostinho bonito e delicado, não se considere minha rival com essa sua cara de cavalo não, feia, cara de traveco, cabeçuda, queixo quadrado 🤣🤣"]
28
  ]
29
 
30
# Bilingual description for the textbox that reports the classification result.
# Fix: "classicação" -> "classificação" (user-facing typo in the PT half).
output_textbox_component_description = """
EN: This box will display hate speech results based on the average score of multiple models.
PT: Esta caixa exibirá resultados da classificação de discurso de ódio com base na pontuação média de vários modelos.
"""
35
 
36
# EN ("breakdown") / PT ("detalhamento") descriptions for the JSON component
# that shows each model's per-class evaluation breakdown.
output_json_component_description = {
    "breakdown": """
This box presents a detailed breakdown of the evaluation for each model.
""",
    "detalhamento": """
(Esta caixa apresenta um detalhamento da avaliação para cada modelo.)
"""
}
45
 
46
+ # Hate speech categories
47
  hate_speech_categories = {
48
  0: "ageism",
49
  1: "aporophobia",
50
+ 2: "body shame",
51
  3: "capacitism",
52
  4: "lgbtphobia",
53
  5: "political",
 
58
  10: "other",
59
  11: "not hate"
60
  }
61
+
62
# Hugging Face Hub identifiers of the classifiers this app can serve.
model_list = [
    "FpOliveira/tupi-bert-large-portuguese-cased-multiclass-multilabel",
    "FpOliveira/tupi-bert-base-portuguese-cased-multiclass-multilabel",
]
67
 
68
# Maps each Hub model id to the human-readable label shown in the UI dropdown.
user_friendly_name = {
    "FpOliveira/tupi-bert-large-portuguese-cased-multiclass-multilabel": "BERTimbau large (TuPi)",
    "FpOliveira/tupi-bert-base-portuguese-cased-multiclass-multilabel": "BERTimbau base (TuPi)",
}
73
 
74
# Inverse lookup: UI label -> Hub model id (used by predict to resolve the
# dropdown selection back to a loadable model name).
reverse_user_friendly_name = {v: k for k, v in user_friendly_name.items()}
76
 
77
# Dropdown choices, in the same order as model_list (dict preserves insertion order).
user_friendly_name_list = list(user_friendly_name.values())
79
 
80
# Eagerly load tokenizer + classification weights for every served model.
# NOTE(review): from_pretrained fetches from the Hugging Face Hub at import
# time, so app startup requires network access (or a warm local cache).
model_array = []
for model_name in model_list:
    model_array.append(
        {
            "name": model_name,
            "tokenizer": AutoTokenizer.from_pretrained(model_name),
            "model": AutoModelForSequenceClassification.from_pretrained(model_name),
        }
    )
90
+
91
+
92
# Majority-vote helper.
def most_frequent(array):
    """Return the value that occurs most often in *array*.

    Raises IndexError when *array* is empty.
    """
    tally = Counter(array)
    top = tally.most_common(1)
    return top[0][0]
96
 
97
 
98
+ # Prediction function
99
  def predict(s1, chosen_model):
100
+ # Clear previous figure instance
101
+ fig = None
102
+
103
  if not chosen_model:
104
  chosen_model = user_friendly_name_list[0]
105
+ scores = {}
106
  full_chosen_model_name = reverse_user_friendly_name[chosen_model]
 
107
  for row in model_array:
108
  name = row["name"]
109
  if name != full_chosen_model_name:
 
118
  logits = softmax(logits).tolist()
119
  break
120
 
121
+ # Get the indices of all probabilities
122
+ all_indices = range(len(logits))
123
+
124
+ # Get the indices of the top two probabilities
125
+ top_indices = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)
126
+
127
+ # Filter out invalid indices
128
+ valid_indices = [index for index in top_indices if index < len(hate_speech_categories)]
129
 
130
+ # Get the categories and probabilities for all classes
131
+ all_categories = [hate_speech_categories[index] for index in valid_indices]
132
+ all_probabilities = [logits[index] for index in valid_indices]
133
+
134
+ # Create a bar plot using Plotly
135
+ fig = px.bar(x=all_categories, y=all_probabilities, labels={'x': 'Categories', 'y': 'Probabilities'},
136
+ title="Classes Predicted Probabilities", text=all_probabilities)
137
+ fig.update_traces(texttemplate='%{text:.4f}', textposition='outside')
138
+ fig.show()
139
+
140
+ # # Save the figure as HTML
141
+ # html_figure = fig.to_html()
142
+
143
  # Get the categories and probabilities for the top two
144
+ top_category = [hate_speech_categories[index] for index in top_indices]
145
+ top_probability = [logits[index] for index in top_indices]
146
+ result = top_category[0], 1-top_probability[1]
 
 
 
 
 
147
  return result
148
 
149
+ # Input components
150
  inputs = [
151
  gr.Textbox(label="Text", value=app_examples[0][0]),
152
  gr.Dropdown(label="Model", choices=user_friendly_name_list, value=user_friendly_name_list[0])
 
154
 
155
  # Output components
156
  outputs = [
157
+ gr.Label(label="Predominant category"),
158
+ gr.Label(label="Probability"),
159
  ]
160
 
161
+ # Gradio interface
162
+ gr.Interface(fn=predict, inputs=inputs, outputs=outputs, title=app_title,
163
  description=app_description,
164
  examples=app_examples,
165
+ article=article_string).launch()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  torch
2
  gradio
3
  transformers
4
- scipy
 
 
1
  torch
2
  gradio
3
  transformers
4
+ scipy
5
+ plotly