# Import gradio and transformers libraries
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the small deberta models for hate and offensive speech detection
hate_model = AutoModelForSequenceClassification.from_pretrained("KoalaAI/HateSpeechDetector")
hate_tokenizer = AutoTokenizer.from_pretrained("KoalaAI/HateSpeechDetector")

offensive_model = AutoModelForSequenceClassification.from_pretrained("KoalaAI/OffensiveSpeechDetector")
offensive_tokenizer = AutoTokenizer.from_pretrained("KoalaAI/OffensiveSpeechDetector")

# Define a function that takes an input text and returns the scores from the models
def get_scores(text):
  # Tokenize and encode the input text
  hate_input = hate_tokenizer(text, return_tensors="pt")
  offensive_input = offensive_tokenizer(text, return_tensors="pt")

  # Get the logits from the models
  hate_logits = hate_model(**hate_input).logits
  offensive_logits = offensive_model(**offensive_input).logits

  # Apply softmax to get probabilities
  hate_probs = hate_logits.softmax(dim=1)
  offensive_probs = offensive_logits.softmax(dim=1)

  # Get the id-to-label mappings from each model's config
  hate_labels = hate_model.config.id2label
  offensive_labels = offensive_model.config.id2label

  # Format the output as a dictionary of scores
  output = {}
  output["Hate speech"] = {hate_labels[i]: round(p.item(), 4) for i, p in enumerate(hate_probs[0])}
  output["Offensive speech"] = {offensive_labels[i]: round(p.item(), 4) for i, p in enumerate(offensive_probs[0])}

  return output
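
# Example of the returned structure (illustrative only: the actual keys come
# from each model's config.id2label, so the label names below are assumptions,
# not the models' real labels):
#   get_scores("hello there")
#   -> {"Hate speech": {"NOT_HATE": 0.99, "HATE": 0.01},
#       "Offensive speech": {"NOT_OFFENSIVE": 0.98, "OFFENSIVE": 0.02}}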

# Create a gradio interface with a text input and a json output
iface = gr.Interface(fn=get_scores, inputs="text", outputs="json")

# Launch the interface
iface.launch()