import streamlit as st  # web app framework
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd

# App title
st.title("Toxic Tweets")

selection = st.selectbox(
    "Select fine-tuned model",
    ("Ariel8/toxic-tweets-classification", "roberta-large-mnli", "twitter-XLM-roBERTa-base"),
)

if selection == "Ariel8/toxic-tweets-classification":
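    # Load the fine-tuned toxicity classifier and its tokenizer from the Hugging Face Hub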
    tokenizer = AutoTokenizer.from_pretrained("Ariel8/toxic-tweets-classification")
    model = AutoModelForSequenceClassification.from_pretrained("Ariel8/toxic-tweets-classification")

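    # Prepopulated example comments that are classified together with whatever the user enters below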
    tweets = ["BlackKite being a faggot",
    "Yo bitch Ja Rule is more succesful then you'll ever be whats up with you and hating you sad mofuckas...i should bitch slap ur pethedic white faces and get you to kiss my ass you guys sicken me. Ja rule is about pride in da music man. dont diss that shit on him. and nothin is wrong bein like tupac he was a brother too...fuckin white boys get things right next time.,",
    ":Dear god this site is horrible.",
    "I think its crap that the link to roggenbier is to this article. Somebody that knows how to do things should change it.",
    ":::::I have added more wikilinks to my sections and included more secondary sources as you suggested. For all the citations under human disease section, I went through them and exchanged review articles for the primary sources. Thanks again for all your input!",
    "DJ Robinson is gay as hell! he sucks his dick so much!!!!!",
    "I WILL BURN YOU TO HELL IF YOU REVOKE MY TALK PAGE ACCESS!!!!!!!!!!!!!",
    "um, taking a shot here... Um lets say Three separate people whos wills are to each other.",
    "How dare you vandalize that page about the HMS Beagle! Don't vandalize again, demon!",
    ":Thanks for the comment about Wiki-defenderness. I like that one. I usually wikiling Wiki-defender. I agree that at first he was somewhat innocent but now have my doubts as he is being really agressive about the whole matter."]
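
    # Sketch (comment only, not executed): one multi-label prediction with this model looks roughly like
    #   enc = tokenizer("some text", truncation=True, return_tensors="pt")
    #   probs = torch.sigmoid(model(**enc).logits)[0]  # one independent probability per label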

    text = st.text_input("Enter Text here for Toxicity Classification:","I hate everything")

    if st.button("Run Toxicity Classification of Text (and prepopulated Tweets)"): 
        tweets.append(text)

        # Label order is assumed to match the model's six output heads
        labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
        main_class = []   # (score, label index) of the highest-scoring label per tweet
        toxic_types = []  # (score, label index) of the highest-scoring specific toxicity type per tweet

        for i in range(len(tweets)):
            batch = tokenizer(tweets[i], truncation=True, padding='max_length', return_tensors="pt")
            with torch.no_grad():
                outputs = model(**batch)
                # Multi-label head: sigmoid per label, scaled to a percentage
                predictions = torch.sigmoid(outputs.logits) * 100
                probs = predictions[0].tolist()
            # Main classification: label with the highest overall score
            first_max = max(probs)
            fm_index = probs.index(first_max)
            main_class.append((first_max, fm_index))
            # Toxicity type: highest score among the specific types (labels from index 2 on)
            second_max = max(probs[2:])
            sm_index = probs.index(second_max, 2)  # start the search at index 2 so the index matches the slice
            toxic_types.append((second_max, sm_index))


        # One row per tweet: main classification plus the dominant specific toxicity type
        d = {'Tweet': tweets,
             'Main Classification': [labels[mc[1]] for mc in main_class],
             'Score': [round(mc[0], 3) for mc in main_class],
             'Toxicity Type': [labels[tt[1]] for tt in toxic_types],
             'Toxicity Score': [round(tt[0], 3) for tt in toxic_types]}
        dataframe = pd.DataFrame(data=d)
        st.table(dataframe)
else: 
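    # Sentiment-analysis branch: both remaining choices use off-the-shelf Hugging Face pipelines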
    data = []
    text = st.text_input("Enter text here for Sentiment Analysis:","Artificial Intelligence is useful")
    data.append(text)
    if selection == "roberta-large-mnli":
        # Option 1: roberta-large-mnli (an NLI model) via the default text-classification pipeline
        if st.button("Run Sentiment Analysis of Text"): 
            model_path = "roberta-large-mnli"
            sentiment_pipeline = pipeline(model=model_path)
            result = sentiment_pipeline(data)
            label = result[0]["label"]
            score = result[0]["score"]
            st.write("The classification of the given text is " + label + " with a score of " + str(score))
    elif selection == "twitter-XLM-roBERTa-base":
        # Option 2: CardiffNLP's multilingual Twitter sentiment model
        if st.button("Run Sentiment Analysis of Text"): 
            model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
            sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)
            result = sentiment_task(text)
            label = result[0]["label"].capitalize()
            score = result[0]["score"]
            st.write("The classification of the given text is " + label + " with a score of " + str(score))
   


# Alternative (unused): bertweet sentiment analysis, presumably via pysentimiento's create_analyzer
# (would require `from pysentimiento import create_analyzer` to run)
# elif selection == "bertweet-sentiment-analysis":
#     if st.button("Run Sentiment Analysis of Text"): 
#         analyzer = create_analyzer(task="sentiment", lang="en")
#         result = analyzer.predict(text)
#         if result.output == "POS": 
#             label = "POSITIVE"
#         elif result.output == "NEU": 
#             label = "NEUTRAL"
#         else: 
#             label = "NEGATIVE"
        
#         neg = result.probas["NEG"]
#         pos = result.probas["POS"]
#         neu = result.probas["NEU"]
#         st.write("The classification of the given text is " + label + " with the scores broken down as: Positive - " + str(pos) + ", Neutral - " + str(neu) + ", Negative - " + str(neg))