# Spaces: Runtime error (status banner captured from the hosting page; not part of the program)
from transformers import pipeline | |
import matplotlib.pyplot as plt | |
import twitter_scraper as ts | |
import pandas as pd | |
import gradio as gr | |
# Checkpoint identifiers; each one is passed as both the model and the tokenizer.
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Sentiment classifier. sentiment_analysis() below reads output[0] as a list of
# {"label", "score"} dicts, which relies on return_all_scores=True.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)

# Named-entity tagger. ner() and ner_df() below read the "entity_group" and
# "word" keys of each returned item.
ner_pipeline = pipeline(
    task="ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)

# Sample sentences offered as one-click inputs in the "Single Input" tab.
examples = [
    "Jokowi sangat kecewa dengan POLRI atas kerusuhan yang terjadi di Malang",
    "Lesti marah terhadap perlakuan KDRT yang dilakukan oleh Bilar",
    "Ungkapan rasa bahagia diutarakan oleh Coki Pardede karena kebabasannya dari penjara",
]
def sentiment_analysis(text):
    """Return a ``{label: score}`` dict for ``text``.

    Runs the module-level ``sentiment_pipeline`` on ``text`` and reads the
    first element of its output, which is iterated as a sequence of dicts
    carrying ``"label"`` and ``"score"`` keys.
    """
    per_label = sentiment_pipeline(text)[0]
    scores_by_label = {}
    for entry in per_label:
        scores_by_label[entry["label"]] = entry["score"]
    return scores_by_label
def ner(text):
    """Tag entities in ``text`` and return them alongside the text.

    Every item produced by the module-level ``ner_pipeline`` is modified in
    place: its ``"entity_group"`` value is copied to an ``"entity"`` key.
    The result is ``{"text": text, "entities": <those items>}``.
    """
    found = ner_pipeline(text)
    for item in found:
        # Expose the group name under "entity" as well as "entity_group".
        item.update(entity=item['entity_group'])
    return {"text": text, "entities": found}
def sentiment_ner(text):
    """Run both analyses on ``text``.

    Returns a 2-tuple: the result of ``sentiment_analysis(text)`` followed by
    the result of ``ner(text)``, evaluated in that order.
    """
    sentiment_scores = sentiment_analysis(text)
    tagged_entities = ner(text)
    return sentiment_scores, tagged_entities
def sentiment_df(df):
    """Add the winning sentiment label and its score to every row of ``df``.

    Each value of the ``"Text"`` column is converted to ``str`` and passed to
    ``sentiment_analysis``. The label with the highest score is stored in a
    new ``"Label"`` column and its score, rounded to 3 decimals, in a new
    ``"Score"`` column.

    Args:
        df: DataFrame with a ``"Text"`` column.

    Returns:
        The same ``df`` object; it is modified in place, not copied.
    """
    labels = []
    scores = []
    for text in df["Text"].astype(str):
        pred = sentiment_analysis(text)
        # max() over the dict returns the first key with the highest score,
        # the same label the previous index-based lookup selected.
        best_label = max(pred, key=pred.get)
        labels.append(best_label)
        scores.append(round(pred[best_label], 3))
    df['Label'] = labels
    df['Score'] = scores
    return df
def ner_df(df):
    """Build an entity/sentiment table from a labelled DataFrame.

    Runs ``ner`` on every ``"Text"`` value (as ``str``). For each detected
    entity whose ``"entity_group"`` is ``'PER'`` or ``'NOR'``, one output row
    is produced holding the entity's ``"word"`` and the ``"Label"`` value (as
    ``str``) of the row the text came from.

    Args:
        df: DataFrame with ``"Text"`` and ``"Label"`` columns.

    Returns:
        A new DataFrame with columns ``'Entity'`` and ``'Sentiment'``.
    """
    sentences = list(df["Text"].astype(str).values)
    row_labels = list(df["Label"].astype(str).values)
    tagged = [ner(sentence) for sentence in sentences]

    wanted_groups = ['PER', 'NOR']
    pairs = [
        (hit['word'], row_label)
        for row_label, tagging in zip(row_labels, tagged)
        for hit in tagging['entities']
        if hit['entity_group'] in wanted_groups
    ]

    df_ner = pd.DataFrame(columns=['Entity', 'Sentiment'])
    df_ner['Entity'] = [word for word, _ in pairs]
    df_ner['Sentiment'] = [row_label for _, row_label in pairs]
    return df_ner
def twitter_analyzer(keyword, max_tweets):
    """Scrape tweets for ``keyword``, classify them and chart the label split.

    Steps: ``ts.scrape_tweets`` fetches the tweets, ``ts.preprocess_text`` is
    applied to the ``"Text"`` column, ``sentiment_df`` adds ``"Label"`` and
    ``"Score"``, and a pie chart of the number of rows per label is drawn.

    Args:
        keyword: Search term forwarded to ``ts.scrape_tweets``.
        max_tweets: Forwarded as the ``max_tweets`` keyword of
            ``ts.scrape_tweets``.

    Returns:
        A 2-tuple ``(fig, table)``: the matplotlib figure holding the pie
        chart, and the ``"URL"``, ``"Text"``, ``"Label"``, ``"Score"`` columns
        of the scraped DataFrame.
    """
    df = ts.scrape_tweets(keyword, max_tweets=max_tweets)
    df["Text"] = df["Text"].apply(ts.preprocess_text)
    df = sentiment_df(df)
    # NOTE: entity-level aggregation (ner_df) is not invoked here.

    # Draw on an explicitly created Axes instead of pyplot's implicit
    # "current" figure, so the chart cannot land on another figure.
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        df.groupby(["Label"])["Text"].count().plot.pie(ax=ax, autopct="%.1f%%")
    finally:
        # Unregister the figure from pyplot; otherwise every request would
        # leave one more open figure behind. The returned Figure object
        # itself can still be rendered by the caller.
        plt.close(fig)
    return fig, df[["URL", "Text", "Label", "Score"]]
if __name__ == "__main__":
    # Build and launch the two-tab Gradio UI.
    # NOTE(review): the indentation of this file was lost, so the nesting of
    # the layout containers below is reconstructed -- confirm the placement of
    # the plot and the dataframe relative to the Row/Column in the Twitter tab.
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">Tweet Analyzer - Indonesia</h1>""")
        gr.Markdown("\nCreator: Wira Indra Kusuma\n")

        # Tab 1: analyse a single sentence typed (or picked) by the user.
        with gr.Tab("Single Input"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        input_text = gr.Textbox(label="Input Text")
                        # The caption is the first (value) argument, as for the
                        # "Submit" button below; `label=` does not set it.
                        analyze_button = gr.Button("Analyze")
                        examples_bar = gr.Examples(examples=examples, inputs=input_text)
                    with gr.Column():
                        sent_output = gr.Label(label="Sentiment Analysis")
                        ner_output = gr.HighlightedText(label="Named Entity Recognition")

        # Tab 2: scrape tweets for a keyword and summarise their sentiment.
        with gr.Tab("Twitter"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        keyword_textbox = gr.Textbox(lines=1, label="Keyword")
                        max_tweets_component = gr.Number(
                            value=10,
                            label="Total of Tweets to Scrape",
                            precision=0,
                        )
                        submit_button = gr.Button("Submit")
                    plot_component = gr.Plot(label="Pie Chart of Sentiments")
                # max_rows takes a plain row count; the (count, mode) tuple
                # form belongs to row_count.
                dataframe_component = gr.DataFrame(
                    type="pandas",
                    label="Dataframe",
                    max_rows=20,
                    overflow_row_behaviour='paginate',
                    wrap=True,
                )
                # NOTE: a second dataframe for per-entity results (df_ner) is
                # currently not part of the layout.

        # Wire the buttons to their handlers.
        analyze_button.click(sentiment_ner, input_text, [sent_output, ner_output])
        submit_button.click(
            twitter_analyzer,
            inputs=[keyword_textbox, max_tweets_component],
            outputs=[plot_component, dataframe_component],
        )
        gr.Markdown("\n")

    demo.launch(inbrowser=True)