Spaces:

wiraindrak
/

entity-based-sentiment-analysis

Runtime error

File size: 4,943 Bytes

2168cf5
faf61e8
 
ca67adc
faf61e8
2168cf5
 
7f68476
 
2168cf5
7f68476
ef26fd6
7f68476
 
a60235f
2168cf5
 
7f68476
 
 
ca67adc
 
7f68476
 
2168cf5
42535f1
 
 
2168cf5
 
ef26fd6
7f68476
d961c51
faf61e8
7f68476
 
d84c978
 
7f68476
2168cf5
1df8439
ea552db
1df8439
faf61e8
 
 
acee695
 
 
 
 
 
 
 
faf61e8
 
ca67adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faf61e8
 
 
 
 
572a5c1
6f191b8
 
572a5c1
faf61e8
 
6f191b8
ca67adc
2168cf5
faf61e8
 
 
b9d9035
faf61e8
 
 
660b172
faf61e8
 
 
 
2dd816c
 
 
 
 
b99df17
2dd816c
660b172
 
2dd816c
faf61e8
 
 
 
 
 
1df8439
faf61e8
 
 
 
 
 
 
d6501eb
6f191b8
 
 
 
 
39d5762
 
d6501eb
 
38d1024
6f191b8
38d1024
faf61e8
 
 
 
 
 
 
d6501eb

from transformers import pipeline
import matplotlib.pyplot as plt
import twitter_scraper as ts
import pandas as pd

import gradio as gr

# Model identifiers; each one is passed to transformers.pipeline below as both
# the model and the tokenizer.
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Sentiment classifier, built once at import time and shared by every request.
# return_all_scores=True is what lets sentiment_analysis() iterate over
# output[0] to read one score per label.
# NOTE(review): newer transformers releases appear to deprecate
# `return_all_scores` in favour of `top_k` -- confirm against the pinned
# version before changing, since the output nesting may differ.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True
)

# Named-entity tagger, also built once at import time. ner() and ner_df() read
# the `entity_group` and `word` keys of the records it returns.
# NOTE(review): `grouped_entities` looks superseded by `aggregation_strategy`
# in newer transformers releases -- confirm against the pinned version.
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True
)

# Sample sentences offered through gr.Examples for the single-input tab.
# NOTE(review): "kebabasannya" in the last sample looks like a typo for
# "kebebasannya" -- confirm with the author before editing the string.
examples = [
    "Jokowi sangat kecewa dengan POLRI atas kerusuhan yang terjadi di Malang",
    "Lesti marah terhadap perlakuan KDRT yang dilakukan oleh Bilar",
    "Ungkapan rasa bahagia diutarakan oleh Coki Pardede karena kebabasannya dari penjara"
]

def sentiment_analysis(text):
    """Score ``text`` with the sentiment pipeline.

    Returns a dict mapping every label reported for the input to its score.
    """
    # The pipeline output is indexed at [0] to get the per-label records
    # for this single input.
    label_records = sentiment_pipeline(text)[0]
    scores_by_label = {}
    for record in label_records:
        scores_by_label[record["label"]] = record["score"]
    return scores_by_label

def ner(text):
    """Run the NER pipeline on ``text``.

    Each entity record returned by the pipeline is updated in place with an
    ``entity`` key that duplicates its ``entity_group`` value. The result is
    a dict with the input under ``"text"`` and the records under
    ``"entities"``.
    """
    found_entities = ner_pipeline(text)
    for record in found_entities:
        record.update(entity=record['entity_group'])
    return {"text": text, "entities": found_entities}

def sentiment_ner(text):
    """Return the sentiment scores and the NER result for ``text`` as a pair.

    The sentiment model is run first, then the NER model.
    """
    sentiment_scores = sentiment_analysis(text)
    tagged_text = ner(text)
    return sentiment_scores, tagged_text

def sentiment_df(df):
    """Add the top sentiment label and its score to ``df``.

    Every value of the ``Text`` column is cast to ``str`` and scored with
    ``sentiment_analysis``. The highest-scoring label goes into a ``Label``
    column and its score, rounded to 3 decimals, into a ``Score`` column.
    ``df`` is modified in place and also returned.
    """
    predictions = [
        sentiment_analysis(text) for text in df["Text"].astype(str).values
    ]
    top_labels = []
    top_scores = []
    for label_scores in predictions:
        # max() keeps the first label on ties, like list.index() on the max.
        best_label = max(label_scores, key=label_scores.get)
        top_labels.append(best_label)
        top_scores.append(round(label_scores[best_label], 3))
    df['Label'] = top_labels
    df['Score'] = top_scores
    return df

def ner_df(df):
    """Pair the entities found in each row's text with that row's label.

    Reads the ``Text`` and ``Label`` columns of ``df`` (both cast to ``str``)
    and runs ``ner`` on every text. Entities whose ``entity_group`` is
    ``'PER'`` or ``'NOR'`` are kept. Returns a new DataFrame with one row per
    kept entity: its ``word`` under ``Entity`` and the source row's label
    under ``Sentiment``.
    """
    kept_groups = ['PER', 'NOR']
    texts = df["Text"].astype(str).values
    row_labels = df["Label"].astype(str).values
    tagged_rows = [ner(text) for text in texts]

    entity_words = []
    entity_sentiments = []
    for row_label, tagged in zip(row_labels, tagged_rows):
        for entity in tagged['entities']:
            if entity['entity_group'] not in kept_groups:
                continue
            entity_words.append(entity['word'])
            entity_sentiments.append(row_label)

    df_ner = pd.DataFrame(columns=['Entity', 'Sentiment'])
    df_ner['Entity'] = entity_words
    df_ner['Sentiment'] = entity_sentiments
    return df_ner


def twitter_analyzer(keyword, max_tweets):
    """Scrape tweets for ``keyword`` and summarise their sentiment.

    Args:
        keyword: Search term forwarded to ``ts.scrape_tweets``.
        max_tweets: Forwarded to the scraper as its ``max_tweets`` argument.

    Returns:
        A ``(figure, dataframe)`` tuple: a pie chart of the number of tweets
        per predicted label, and the ``URL``, ``Text``, ``Label`` and
        ``Score`` columns of the scraped frame.
    """
    # Local import keeps this block self-contained; matplotlib is already a
    # dependency of this file through pyplot.
    from matplotlib.figure import Figure

    df = ts.scrape_tweets(keyword, max_tweets=max_tweets)
    df["Text"] = df["Text"].apply(ts.preprocess_text)
    df = sentiment_df(df)

    # NOTE: the entity-level breakdown via ner_df() is currently disabled.

    # Build the figure without pyplot. Figures created through plt.figure()
    # stay registered in pyplot's global state until explicitly closed, so a
    # long-running app would accumulate one figure per request.
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()
    df.groupby(["Label"])["Text"].count().plot.pie(ax=ax, autopct="%.1f%%")
    return fig, df[["URL", "Text", "Label", "Score"]]

if __name__ == "__main__":

    # Assemble the two-tab UI and wire each button to its analysis function.
    with gr.Blocks() as demo:

        gr.Markdown("""<h1 style="text-align:center">Tweet Analyzer - Indonesia</h1>""")

        gr.Markdown(
            """
            Creator: Wira Indra Kusuma
            """
            )

        # Tab 1: analyse one sentence for sentiment and named entities.
        with gr.Tab("Single Input"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        input_text = gr.Textbox(label="Input Text")
                        # The button caption is the first positional argument
                        # (`value`); `label=` does not set the visible text.
                        analyze_button = gr.Button("Analyze")
                        examples_bar = gr.Examples(examples=examples, inputs=input_text)
                    with gr.Column():
                        sent_output = gr.Label(label="Sentiment Analysis")
                        ner_output = gr.HighlightedText(label="Named Entity Recognition")

        # Tab 2: scrape tweets by keyword and show their sentiment distribution.
        with gr.Tab("Twitter"):
            with gr.Blocks():
                with gr.Row():
                    with gr.Column():
                        keyword_textbox = gr.Textbox(lines=1, label="Keyword")
                        # precision=0 makes the component hand an integer to the callback.
                        max_tweets_component = gr.Number(value=10, label="Total of Tweets to Scrape", precision=0)
                        submit_button = gr.Button("Submit")

                    plot_component = gr.Plot(label="Pie Chart of Sentiments")
                # `max_rows` is documented as an int (rows shown per page);
                # the (count, mode) tuple form belongs to `row_count`.
                dataframe_component = gr.DataFrame(type="pandas",
                                                label="Dataframe",
                                                max_rows=20,
                                                overflow_row_behaviour='paginate',
                                                wrap=True)

        # Event wiring: outputs are listed in the order the callbacks return them.
        analyze_button.click(sentiment_ner, input_text, [sent_output, ner_output])
        submit_button.click(twitter_analyzer,
                    inputs=[keyword_textbox, max_tweets_component],
                    outputs=[plot_component, dataframe_component])

        gr.Markdown(
                """

                """

            )

    demo.launch(inbrowser=True)