# Hugging Face Space: dhanikitkat/demo-topic-detection
# Space status when captured: Sleeping
# File size: 3,771 Bytes

import streamlit as st
import re
import pandas as pd
from transformers import pipeline
from gensim.models import LdaModel
from gensim.corpora import Dictionary

# Function to preprocess text
def text_preprocess(teks):
    """Normalise raw user text before it is handed to the models.

    Steps, in order: lower-case the text; replace @mentions, #hashtags and
    the literal two-character sequence backslash + "n" with a space; strip
    surrounding whitespace; replace ``http...`` and ``www.`` links with a
    space; finally replace every character that is not an ASCII letter,
    whitespace or an apostrophe with a space.

    Args:
        teks: The raw input string.

    Returns:
        The cleaned string. Runs of spaces are not collapsed, and because
        ``strip`` runs before the later substitutions the result may still
        start or end with spaces.
    """
    teks = teks.lower()
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
    # Matches a literal backslash followed by "n" (presumably escaped
    # newlines left in scraped text), not a real newline character.
    teks = re.sub(r"\\n", " ", teks)
    teks = teks.strip()
    teks = re.sub(r"http\S+", " ", teks)
    # The dot must be escaped: an unescaped "." also matches a space, which
    # made ordinary words containing "www" (e.g. "awww so") get deleted.
    teks = re.sub(r"www\.\S+", " ", teks)
    # Raw string so that "\s" reaches the regex engine instead of being an
    # invalid string escape.
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    return teks

# Function to perform inference and get the topic with the highest probability
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the label and probability of the best-scoring topic.

    ``new_document`` is split on whitespace, turned into a bag-of-words via
    ``dictionary.doc2bow`` and scored with
    ``lda_model.get_document_topics`` (called with ``minimum_probability=0``).
    The ``(topic_id, probability)`` pair with the largest probability is
    selected; when several pairs tie, the first one in the returned
    distribution is kept.

    Args:
        lda_model: Object exposing ``get_document_topics``.
        dictionary: Object exposing ``doc2bow``.
        new_document: Text to classify.
        topic_names: Mapping from topic id to a display label.

    Returns:
        A ``(label, probability)`` tuple. The label is looked up in
        ``topic_names`` and falls back to ``"Topic <id>"`` when missing.
    """
    tokens = new_document.split()
    bag_of_words = dictionary.doc2bow(tokens)
    scored_topics = lda_model.get_document_topics(bag_of_words, minimum_probability=0)

    def _score(pair):
        # Each entry is (topic_id, probability); rank by the probability.
        return pair[1]

    best_id, best_score = max(scored_topics, key=_score)
    label = topic_names.get(best_id, f"Topic {best_id}")
    return label, best_score

# Load sentiment analysis model
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


@st.cache_resource
def _load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline once and reuse it afterwards.

    This code sits at module top level, so without caching the model and
    tokenizer would be constructed again every time the script is executed.
    ``st.cache_resource`` keeps a single shared instance instead.

    Returns:
        The ``transformers`` pipeline for ``"sentiment-analysis"`` using
        ``pretrained_name`` as both model and tokenizer.
    """
    return pipeline("sentiment-analysis", model=pretrained_name, tokenizer=pretrained_name)


# Kept as a module-level name because main() calls ``nlp(...)`` directly.
nlp = _load_sentiment_pipeline()

# Display labels for the LDA topic ids (topic id -> label shown to the user).
TOPIC_NAMES = {
    0: 'Kurang Memuaskan',
    1: 'Aplikasi Lambat',
    2: 'Aplikasi Error',
    3: 'Sulit Sinkronisasi',
    4: 'Tidak Bisa Login',
    5: 'Aplikasi Sulit Dibuka',
    6: 'Aplikasi Keseringan Update',
    7: 'Neutral',
    8: 'Aplikasi Bug',
    9: 'Pelayanan Buruk',
    10: 'Aplikasi Tidak Bisa Digunakan',
    11: 'Aplikasi Belum Update',
    12: 'Aplikasi Bug/Lag',
    13: 'Sulit Komplain',
    14: 'Gangguan Server',
    15: 'Tidak Bisa Update',
    16: 'Tidak Bisa Download',
    17: 'Jaringan Bermasalah',
    18: 'Transaksi Lambat',
    19: 'Tidak Bisa Buka Aplikasi',
    20: 'Terlalu Banyak Iklan',
    21: 'Verifikasi Wajah Gagal',
    22: 'Pengajuan Pinjaman',
    23: 'Sms Kode Otp Tidak Masuk',
    24: 'Sulit Pengajuan Pinjaman',
    25: 'Tidak Bisa Transaksi / Lambat',
    26: 'Sulit Daftar',
    27: 'Sulit Transfer',
    28: 'Banyak Potongan',
    29: 'Tidak Bisa Cek Mutasi / Mutasi Hilang',
    30: 'Proses Kta Lama',
    31: 'Aplikasi Tidak Real Time',
    32: 'Kesulitan Pengajuan Kartu Kredit',
    33: 'Mesin Atm Error',
}


# Streamlit app
def main():
    """Render the app: one text box and two actions on its content.

    "Analyze Sentiment" runs the module-level ``nlp`` pipeline on the cleaned
    text and shows the top label and its score. "Infer Topic" loads the LDA
    model and dictionary from ``lda.model`` / ``dictionary.dict`` and shows
    the most probable topic label and its probability.

    Both actions use the output of ``text_preprocess`` and show a warning
    instead of a prediction when nothing is left after cleaning.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    # Clean once so both actions see exactly the same text.
    processed_text = text_preprocess(input_text)
    # Empty or symbol-only input leaves only whitespace after cleaning; the
    # models would still return a (meaningless) answer for it.
    has_content = bool(processed_text.strip())
    empty_message = "Please enter some text containing letters before running an analysis."

    if st.button("Analyze Sentiment"):
        if has_content:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)
        else:
            st.warning(empty_message)

    if st.button("Infer Topic"):
        if has_content:
            # Loaded on each click, relative to the working directory.
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")

            # NOTE(review): the cleaned text is used here (previously the raw
            # input was passed) so that capitalised or punctuated words can
            # match dictionary tokens. This assumes the dictionary was built
            # from text cleaned the same way - confirm against training code.
            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, TOPIC_NAMES
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)
        else:
            st.warning(empty_message)

# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()