dhanikitkat committed on
Commit
9c624ca
1 Parent(s): fb039e5

Upload 9 files

Browse files
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import re
3
+ import pandas as pd
4
+ from transformers import pipeline
5
+ from gensim.models import LdaModel
6
+ from gensim.corpora import Dictionary
7
+
8
+ # Function to preprocess text
9
def text_preprocess(teks):
    """Normalize raw text before it is passed to the models.

    The text is lowercased, then @mentions, #hashtags, escaped ``\\n``
    sequences, URLs and every character that is not an ASCII letter,
    whitespace or an apostrophe are each replaced by a single space.

    Args:
        teks: The raw input string.

    Returns:
        The cleaned string. Replacements insert spaces, so the result may
        contain runs of spaces and leading/trailing spaces.
    """
    teks = teks.lower()
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)
    # Matches a literal backslash followed by "n" (an escaped newline in the
    # text), not a real newline character; real newlines count as whitespace
    # and are kept by the final substitution.
    teks = re.sub(r"\\n", " ", teks)
    teks = teks.strip()
    teks = re.sub(r"http\S+", " ", teks)
    # The dot is escaped so only "www." prefixes are treated as URLs; an
    # unescaped dot would also swallow ordinary text such as "awww so cute".
    teks = re.sub(r"www\.\S+", " ", teks)
    # Raw string avoids the invalid "\s" escape warning of a plain literal.
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)
    return teks
19
+
20
+ # Function to perform inference and get the topic with the highest probability
21
def get_highest_probability_topic(lda_model, dictionary, new_document, topic_names):
    """Return the most probable topic for a document and its probability.

    Args:
        lda_model: Object exposing ``get_document_topics(bow,
            minimum_probability=...)`` that returns ``(topic_id,
            probability)`` pairs.
        dictionary: Object exposing ``doc2bow(tokens)``.
        new_document: Document text; it is tokenized by splitting on
            whitespace.
        topic_names: Mapping from topic id to a display name.

    Returns:
        A ``(topic_name, probability)`` tuple. When the winning topic id has
        no entry in ``topic_names`` the name falls back to
        ``"Topic <id>"``.
    """
    new_bow = dictionary.doc2bow(new_document.split())
    # minimum_probability=0 requests the distribution without the model's
    # default probability cut-off.
    topic_distribution = lda_model.get_document_topics(new_bow, minimum_probability=0)

    # Select the (topic_id, probability) pair with the largest probability.
    topic_id, probability = max(topic_distribution, key=lambda pair: pair[1])
    topic_name = topic_names.get(topic_id, f"Topic {topic_id}")

    return topic_name, probability
30
+
31
+ # Load sentiment analysis model
32
pretrained_name = "w11wo/indonesian-roberta-base-sentiment-classifier"


@st.cache_resource
def _load_sentiment_pipeline(model_name):
    """Build the sentiment-analysis pipeline for ``model_name``.

    Streamlit re-executes the script on every widget interaction; caching
    the pipeline as a resource keeps the model from being rebuilt on each
    rerun.
    """
    return pipeline("sentiment-analysis", model=model_name, tokenizer=model_name)


nlp = _load_sentiment_pipeline(pretrained_name)
34
+
35
+ # Streamlit app
36
def main():
    """Render the Streamlit page for sentiment analysis and topic inference.

    The page shows one text area and two buttons. "Analyze Sentiment" runs
    the module-level ``nlp`` pipeline on the preprocessed text and displays
    the label and score. "Infer Topic" loads the saved LDA model and
    dictionary from the working directory and displays the most probable
    topic with its probability.
    """
    st.title("Sentiment Analysis and Topic Inference App")
    st.write("Enter your text below:")
    input_text = st.text_area("Input Text")

    if st.button("Analyze Sentiment"):
        processed_text = text_preprocess(input_text)
        if not processed_text.strip():
            # Nothing is left after cleaning, so there is nothing to classify.
            st.warning("Please enter some text to analyze.")
        else:
            result = nlp(processed_text)
            sentiment = result[0]['label']
            probability = result[0]['score']
            st.write("Sentiment:", sentiment)
            st.write("Probability:", probability)

    if st.button("Infer Topic"):
        # Clean the text the same way as the sentiment branch so casing and
        # punctuation do not keep tokens from matching dictionary entries.
        # NOTE(review): presumably the dictionary was built from similarly
        # cleaned text -- confirm against the training pipeline.
        processed_text = text_preprocess(input_text)
        if not processed_text.strip():
            st.warning("Please enter some text to analyze.")
        else:
            lda_model = LdaModel.load("lda.model")
            dictionary = Dictionary.load("dictionary.dict")
            topic_names = {
                0: 'User Experience',
                1: 'App Features',
                2: 'Questions and Engagement',
                3: 'Opinion on Banking App',
                4: 'Mixed Feedback and Technical Issues',
            }

            inferred_topic, inferred_probability = get_highest_probability_topic(
                lda_model, dictionary, processed_text, topic_names
            )
            st.write("Inferred Topic:", inferred_topic)
            st.write("Inference Probability:", inferred_probability)


if __name__ == "__main__":
    main()
corpus_tfidf_pos.mm ADDED
The diff for this file is too large to render. See raw diff
 
corpus_tfidf_pos.mm.index ADDED
Binary file (32.3 kB). View file
 
dictionary.dict ADDED
Binary file (33 kB). View file
 
lda.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858f495dd7f6f45c574bf2b300a75249cc74a6154a4c5e3efed24c741a68f9d9
3
+ size 8478
lda.model.expElogbeta.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e046c650d7dc819e07427f7ae23d682683eed9ec63de27ce26ee81b4362c04
3
+ size 22928
lda.model.id2word ADDED
Binary file (32.7 kB). View file
 
lda.model.state ADDED
Binary file (28 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ tensorflow
3
+ pandas
4
+ gensim
5
+ transformers
6
+ nltk