Spaces:
Running
Running
Update pages/2 Topic Modeling.py
Browse files- pages/2 Topic Modeling.py +15 -3
pages/2 Topic Modeling.py
CHANGED
@@ -34,6 +34,7 @@ import en_core_web_sm
|
|
34 |
import pipeline
|
35 |
from html2image import Html2Image
|
36 |
from umap import UMAP
|
|
|
37 |
|
38 |
|
39 |
#===config===
|
@@ -43,6 +44,14 @@ st.set_page_config(
|
|
43 |
layout="wide"
|
44 |
)
|
45 |
st.header("Topic Modeling")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
st.subheader('Put your file here...')
|
47 |
|
48 |
#========unique id========
|
@@ -72,6 +81,9 @@ def reset_biterm():
|
|
72 |
|
73 |
def reset_all():
|
74 |
st.cache_data.clear()
|
|
|
|
|
|
|
75 |
|
76 |
#===clean csv===
|
77 |
@st.cache_data(ttl=3600, show_spinner=False)
|
@@ -149,7 +161,7 @@ if uploaded_file is not None:
|
|
149 |
num_cho = c2.number_input('Choose number of topics', min_value=2, max_value=30, value=2)
|
150 |
words_to_remove = c3.text_input("Remove specific words. Separate words by semicolons (;)")
|
151 |
|
152 |
-
d1, d2 = st.columns([
|
153 |
d2.info("Don't do anything during the computing", icon="⚠️")
|
154 |
topic_abs, paper=clean_csv(extype)
|
155 |
|
@@ -358,7 +370,7 @@ if uploaded_file is not None:
|
|
358 |
|
359 |
@st.cache_data(ttl=3600, show_spinner=False)
|
360 |
def Vis_Barchart(extype):
|
361 |
-
fig5 = topic_model.visualize_barchart(top_n_topics=num_topic)
|
362 |
return fig5
|
363 |
|
364 |
@st.cache_data(ttl=3600, show_spinner=False)
|
@@ -403,4 +415,4 @@ if uploaded_file is not None:
|
|
403 |
|
404 |
with tab3:
|
405 |
st.markdown('**Jeet Rawat, A., Ghildiyal, S., & Dixit, A. K. (2022, December 1). Topic modelling of legal documents using NLP and bidirectional encoder representations from transformers. Indonesian Journal of Electrical Engineering and Computer Science, 28(3), 1749.** https://doi.org/10.11591/ijeecs.v28.i3.pp1749-1755')
|
406 |
-
st.markdown('**Yao, L. F., Ferawati, K., Liew, K., Wakamiya, S., & Aramaki, E. (2023, April 20). Disruptions in the Cystic Fibrosis Community’s Experiences and Concerns During the COVID-19 Pandemic: Topic Modeling and Time Series Analysis of Reddit Comments. Journal of Medical Internet Research, 25, e45249.** https://doi.org/10.2196/45249')
|
|
|
34 |
import pipeline
|
35 |
from html2image import Html2Image
|
36 |
from umap import UMAP
|
37 |
+
import os
|
38 |
|
39 |
|
40 |
#===config===
|
|
|
44 |
layout="wide"
|
45 |
)
|
46 |
st.header("Topic Modeling")
|
47 |
+
hide_streamlit_style = """
|
48 |
+
<style>
|
49 |
+
#MainMenu {visibility: hidden;}
|
50 |
+
footer {visibility: hidden;}
|
51 |
+
</style>
|
52 |
+
"""
|
53 |
+
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
54 |
+
|
55 |
st.subheader('Put your file here...')
|
56 |
|
57 |
#========unique id========
|
|
|
81 |
|
82 |
def reset_all():
|
83 |
st.cache_data.clear()
|
84 |
+
|
85 |
+
#===avoiding deadlock===
|
86 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
87 |
|
88 |
#===clean csv===
|
89 |
@st.cache_data(ttl=3600, show_spinner=False)
|
|
|
161 |
num_cho = c2.number_input('Choose number of topics', min_value=2, max_value=30, value=2)
|
162 |
words_to_remove = c3.text_input("Remove specific words. Separate words by semicolons (;)")
|
163 |
|
164 |
+
d1, d2 = st.columns([8,2])
|
165 |
d2.info("Don't do anything during the computing", icon="⚠️")
|
166 |
topic_abs, paper=clean_csv(extype)
|
167 |
|
|
|
370 |
|
371 |
@st.cache_data(ttl=3600, show_spinner=False)
|
372 |
def Vis_Barchart(extype):
|
373 |
+
fig5 = topic_model.visualize_barchart(top_n_topics=num_topic) #, n_words=10)
|
374 |
return fig5
|
375 |
|
376 |
@st.cache_data(ttl=3600, show_spinner=False)
|
|
|
415 |
|
416 |
with tab3:
|
417 |
st.markdown('**Jeet Rawat, A., Ghildiyal, S., & Dixit, A. K. (2022, December 1). Topic modelling of legal documents using NLP and bidirectional encoder representations from transformers. Indonesian Journal of Electrical Engineering and Computer Science, 28(3), 1749.** https://doi.org/10.11591/ijeecs.v28.i3.pp1749-1755')
|
418 |
+
st.markdown('**Yao, L. F., Ferawati, K., Liew, K., Wakamiya, S., & Aramaki, E. (2023, April 20). Disruptions in the Cystic Fibrosis Community’s Experiences and Concerns During the COVID-19 Pandemic: Topic Modeling and Time Series Analysis of Reddit Comments. Journal of Medical Internet Research, 25, e45249.** https://doi.org/10.2196/45249')
|