faizhalas committed on
Commit
8e31adb
1 Parent(s): 4c4579a

Update pages/2 Topic Modeling.py

Browse files
Files changed (1) hide show
  1. pages/2 Topic Modeling.py +15 -3
pages/2 Topic Modeling.py CHANGED
@@ -34,6 +34,7 @@ import en_core_web_sm
34
  import pipeline
35
  from html2image import Html2Image
36
  from umap import UMAP
 
37
 
38
 
39
  #===config===
@@ -43,6 +44,14 @@ st.set_page_config(
43
  layout="wide"
44
  )
45
  st.header("Topic Modeling")
 
 
 
 
 
 
 
 
46
  st.subheader('Put your file here...')
47
 
48
  #========unique id========
@@ -72,6 +81,9 @@ def reset_biterm():
72
 
73
  def reset_all():
74
  st.cache_data.clear()
 
 
 
75
 
76
  #===clean csv===
77
  @st.cache_data(ttl=3600, show_spinner=False)
@@ -149,7 +161,7 @@ if uploaded_file is not None:
149
  num_cho = c2.number_input('Choose number of topics', min_value=2, max_value=30, value=2)
150
  words_to_remove = c3.text_input("Remove specific words. Separate words by semicolons (;)")
151
 
152
- d1, d2 = st.columns([7,3])
153
  d2.info("Don't do anything during the computing", icon="⚠️")
154
  topic_abs, paper=clean_csv(extype)
155
 
@@ -358,7 +370,7 @@ if uploaded_file is not None:
358
 
359
  @st.cache_data(ttl=3600, show_spinner=False)
360
  def Vis_Barchart(extype):
361
- fig5 = topic_model.visualize_barchart(top_n_topics=num_topic)
362
  return fig5
363
 
364
  @st.cache_data(ttl=3600, show_spinner=False)
@@ -403,4 +415,4 @@ if uploaded_file is not None:
403
 
404
  with tab3:
405
  st.markdown('**Jeet Rawat, A., Ghildiyal, S., & Dixit, A. K. (2022, December 1). Topic modelling of legal documents using NLP and bidirectional encoder representations from transformers. Indonesian Journal of Electrical Engineering and Computer Science, 28(3), 1749.** https://doi.org/10.11591/ijeecs.v28.i3.pp1749-1755')
406
- st.markdown('**Yao, L. F., Ferawati, K., Liew, K., Wakamiya, S., & Aramaki, E. (2023, April 20). Disruptions in the Cystic Fibrosis Community’s Experiences and Concerns During the COVID-19 Pandemic: Topic Modeling and Time Series Analysis of Reddit Comments. Journal of Medical Internet Research, 25, e45249.** https://doi.org/10.2196/45249')
 
34
  import pipeline
35
  from html2image import Html2Image
36
  from umap import UMAP
37
+ import os
38
 
39
 
40
  #===config===
 
44
  layout="wide"
45
  )
46
  st.header("Topic Modeling")
47
+ hide_streamlit_style = """
48
+ <style>
49
+ #MainMenu {visibility: hidden;}
50
+ footer {visibility: hidden;}
51
+ </style>
52
+ """
53
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
54
+
55
  st.subheader('Put your file here...')
56
 
57
  #========unique id========
 
81
 
82
  def reset_all():
83
  st.cache_data.clear()
84
+
85
+ #===avoiding deadlock===
86
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
87
 
88
  #===clean csv===
89
  @st.cache_data(ttl=3600, show_spinner=False)
 
161
  num_cho = c2.number_input('Choose number of topics', min_value=2, max_value=30, value=2)
162
  words_to_remove = c3.text_input("Remove specific words. Separate words by semicolons (;)")
163
 
164
+ d1, d2 = st.columns([8,2])
165
  d2.info("Don't do anything during the computing", icon="⚠️")
166
  topic_abs, paper=clean_csv(extype)
167
 
 
370
 
371
  @st.cache_data(ttl=3600, show_spinner=False)
372
  def Vis_Barchart(extype):
373
+ fig5 = topic_model.visualize_barchart(top_n_topics=num_topic) #, n_words=10)
374
  return fig5
375
 
376
  @st.cache_data(ttl=3600, show_spinner=False)
 
415
 
416
  with tab3:
417
  st.markdown('**Jeet Rawat, A., Ghildiyal, S., & Dixit, A. K. (2022, December 1). Topic modelling of legal documents using NLP and bidirectional encoder representations from transformers. Indonesian Journal of Electrical Engineering and Computer Science, 28(3), 1749.** https://doi.org/10.11591/ijeecs.v28.i3.pp1749-1755')
418
+ st.markdown('**Yao, L. F., Ferawati, K., Liew, K., Wakamiya, S., & Aramaki, E. (2023, April 20). Disruptions in the Cystic Fibrosis Community’s Experiences and Concerns During the COVID-19 Pandemic: Topic Modeling and Time Series Analysis of Reddit Comments. Journal of Medical Internet Research, 25, e45249.** https://doi.org/10.2196/45249')