faizhalas committed on
Commit
ef7d900
•
1 Parent(s): 161dafa

Update pages/2 Topic Modeling.py

Browse files
Files changed (1) hide show
  1. pages/2 Topic Modeling.py +6 -7
pages/2 Topic Modeling.py CHANGED
@@ -29,8 +29,7 @@ import tmplot as tmp
29
  import tomotopy
30
  import sys
31
  import spacy
32
- #import en_core_web_sm
33
- from spacy.lang.en.examples import sentences
34
  import pipeline
35
  from html2image import Html2Image
36
  from umap import UMAP
@@ -185,7 +184,7 @@ if uploaded_file is not None:
185
  bert_n_neighbors = t2.number_input('n_neighbors', value=15 , min_value=1, max_value=None, step=1)
186
  bert_embedding_model = st.radio(
187
  "embedding_model",
188
- ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2", "en_core_web_sm"], index=0, horizontal=True)
189
  else:
190
  st.write('Please choose your preferred method')
191
  if st.button("Submit", on_click=reset_all):
@@ -338,8 +337,8 @@ if uploaded_file is not None:
338
  if bert_embedding_model == 'all-MiniLM-L6-v2':
339
  emb_mod = 'all-MiniLM-L6-v2'
340
  lang = 'en'
341
- elif bert_embedding_model == 'en_core_web_sm':
342
- emb_mod = spacy.load(exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
343
  lang = 'en'
344
  elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
345
  emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
@@ -411,8 +410,8 @@ if uploaded_file is not None:
411
  with st.expander("Visualize Topic Similarity"):
412
  st.write(fig4)
413
 
414
- #except ValueError:
415
- #st.error('🙇‍♂️ Please raise the number of topics and click submit')
416
 
417
  except NameError:
418
  st.warning('🖱️ Please click Submit')
 
29
  import tomotopy
30
  import sys
31
  import spacy
32
+ import en_core_web_md
 
33
  import pipeline
34
  from html2image import Html2Image
35
  from umap import UMAP
 
184
  bert_n_neighbors = t2.number_input('n_neighbors', value=15 , min_value=1, max_value=None, step=1)
185
  bert_embedding_model = st.radio(
186
  "embedding_model",
187
+ ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2", "en_core_web_md"], index=0, horizontal=True)
188
  else:
189
  st.write('Please choose your preferred method')
190
  if st.button("Submit", on_click=reset_all):
 
337
  if bert_embedding_model == 'all-MiniLM-L6-v2':
338
  emb_mod = 'all-MiniLM-L6-v2'
339
  lang = 'en'
340
+ elif bert_embedding_model == 'en_core_web_md':
341
+ emb_mod = en_core_web_md.load(exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
342
  lang = 'en'
343
  elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
344
  emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
 
410
  with st.expander("Visualize Topic Similarity"):
411
  st.write(fig4)
412
 
413
+ except ValueError:
414
+ st.error('🙇‍♂️ Please raise the number of topics and click submit')
415
 
416
  except NameError:
417
  st.warning('🖱️ Please click Submit')