Spaces:
Running
Running
Update pages/2 Topic Modeling.py
Browse files
pages/2 Topic Modeling.py
CHANGED
@@ -28,8 +28,8 @@ import bitermplus as btm
|
|
28 |
import tmplot as tmp
|
29 |
import tomotopy
|
30 |
import sys
|
31 |
-
|
32 |
-
|
33 |
import pipeline
|
34 |
from html2image import Html2Image
|
35 |
from umap import UMAP
|
@@ -337,9 +337,9 @@ if uploaded_file is not None:
|
|
337 |
if bert_embedding_model == 'all-MiniLM-L6-v2':
|
338 |
emb_mod = 'all-MiniLM-L6-v2'
|
339 |
lang = 'en'
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
|
344 |
emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
|
345 |
lang = 'multilingual'
|
|
|
28 |
import tmplot as tmp
|
29 |
import tomotopy
|
30 |
import sys
|
31 |
+
import spacy
|
32 |
+
import en_core_web_sm
|
33 |
import pipeline
|
34 |
from html2image import Html2Image
|
35 |
from umap import UMAP
|
|
|
337 |
if bert_embedding_model == 'all-MiniLM-L6-v2':
|
338 |
emb_mod = 'all-MiniLM-L6-v2'
|
339 |
lang = 'en'
|
340 |
+
elif bert_embedding_model == 'en_core_web_sm':
|
341 |
+
emb_mod = en_core_web_sm.load(exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
|
342 |
+
lang = 'en'
|
343 |
elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
|
344 |
emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
|
345 |
lang = 'multilingual'
|