Spaces:
Running
Running
Temporarily fixing pydantic error
Browse files
pages/2 Topic Modeling.py
CHANGED
@@ -21,15 +21,15 @@ import streamlit.components.v1 as components
|
|
21 |
from io import StringIO
|
22 |
from ipywidgets.embed import embed_minimal_html
|
23 |
from nltk.stem.snowball import SnowballStemmer
|
24 |
-
|
25 |
import plotly.express as px
|
26 |
from sklearn.cluster import KMeans
|
27 |
import bitermplus as btm
|
28 |
import tmplot as tmp
|
29 |
import tomotopy
|
30 |
import sys
|
31 |
-
import spacy
|
32 |
-
import en_core_web_sm
|
33 |
import pipeline
|
34 |
from html2image import Html2Image
|
35 |
from umap import UMAP
|
@@ -337,9 +337,9 @@ if uploaded_file is not None:
|
|
337 |
if bert_embedding_model == 'all-MiniLM-L6-v2':
|
338 |
emb_mod = 'all-MiniLM-L6-v2'
|
339 |
lang = 'en'
|
340 |
-
elif bert_embedding_model == 'en_core_web_sm':
|
341 |
-
emb_mod = en_core_web_sm.load(exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
|
342 |
-
lang = 'en'
|
343 |
elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
|
344 |
emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
|
345 |
lang = 'multilingual'
|
|
|
21 |
from io import StringIO
|
22 |
from ipywidgets.embed import embed_minimal_html
|
23 |
from nltk.stem.snowball import SnowballStemmer
|
24 |
+
from bertopic import BERTopic
|
25 |
import plotly.express as px
|
26 |
from sklearn.cluster import KMeans
|
27 |
import bitermplus as btm
|
28 |
import tmplot as tmp
|
29 |
import tomotopy
|
30 |
import sys
|
31 |
+
#import spacy
|
32 |
+
#import en_core_web_sm
|
33 |
import pipeline
|
34 |
from html2image import Html2Image
|
35 |
from umap import UMAP
|
|
|
337 |
if bert_embedding_model == 'all-MiniLM-L6-v2':
|
338 |
emb_mod = 'all-MiniLM-L6-v2'
|
339 |
lang = 'en'
|
340 |
+
#elif bert_embedding_model == 'en_core_web_sm':
|
341 |
+
#emb_mod = en_core_web_sm.load(exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
|
342 |
+
#lang = 'en'
|
343 |
elif bert_embedding_model == 'paraphrase-multilingual-MiniLM-L12-v2':
|
344 |
emb_mod = 'paraphrase-multilingual-MiniLM-L12-v2'
|
345 |
lang = 'multilingual'
|