more debugging
Browse files
app.py
CHANGED
@@ -185,6 +185,8 @@ def get_topic_value(row, i):
|
|
185 |
print(e)
|
186 |
|
187 |
def full_lda(df):
|
|
|
|
|
188 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
189 |
|
190 |
# Apply the function above and get tweets free of emoji's
|
@@ -243,6 +245,7 @@ def full_lda(df):
|
|
243 |
# Apply tokenizer
|
244 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
245 |
|
|
|
246 |
# Create a id2word dictionary
|
247 |
global id2word
|
248 |
id2word = Dictionary(df['lemma_tokens'])
|
@@ -289,6 +292,7 @@ def full_lda(df):
|
|
289 |
global num_topics
|
290 |
num_topics = coherence_averages.index(k_max) + 2
|
291 |
|
|
|
292 |
grid = {}
|
293 |
grid['Validation_Set'] = {}
|
294 |
|
@@ -360,6 +364,7 @@ def full_lda(df):
|
|
360 |
|
361 |
lda_topics = lda_model_final.show_topics(num_words=10)
|
362 |
|
|
|
363 |
topics = []
|
364 |
filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
|
365 |
|
@@ -377,6 +382,7 @@ def full_lda(df):
|
|
377 |
topic_clusters.append(df[df['max_topic'].isin(([i]))])
|
378 |
topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
|
379 |
|
|
|
380 |
global top_tweets
|
381 |
top_tweets = []
|
382 |
for i in range(len(topic_clusters)):
|
|
|
185 |
print(e)
|
186 |
|
187 |
def full_lda(df):
|
188 |
+
|
189 |
+
print('cleaning')
|
190 |
df.rename(columns = {'tweet':'original_tweets'}, inplace = True)
|
191 |
|
192 |
# Apply the function above and get tweets free of emoji's
|
|
|
245 |
# Apply tokenizer
|
246 |
df['lemma_tokens'] = df['lemmas_back_to_text'].apply(tokenize)
|
247 |
|
248 |
+
print('base model setup')
|
249 |
# Create a id2word dictionary
|
250 |
global id2word
|
251 |
id2word = Dictionary(df['lemma_tokens'])
|
|
|
292 |
global num_topics
|
293 |
num_topics = coherence_averages.index(k_max) + 2
|
294 |
|
295 |
+
print('hyperparameter opt')
|
296 |
grid = {}
|
297 |
grid['Validation_Set'] = {}
|
298 |
|
|
|
364 |
|
365 |
lda_topics = lda_model_final.show_topics(num_words=10)
|
366 |
|
367 |
+
print('assign topics')
|
368 |
topics = []
|
369 |
filters = [lambda x: x.lower(), strip_punctuation, strip_numeric]
|
370 |
|
|
|
382 |
topic_clusters.append(df[df['max_topic'].isin(([i]))])
|
383 |
topic_clusters[i] = topic_clusters[i]['original_tweets'].tolist()
|
384 |
|
385 |
+
print('rep topics')
|
386 |
global top_tweets
|
387 |
top_tweets = []
|
388 |
for i in range(len(topic_clusters)):
|
appv1.py
CHANGED
@@ -555,5 +555,6 @@ iface = gr.Interface(fn=main,
|
|
555 |
],
|
556 |
# examples=examples,
|
557 |
outputs=["text",
|
558 |
-
"text"]
|
|
|
559 |
iface.launch()
|
|
|
555 |
],
|
556 |
# examples=examples,
|
557 |
outputs=["text",
|
558 |
+
"text"]
|
559 |
+
)
|
560 |
iface.launch()
|