Spaces:
Running
Running
charlieoneill
committed on
Commit
•
3311ec5
1
Parent(s):
ad123f2
Update app.py
Browse files
app.py
CHANGED
@@ -206,6 +206,25 @@ def get_feature_from_index(subject, index):
|
|
206 |
feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
207 |
return feature
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
def visualize_feature(subject, index):
|
210 |
feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
211 |
if feature is None:
|
@@ -218,12 +237,30 @@ def visualize_feature(subject, index):
|
|
218 |
# Top m abstracts
|
219 |
top_m_abstracts = get_feature_activations(subject, index)
|
220 |
|
221 |
-
# Create dataframe for top abstracts
|
222 |
-
df_data = [
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
df_top_abstracts = pd.DataFrame(df_data)
|
|
|
|
|
|
|
227 |
|
228 |
# Activation value distribution
|
229 |
topk_indices = subject_data[subject]['topk_indices']
|
@@ -249,23 +286,62 @@ def visualize_feature(subject, index):
|
|
249 |
topk_indices_cosine = np.argsort(-cosine_similarities)[:topk]
|
250 |
topk_values_cosine = cosine_similarities[topk_indices_cosine]
|
251 |
|
252 |
-
# Create dataframe for top 5 correlated features
|
253 |
-
df_top_correlated = pd.DataFrame({
|
254 |
-
|
255 |
-
|
256 |
-
})
|
257 |
-
df_top_correlated_styled =
|
|
|
|
|
258 |
|
259 |
bottomk = 5
|
260 |
bottomk_indices_cosine = np.argsort(cosine_similarities)[:bottomk]
|
261 |
bottomk_values_cosine = cosine_similarities[bottomk_indices_cosine]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
# Create dataframe for bottom 5 correlated features
|
264 |
df_bottom_correlated = pd.DataFrame({
|
265 |
"Feature": [get_feature_from_index(subject, i)['label'] for i in bottomk_indices_cosine],
|
266 |
-
"Cosine similarity":
|
|
|
|
|
|
|
267 |
})
|
268 |
-
df_bottom_correlated_styled = style_dataframe(df_bottom_correlated, is_top=False)
|
269 |
|
270 |
# Co-occurrences
|
271 |
co_occurrences = calculate_co_occurrences(subject, index)
|
@@ -278,8 +354,102 @@ def visualize_feature(subject, index):
|
|
278 |
"Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_co_occurrence],
|
279 |
"Co-occurrences": topk_values_co_occurrence
|
280 |
})
|
|
|
|
|
|
|
281 |
|
282 |
-
return output,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
# Modify the main interface function
|
285 |
def create_interface():
|
@@ -334,11 +504,16 @@ def create_interface():
|
|
334 |
manually_added_features_state = gr.State([])
|
335 |
|
336 |
def update_search_results(feature_values, feature_indices, manually_added_features, current_subject):
|
|
|
|
|
|
|
|
|
337 |
ae = subject_data[current_subject]['ae']
|
338 |
abstract_embeddings = subject_data[current_subject]['abstract_embeddings']
|
339 |
abstract_texts = subject_data[current_subject]['abstract_texts']
|
340 |
df_metadata = subject_data[current_subject]['df_metadata']
|
341 |
|
|
|
342 |
# Combine manually added features with query-generated features
|
343 |
all_indices = []
|
344 |
all_values = []
|
@@ -368,12 +543,40 @@ def create_interface():
|
|
368 |
doc_ids = abstract_texts['doc_ids']
|
369 |
topk_doc_ids = [doc_ids[i] for i in topk_indices_search]
|
370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
371 |
# Prepare search results
|
372 |
search_results = []
|
373 |
for doc_id in topk_doc_ids:
|
374 |
metadata = df_metadata[df_metadata['arxiv_id'] == doc_id].iloc[0]
|
375 |
title = metadata['title'].replace('[', '').replace(']', '')
|
376 |
-
# Remove single quotes from title
|
377 |
title = title.replace("'", "")
|
378 |
|
379 |
url_id = doc_id.replace('_arXiv.txt', '')
|
@@ -381,7 +584,6 @@ def create_interface():
|
|
381 |
url_id = url_id.split('astro-ph')[1]
|
382 |
url = f"https://arxiv.org/abs/astro-ph/{url_id}"
|
383 |
else:
|
384 |
-
# Create the clickable link based on the doc_id
|
385 |
if '.' in doc_id:
|
386 |
url = f"https://arxiv.org/abs/{doc_id.replace('_arXiv.txt', '')}"
|
387 |
else:
|
@@ -395,7 +597,14 @@ def create_interface():
|
|
395 |
int(metadata['year'])
|
396 |
])
|
397 |
|
398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
@gr.render(inputs=[input_text, search_results_state, feature_values_state, feature_indices_state, manually_added_features_state, subject])
|
401 |
def show_components(text, search_results, feature_values, feature_indices, manually_added_features, current_subject):
|
@@ -418,11 +627,6 @@ def create_interface():
|
|
418 |
|
419 |
with gr.Row():
|
420 |
with gr.Column(scale=2):
|
421 |
-
# df = gr.Dataframe(
|
422 |
-
# headers=["Title", "Citation Count", "Year"],
|
423 |
-
# value=search_results,
|
424 |
-
# label="Top 10 Search Results"
|
425 |
-
# )
|
426 |
df = gr.Dataframe(
|
427 |
headers=["Title", "Citation Count", "Year"],
|
428 |
value=search_results,
|
@@ -462,18 +666,7 @@ def create_interface():
|
|
462 |
with gr.Column(scale=1):
|
463 |
update_button = gr.Button("Update Results")
|
464 |
sliders = []
|
465 |
-
|
466 |
-
# feature = next((f for f in subject_data[current_subject]['feature_analysis'] if f['index'] == index), None)
|
467 |
-
# label = f"{feature['label']} ({index})" if feature else f"Feature {index}"
|
468 |
-
|
469 |
-
# # Add prefix and change color for manually added features
|
470 |
-
# if index in manually_added_features:
|
471 |
-
# label = f"[Custom] {label}"
|
472 |
-
# slider = gr.Slider(minimum=0, maximum=1, step=0.01, value=value, label=label, key=f"slider-{index}", elem_id=f"custom-slider-{index}")
|
473 |
-
# else:
|
474 |
-
# slider = gr.Slider(minimum=0, maximum=1, step=0.01, value=value, label=label, key=f"slider-{index}")
|
475 |
-
|
476 |
-
# sliders.append(slider)
|
477 |
for i, (value, index) in enumerate(zip(feature_values, feature_indices)):
|
478 |
feature = next((f for f in subject_data[current_subject]['feature_analysis'] if f['index'] == index), None)
|
479 |
label = f"{feature['label']} ({index})" if feature else f"Feature {index}"
|
@@ -491,16 +684,6 @@ def create_interface():
|
|
491 |
|
492 |
sliders.append(slider)
|
493 |
|
494 |
-
# def on_slider_change(*values):
|
495 |
-
# manually_added_features = values[-1]
|
496 |
-
# slider_values = list(values[:-1])
|
497 |
-
|
498 |
-
# # Reconstruct feature_indices based on the order of sliders
|
499 |
-
# reconstructed_indices = [int(slider.label.split('(')[-1].split(')')[0]) for slider in sliders]
|
500 |
-
|
501 |
-
# new_results, new_values, new_indices = update_search_results(slider_values, reconstructed_indices, manually_added_features, current_subject)
|
502 |
-
# return new_results, new_values, new_indices, manually_added_features
|
503 |
-
|
504 |
def on_slider_change(*values):
|
505 |
manually_added_features = values[-1]
|
506 |
slider_values = list(values[:-1])
|
@@ -530,10 +713,18 @@ def create_interface():
|
|
530 |
visualize_button = gr.Button("Visualize Feature")
|
531 |
|
532 |
feature_info = gr.Markdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
534 |
top_abstracts = gr.Dataframe(
|
535 |
headers=["Title", "Activation value"],
|
536 |
-
|
|
|
|
|
537 |
)
|
538 |
|
539 |
gr.Markdown("## Correlated Features")
|
|
|
206 |
feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
207 |
return feature
|
208 |
|
209 |
+
# def visualize_feature(subject, index):
|
210 |
+
# feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
211 |
+
# if feature is None:
|
212 |
+
# return "Invalid feature index", None, None, None, None, None, None
|
213 |
+
|
214 |
+
# output = f"# {feature['label']}\n\n"
|
215 |
+
# output += f"* Pearson correlation: {feature['pearson_correlation']:.4f}\n\n"
|
216 |
+
# output += f"* Density: {feature['density']:.4f}\n\n"
|
217 |
+
|
218 |
+
# # Top m abstracts
|
219 |
+
# top_m_abstracts = get_feature_activations(subject, index)
|
220 |
+
|
221 |
+
# # Create dataframe for top abstracts
|
222 |
+
# df_data = [
|
223 |
+
# {"Title": m[1].split('\n\n')[0], "Activation value": f"{m[2]:.4f}"}
|
224 |
+
# for m in top_m_abstracts
|
225 |
+
# ]
|
226 |
+
# df_top_abstracts = pd.DataFrame(df_data)
|
227 |
+
|
228 |
def visualize_feature(subject, index):
|
229 |
feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
230 |
if feature is None:
|
|
|
237 |
# Top m abstracts
|
238 |
top_m_abstracts = get_feature_activations(subject, index)
|
239 |
|
240 |
+
# Create dataframe for top abstracts with clickable links
|
241 |
+
df_data = []
|
242 |
+
for doc_id, abstract, activation_value in top_m_abstracts:
|
243 |
+
title = abstract.split('\n\n')[0]
|
244 |
+
title = title.replace('[', '').replace(']', '')
|
245 |
+
title = title.replace("'", "")
|
246 |
+
title = title.replace('"', '')
|
247 |
+
url_id = doc_id.replace('_arXiv.txt', '')
|
248 |
+
if 'astro-ph' in url_id:
|
249 |
+
url_id = url_id.split('astro-ph')[1]
|
250 |
+
url = f"https://arxiv.org/abs/astro-ph/{url_id}"
|
251 |
+
else:
|
252 |
+
if '.' in doc_id:
|
253 |
+
url = f"https://arxiv.org/abs/{url_id}"
|
254 |
+
else:
|
255 |
+
url = f"https://arxiv.org/abs/hep-ph/{url_id}"
|
256 |
+
|
257 |
+
linked_title = f"[{title}]({url})"
|
258 |
+
df_data.append({"Title": linked_title, "Activation value": activation_value})
|
259 |
+
|
260 |
df_top_abstracts = pd.DataFrame(df_data)
|
261 |
+
styled_top_abstracts = df_top_abstracts.style.format({
|
262 |
+
"Activation value": "{:.4f}"
|
263 |
+
})
|
264 |
|
265 |
# Activation value distribution
|
266 |
topk_indices = subject_data[subject]['topk_indices']
|
|
|
286 |
topk_indices_cosine = np.argsort(-cosine_similarities)[:topk]
|
287 |
topk_values_cosine = cosine_similarities[topk_indices_cosine]
|
288 |
|
289 |
+
# # Create dataframe for top 5 correlated features
|
290 |
+
# df_top_correlated = pd.DataFrame({
|
291 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_cosine],
|
292 |
+
# "Cosine similarity": topk_values_cosine
|
293 |
+
# })
|
294 |
+
# df_top_correlated_styled = df_top_correlated.style.format({
|
295 |
+
# "Cosine similarity": "{:.4f}"
|
296 |
+
# })
|
297 |
|
298 |
bottomk = 5
|
299 |
bottomk_indices_cosine = np.argsort(cosine_similarities)[:bottomk]
|
300 |
bottomk_values_cosine = cosine_similarities[bottomk_indices_cosine]
|
301 |
+
|
302 |
+
# # Create dataframe for bottom 5 correlated features
|
303 |
+
# df_bottom_correlated = pd.DataFrame({
|
304 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in bottomk_indices_cosine],
|
305 |
+
# "Cosine similarity": bottomk_values_cosine
|
306 |
+
# })
|
307 |
+
# df_bottom_correlated_styled = df_bottom_correlated.style.format({
|
308 |
+
# "Cosine similarity": "{:.4f}"
|
309 |
+
# })
|
310 |
+
|
311 |
+
# # Co-occurrences
|
312 |
+
# co_occurrences = calculate_co_occurrences(subject, index)
|
313 |
+
# topk = 5
|
314 |
+
# topk_indices_co_occurrence = np.argsort(-co_occurrences)[:topk]
|
315 |
+
# topk_values_co_occurrence = co_occurrences[topk_indices_co_occurrence]
|
316 |
+
|
317 |
+
# # Create dataframe for top 5 co-occurring features
|
318 |
+
# df_co_occurrences = pd.DataFrame({
|
319 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_co_occurrence],
|
320 |
+
# "Co-occurrences": topk_values_co_occurrence
|
321 |
+
# })
|
322 |
+
# df_co_occurrences_styled = df_co_occurrences.style.format({
|
323 |
+
# "Co-occurrences": "{:.4f}"
|
324 |
+
# })
|
325 |
+
|
326 |
+
# return output, styled_top_abstracts, df_top_correlated_styled, df_bottom_correlated_styled, df_co_occurrences_styled, fig2
|
327 |
+
|
328 |
+
# Create dataframe for top 5 correlated features
|
329 |
+
df_top_correlated = pd.DataFrame({
|
330 |
+
"Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_cosine],
|
331 |
+
"Cosine similarity": topk_values_cosine
|
332 |
+
})
|
333 |
+
df_top_correlated_styled = df_top_correlated.style.format({
|
334 |
+
"Cosine similarity": "{:.4f}"
|
335 |
+
})
|
336 |
|
337 |
# Create dataframe for bottom 5 correlated features
|
338 |
df_bottom_correlated = pd.DataFrame({
|
339 |
"Feature": [get_feature_from_index(subject, i)['label'] for i in bottomk_indices_cosine],
|
340 |
+
"Cosine similarity": bottomk_values_cosine
|
341 |
+
})
|
342 |
+
df_bottom_correlated_styled = df_bottom_correlated.style.format({
|
343 |
+
"Cosine similarity": "{:.4f}"
|
344 |
})
|
|
|
345 |
|
346 |
# Co-occurrences
|
347 |
co_occurrences = calculate_co_occurrences(subject, index)
|
|
|
354 |
"Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_co_occurrence],
|
355 |
"Co-occurrences": topk_values_co_occurrence
|
356 |
})
|
357 |
+
df_co_occurrences_styled = df_co_occurrences.style.format({
|
358 |
+
"Co-occurrences": "{:.0f}" # Keep as integer
|
359 |
+
})
|
360 |
|
361 |
+
return output, styled_top_abstracts, df_top_correlated_styled, df_bottom_correlated_styled, df_co_occurrences_styled, fig2
|
362 |
+
|
363 |
+
# def visualize_feature(subject, index):
|
364 |
+
# feature = next((f for f in subject_data[subject]['feature_analysis'] if f['index'] == index), None)
|
365 |
+
# if feature is None:
|
366 |
+
# return "Invalid feature index", None, None, None, None, None, None
|
367 |
+
|
368 |
+
# output = f"# {feature['label']}\n\n"
|
369 |
+
# output += f"* Pearson correlation: {feature['pearson_correlation']:.4f}\n\n"
|
370 |
+
# output += f"* Density: {feature['density']:.4f}\n\n"
|
371 |
+
|
372 |
+
# # Top m abstracts
|
373 |
+
# top_m_abstracts = get_feature_activations(subject, index)
|
374 |
+
|
375 |
+
# # Create dataframe for top abstracts with clickable links
|
376 |
+
# df_data = []
|
377 |
+
# for doc_id, abstract, activation_value in top_m_abstracts:
|
378 |
+
# title = abstract.split('\n\n')[0]
|
379 |
+
# title = title.replace('[', '').replace(']', '')
|
380 |
+
# title = title.replace("'", "")
|
381 |
+
# title = title.replace('"', '')
|
382 |
+
# url_id = doc_id.replace('_arXiv.txt', '')
|
383 |
+
# if 'astro-ph' in url_id:
|
384 |
+
# url_id = url_id.split('astro-ph')[1]
|
385 |
+
# url = f"https://arxiv.org/abs/astro-ph/{url_id}"
|
386 |
+
# else:
|
387 |
+
# if '.' in doc_id:
|
388 |
+
# url = f"https://arxiv.org/abs/{url_id}"
|
389 |
+
# else:
|
390 |
+
# url = f"https://arxiv.org/abs/hep-ph/{url_id}"
|
391 |
+
|
392 |
+
# linked_title = f"[{title}]({url})"
|
393 |
+
# df_data.append({"Title": linked_title, "Activation value": activation_value})
|
394 |
+
|
395 |
+
# df_top_abstracts = pd.DataFrame(df_data)
|
396 |
+
|
397 |
+
# # Activation value distribution
|
398 |
+
# topk_indices = subject_data[subject]['topk_indices']
|
399 |
+
# topk_values = subject_data[subject]['topk_values']
|
400 |
+
|
401 |
+
# activation_values = np.where(topk_indices == index, topk_values, 0).max(axis=1)
|
402 |
+
# fig2 = px.histogram(x=activation_values, nbins=50)
|
403 |
+
# fig2.update_layout(
|
404 |
+
# #title=f'{feature["label"]}',
|
405 |
+
# xaxis_title='Activation value',
|
406 |
+
# yaxis_title=None,
|
407 |
+
# yaxis_type='log',
|
408 |
+
# height=220,
|
409 |
+
# )
|
410 |
+
|
411 |
+
# # Correlated features
|
412 |
+
# decoder = subject_data[subject]['decoder']
|
413 |
+
# feature_vector = decoder[:, index]
|
414 |
+
# decoder_without_feature = np.delete(decoder, index, axis=1)
|
415 |
+
# cosine_similarities = np.dot(feature_vector, decoder_without_feature) / (np.linalg.norm(decoder_without_feature, axis=0) * np.linalg.norm(feature_vector))
|
416 |
+
|
417 |
+
# topk = 5
|
418 |
+
# topk_indices_cosine = np.argsort(-cosine_similarities)[:topk]
|
419 |
+
# topk_values_cosine = cosine_similarities[topk_indices_cosine]
|
420 |
+
|
421 |
+
# # Create dataframe for top 5 correlated features
|
422 |
+
# df_top_correlated = pd.DataFrame({
|
423 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_cosine],
|
424 |
+
# "Cosine similarity": [f"{v:.4f}" for v in topk_values_cosine]
|
425 |
+
# })
|
426 |
+
# df_top_correlated_styled = style_dataframe(df_top_correlated, is_top=True)
|
427 |
+
|
428 |
+
# bottomk = 5
|
429 |
+
# bottomk_indices_cosine = np.argsort(cosine_similarities)[:bottomk]
|
430 |
+
# bottomk_values_cosine = cosine_similarities[bottomk_indices_cosine]
|
431 |
+
|
432 |
+
# # Create dataframe for bottom 5 correlated features
|
433 |
+
# df_bottom_correlated = pd.DataFrame({
|
434 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in bottomk_indices_cosine],
|
435 |
+
# "Cosine similarity": [f"{v:.4f}" for v in bottomk_values_cosine]
|
436 |
+
# })
|
437 |
+
# df_bottom_correlated_styled = style_dataframe(df_bottom_correlated, is_top=False)
|
438 |
+
|
439 |
+
# # Co-occurrences
|
440 |
+
# co_occurrences = calculate_co_occurrences(subject, index)
|
441 |
+
# topk = 5
|
442 |
+
# topk_indices_co_occurrence = np.argsort(-co_occurrences)[:topk]
|
443 |
+
# topk_values_co_occurrence = co_occurrences[topk_indices_co_occurrence]
|
444 |
+
|
445 |
+
# # Create dataframe for top 5 co-occurring features
|
446 |
+
# df_co_occurrences = pd.DataFrame({
|
447 |
+
# "Feature": [get_feature_from_index(subject, i)['label'] for i in topk_indices_co_occurrence],
|
448 |
+
# "Co-occurrences": topk_values_co_occurrence
|
449 |
+
# })
|
450 |
+
|
451 |
+
# #return output, df_top_abstracts, df_top_correlated_styled, df_bottom_correlated_styled, df_co_occurrences, fig2
|
452 |
+
# return output, df_top_abstracts, df_top_correlated_styled, df_bottom_correlated_styled, df_co_occurrences, fig2
|
453 |
|
454 |
# Modify the main interface function
|
455 |
def create_interface():
|
|
|
504 |
manually_added_features_state = gr.State([])
|
505 |
|
506 |
def update_search_results(feature_values, feature_indices, manually_added_features, current_subject):
|
507 |
+
# ae = subject_data[current_subject]['ae']
|
508 |
+
# abstract_embeddings = subject_data[current_subject]['abstract_embeddings']
|
509 |
+
# abstract_texts = subject_data[current_subject]['abstract_texts']
|
510 |
+
# df_metadata = subject_data[current_subject]['df_metadata']
|
511 |
ae = subject_data[current_subject]['ae']
|
512 |
abstract_embeddings = subject_data[current_subject]['abstract_embeddings']
|
513 |
abstract_texts = subject_data[current_subject]['abstract_texts']
|
514 |
df_metadata = subject_data[current_subject]['df_metadata']
|
515 |
|
516 |
+
|
517 |
# Combine manually added features with query-generated features
|
518 |
all_indices = []
|
519 |
all_values = []
|
|
|
543 |
doc_ids = abstract_texts['doc_ids']
|
544 |
topk_doc_ids = [doc_ids[i] for i in topk_indices_search]
|
545 |
|
546 |
+
# # Prepare search results
|
547 |
+
# search_results = []
|
548 |
+
# for doc_id in topk_doc_ids:
|
549 |
+
# metadata = df_metadata[df_metadata['arxiv_id'] == doc_id].iloc[0]
|
550 |
+
# title = metadata['title'].replace('[', '').replace(']', '')
|
551 |
+
# # Remove single quotes from title
|
552 |
+
# title = title.replace("'", "")
|
553 |
+
|
554 |
+
# url_id = doc_id.replace('_arXiv.txt', '')
|
555 |
+
# if 'astro-ph' in url_id:
|
556 |
+
# url_id = url_id.split('astro-ph')[1]
|
557 |
+
# url = f"https://arxiv.org/abs/astro-ph/{url_id}"
|
558 |
+
# else:
|
559 |
+
# # Create the clickable link based on the doc_id
|
560 |
+
# if '.' in doc_id:
|
561 |
+
# url = f"https://arxiv.org/abs/{doc_id.replace('_arXiv.txt', '')}"
|
562 |
+
# else:
|
563 |
+
# url = f"https://arxiv.org/abs/hep-ph/{doc_id.replace('_arXiv.txt', '')}"
|
564 |
+
|
565 |
+
# linked_title = f"[{title}]({url})"
|
566 |
+
|
567 |
+
# search_results.append([
|
568 |
+
# linked_title,
|
569 |
+
# int(metadata['citation_count']),
|
570 |
+
# int(metadata['year'])
|
571 |
+
# ])
|
572 |
+
|
573 |
+
# return search_results, all_values, all_indices
|
574 |
+
|
575 |
# Prepare search results
|
576 |
search_results = []
|
577 |
for doc_id in topk_doc_ids:
|
578 |
metadata = df_metadata[df_metadata['arxiv_id'] == doc_id].iloc[0]
|
579 |
title = metadata['title'].replace('[', '').replace(']', '')
|
|
|
580 |
title = title.replace("'", "")
|
581 |
|
582 |
url_id = doc_id.replace('_arXiv.txt', '')
|
|
|
584 |
url_id = url_id.split('astro-ph')[1]
|
585 |
url = f"https://arxiv.org/abs/astro-ph/{url_id}"
|
586 |
else:
|
|
|
587 |
if '.' in doc_id:
|
588 |
url = f"https://arxiv.org/abs/{doc_id.replace('_arXiv.txt', '')}"
|
589 |
else:
|
|
|
597 |
int(metadata['year'])
|
598 |
])
|
599 |
|
600 |
+
# Convert search_results to a DataFrame and apply styling
|
601 |
+
df_search_results = pd.DataFrame(search_results, columns=["Title", "Citation Count", "Year"])
|
602 |
+
styled_search_results = df_search_results.style.format({
|
603 |
+
"Citation Count": "{:.0f}", # Keep as integer
|
604 |
+
"Year": "{:.0f}" # Keep as integer
|
605 |
+
})
|
606 |
+
|
607 |
+
return styled_search_results, all_values, all_indices
|
608 |
|
609 |
@gr.render(inputs=[input_text, search_results_state, feature_values_state, feature_indices_state, manually_added_features_state, subject])
|
610 |
def show_components(text, search_results, feature_values, feature_indices, manually_added_features, current_subject):
|
|
|
627 |
|
628 |
with gr.Row():
|
629 |
with gr.Column(scale=2):
|
|
|
|
|
|
|
|
|
|
|
630 |
df = gr.Dataframe(
|
631 |
headers=["Title", "Citation Count", "Year"],
|
632 |
value=search_results,
|
|
|
666 |
with gr.Column(scale=1):
|
667 |
update_button = gr.Button("Update Results")
|
668 |
sliders = []
|
669 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
670 |
for i, (value, index) in enumerate(zip(feature_values, feature_indices)):
|
671 |
feature = next((f for f in subject_data[current_subject]['feature_analysis'] if f['index'] == index), None)
|
672 |
label = f"{feature['label']} ({index})" if feature else f"Feature {index}"
|
|
|
684 |
|
685 |
sliders.append(slider)
|
686 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
687 |
def on_slider_change(*values):
|
688 |
manually_added_features = values[-1]
|
689 |
slider_values = list(values[:-1])
|
|
|
713 |
visualize_button = gr.Button("Visualize Feature")
|
714 |
|
715 |
feature_info = gr.Markdown()
|
716 |
+
# abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
717 |
+
# top_abstracts = gr.Dataframe(
|
718 |
+
# headers=["Title", "Activation value"],
|
719 |
+
# interactive=False
|
720 |
+
# )
|
721 |
+
|
722 |
abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
723 |
top_abstracts = gr.Dataframe(
|
724 |
headers=["Title", "Activation value"],
|
725 |
+
datatype=["markdown", "number"],
|
726 |
+
interactive=False,
|
727 |
+
wrap=True
|
728 |
)
|
729 |
|
730 |
gr.Markdown("## Correlated Features")
|