Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import datasets
|
|
5 |
import pandas as pd
|
6 |
import gradio as gr
|
7 |
|
|
|
8 |
from datetime import datetime, timedelta
|
9 |
from datasets import Dataset
|
10 |
from huggingface_hub import HfApi
|
@@ -24,6 +25,15 @@ from constants.js import UPDATE_SEARCH_RESULTS, UPDATE_IF_TYPE
|
|
24 |
|
25 |
from apscheduler.schedulers.background import BackgroundScheduler
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
gemini_api_key = os.getenv("GEMINI_API_KEY")
|
28 |
hf_token = os.getenv("HF_TOKEN")
|
29 |
|
@@ -40,7 +50,42 @@ requested_arxiv_ids_df = pd.DataFrame({'Requested arXiv IDs': requested_arxiv_id
|
|
40 |
|
41 |
title2qna = {}
|
42 |
date2qna = {}
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
def filter_function(example, ids):
|
46 |
ids_e = example['Requested arXiv IDs']
|
@@ -197,54 +242,38 @@ def add_arxiv_ids_to_queue(queue, arxiv_ids_str):
|
|
197 |
else:
|
198 |
gr.Warning(f"No valid arXiv IDs found...")
|
199 |
|
200 |
-
return
|
201 |
-
|
202 |
-
|
203 |
-
count = 0
|
204 |
-
|
205 |
-
for _, (k, v) in enumerate(data.items()):
|
206 |
-
if v is None:
|
207 |
-
count = count + 1
|
208 |
-
|
209 |
-
return count
|
210 |
-
|
211 |
-
for data in ds["train"]:
|
212 |
-
date = data["target_date"].strftime("%Y-%m-%d")
|
213 |
-
|
214 |
-
if date in date2qna:
|
215 |
-
papers = copy.deepcopy(date2qna[date])
|
216 |
-
for paper in papers:
|
217 |
-
if paper["title"] == data["title"]:
|
218 |
-
if count_nans(paper) > count_nans(data):
|
219 |
-
date2qna[date].remove(paper)
|
220 |
-
|
221 |
-
date2qna[date].append(data)
|
222 |
-
del papers
|
223 |
-
else:
|
224 |
-
date2qna[date] = [data]
|
225 |
-
|
226 |
-
for date in date2qna:
|
227 |
-
papers = date2qna[date]
|
228 |
-
for paper in papers:
|
229 |
-
title2qna[paper["title"]] = paper
|
230 |
-
|
231 |
-
titles = title2qna.keys()
|
232 |
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
-
def
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
243 |
)
|
244 |
|
245 |
-
def
|
|
|
|
|
|
|
|
|
246 |
selected_paper = None
|
247 |
-
for paper in
|
248 |
if paper["title"] == paper_title:
|
249 |
selected_paper = paper
|
250 |
break
|
@@ -331,11 +360,11 @@ def search(search_in, max_results=3):
|
|
331 |
|
332 |
def set_date(title):
|
333 |
paper = title2qna[title]
|
334 |
-
|
335 |
-
return
|
336 |
|
337 |
-
def set_papers(
|
338 |
-
papers = [paper["title"] for paper in
|
339 |
return (
|
340 |
gr.Dropdown(choices=papers, value=title),
|
341 |
gr.Textbox("")
|
@@ -346,13 +375,28 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
|
|
346 |
|
347 |
with gr.Column(elem_id="control-panel", elem_classes=["group"]):
|
348 |
with gr.Row():
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
papers_dd = gr.Dropdown(
|
357 |
[paper["title"] for paper in last_papers],
|
358 |
value=selected_paper["title"],
|
@@ -459,7 +503,7 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
|
|
459 |
arxiv_id_enter.submit(
|
460 |
add_arxiv_ids_to_queue,
|
461 |
[arxiv_queue, arxiv_id_enter],
|
462 |
-
arxiv_queue
|
463 |
)
|
464 |
|
465 |
|
@@ -468,71 +512,121 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
|
|
468 |
"If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis) "
|
469 |
"Also, the generated dataset is hosted on Hugging Face 🤗 Dataset repository as well([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)). ")
|
470 |
|
471 |
-
search_r1.click(set_date, search_r1,
|
472 |
set_papers,
|
473 |
-
inputs=[
|
474 |
outputs=[papers_dd, search_in]
|
475 |
)
|
476 |
|
477 |
-
search_r2.click(set_date, search_r2,
|
478 |
set_papers,
|
479 |
-
inputs=[
|
480 |
outputs=[papers_dd, search_in]
|
481 |
)
|
482 |
|
483 |
-
search_r3.click(set_date, search_r3,
|
484 |
set_papers,
|
485 |
-
inputs=[
|
486 |
outputs=[papers_dd, search_in]
|
487 |
)
|
488 |
|
489 |
-
search_r4.click(set_date, search_r4,
|
490 |
set_papers,
|
491 |
-
inputs=[
|
492 |
outputs=[papers_dd, search_in]
|
493 |
)
|
494 |
|
495 |
-
search_r5.click(set_date, search_r5,
|
496 |
set_papers,
|
497 |
-
inputs=[
|
498 |
outputs=[papers_dd, search_in]
|
499 |
)
|
500 |
|
501 |
-
search_r6.click(set_date, search_r6,
|
502 |
set_papers,
|
503 |
-
inputs=[
|
504 |
outputs=[papers_dd, search_in]
|
505 |
)
|
506 |
|
507 |
-
search_r7.click(set_date, search_r7,
|
508 |
set_papers,
|
509 |
-
inputs=[
|
510 |
outputs=[papers_dd, search_in]
|
511 |
)
|
512 |
|
513 |
-
search_r8.click(set_date, search_r8,
|
514 |
set_papers,
|
515 |
-
inputs=[
|
516 |
outputs=[papers_dd, search_in]
|
517 |
)
|
518 |
|
519 |
-
search_r9.click(set_date, search_r9,
|
520 |
set_papers,
|
521 |
-
inputs=[
|
522 |
outputs=[papers_dd, search_in]
|
523 |
)
|
524 |
|
525 |
-
search_r10.click(set_date, search_r10,
|
526 |
set_papers,
|
527 |
-
inputs=[
|
528 |
outputs=[papers_dd, search_in]
|
529 |
)
|
530 |
|
531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
set_paper,
|
533 |
-
[
|
534 |
[
|
535 |
-
title,
|
536 |
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
537 |
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
538 |
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|
@@ -549,9 +643,9 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
|
|
549 |
|
550 |
papers_dd.change(
|
551 |
set_paper,
|
552 |
-
[
|
553 |
[
|
554 |
-
title,
|
555 |
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
556 |
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
557 |
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|
|
|
5 |
import pandas as pd
|
6 |
import gradio as gr
|
7 |
|
8 |
+
from collections import defaultdict
|
9 |
from datetime import datetime, timedelta
|
10 |
from datasets import Dataset
|
11 |
from huggingface_hub import HfApi
|
|
|
25 |
|
26 |
from apscheduler.schedulers.background import BackgroundScheduler
|
27 |
|
28 |
+
def count_nans(row):
    """Return the number of fields in ``row`` whose value is ``None``.

    Args:
        row: A mapping of field name to value (anything with ``.values()``).

    Returns:
        int: How many values in ``row`` are ``None``.
    """
    # Count over the ``row`` argument itself. The previous body iterated a
    # name ``data`` that is not a parameter, so the argument was ignored.
    return sum(1 for value in row.values() if value is None)
|
36 |
+
|
37 |
gemini_api_key = os.getenv("GEMINI_API_KEY")
|
38 |
hf_token = os.getenv("HF_TOKEN")
|
39 |
|
|
|
50 |
|
51 |
title2qna = {}
|
52 |
date2qna = {}
|
53 |
+
date_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
54 |
+
|
55 |
+
for data in ds["train"]:
|
56 |
+
date = data["target_date"].strftime("%Y-%m-%d")
|
57 |
+
|
58 |
+
if date in date2qna:
|
59 |
+
papers = copy.deepcopy(date2qna[date])
|
60 |
+
for paper in papers:
|
61 |
+
if paper["title"] == data["title"]:
|
62 |
+
if count_nans(paper) > count_nans(data):
|
63 |
+
date2qna[date].remove(paper)
|
64 |
+
|
65 |
+
date2qna[date].append(data)
|
66 |
+
del papers
|
67 |
+
else:
|
68 |
+
date2qna[date] = [data]
|
69 |
+
|
70 |
+
for date in date2qna:
|
71 |
+
year, month, day = date.split("-")
|
72 |
+
papers = date2qna[date]
|
73 |
+
for paper in papers:
|
74 |
+
title2qna[paper["title"]] = paper
|
75 |
+
date_dict[year][month][day].append(paper)
|
76 |
+
|
77 |
+
titles = title2qna.keys()
|
78 |
+
|
79 |
+
sorted_dates = sorted(date2qna.keys())
|
80 |
+
|
81 |
+
sorted_year = sorted(date_dict.keys())
|
82 |
+
last_year = sorted_year[-1]
|
83 |
+
sorted_month = sorted(date_dict[last_year].keys())
|
84 |
+
last_month = sorted_month[-1]
|
85 |
+
sorted_day = sorted(date_dict[last_year][last_month].keys())
|
86 |
+
last_day = sorted_day[-1]
|
87 |
+
# Papers for the most recent year/month/day. The day key computed just above
# is ``last_day``; the former ``last_date`` name is never defined.
last_papers = date_dict[last_year][last_month][last_day]
|
88 |
+
selected_paper = last_papers[0]
|
89 |
|
90 |
def filter_function(example, ids):
|
91 |
ids_e = example['Requested arXiv IDs']
|
|
|
242 |
else:
|
243 |
gr.Warning(f"No valid arXiv IDs found...")
|
244 |
|
245 |
+
return (
|
246 |
+
queue, gr.Textbox("")
|
247 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
def get_paper_by_year(y):
    """Build dropdown updates for the last month and day recorded under year ``y``.

    Reads ``date_dict[y]``, takes the last month in sorted order, then the
    last day of that month in sorted order, and collects the titles of the
    papers stored for that day.

    Returns:
        A 3-tuple of ``gr.Dropdown`` objects: months (last one selected),
        days (last one selected) and paper titles (first one selected).
    """
    months = sorted(date_dict[y].keys())
    latest_month = months[-1]

    days = sorted(date_dict[y][latest_month].keys())
    latest_day = days[-1]

    paper_titles = [
        entry["title"] for entry in date_dict[y][latest_month][latest_day]
    ]

    month_dropdown = gr.Dropdown(choices=months, value=latest_month)
    day_dropdown = gr.Dropdown(choices=days, value=latest_day)
    paper_dropdown = gr.Dropdown(choices=paper_titles, value=paper_titles[0])
    return month_dropdown, day_dropdown, paper_dropdown
|
260 |
|
261 |
+
def get_paper_by_month(y, m):
    """Build dropdown updates for the last day recorded under year ``y``, month ``m``.

    Reads ``date_dict[y][m]``, takes the last day in sorted order and collects
    the titles of the papers stored for that day.

    Returns:
        A 2-tuple of ``gr.Dropdown`` objects: days (last one selected) and
        paper titles (first one selected).
    """
    days = sorted(date_dict[y][m].keys())
    latest_day = days[-1]

    paper_titles = [entry["title"] for entry in date_dict[y][m][latest_day]]

    day_dropdown = gr.Dropdown(choices=days, value=latest_day)
    paper_dropdown = gr.Dropdown(choices=paper_titles, value=paper_titles[0])
    return day_dropdown, paper_dropdown
|
269 |
|
270 |
+
def get_paper_by_day(y, m, d):
    """Return a ``gr.Dropdown`` of paper titles stored under ``date_dict[y][m][d]``.

    The first title in the list is used as the selected value.
    """
    paper_titles = [entry["title"] for entry in date_dict[y][m][d]]
    first_title = paper_titles[0]
    return gr.Dropdown(choices=paper_titles, value=first_title)
|
273 |
+
|
274 |
+
def set_paper(y, m, d, paper_title):
|
275 |
selected_paper = None
|
276 |
+
for paper in date_dict[y][m][d]:
|
277 |
if paper["title"] == paper_title:
|
278 |
selected_paper = paper
|
279 |
break
|
|
|
360 |
|
361 |
def set_date(title):
    """Return the ``(year, month, day)`` strings for the paper named ``title``.

    The paper is looked up in ``title2qna``; its ``target_date`` is formatted
    as ``%Y-%m-%d`` and split on ``-``.
    """
    target_date = title2qna[title]["target_date"]
    formatted = target_date.strftime("%Y-%m-%d")
    year, month, day = formatted.split("-")
    return year, month, day
|
365 |
|
366 |
+
def set_papers(y, m, d, title):
|
367 |
+
papers = [paper["title"] for paper in date_dict[y][m][d]]
|
368 |
return (
|
369 |
gr.Dropdown(choices=papers, value=title),
|
370 |
gr.Textbox("")
|
|
|
375 |
|
376 |
with gr.Column(elem_id="control-panel", elem_classes=["group"]):
|
377 |
with gr.Row():
|
378 |
+
with gr.Row():
|
379 |
+
year_dd = gr.Dropdown(
|
380 |
+
sorted_year,
|
381 |
+
value=last_year,
|
382 |
+
label="Year",
|
383 |
+
interactive=True,
|
384 |
+
)
|
385 |
+
|
386 |
+
month_dd = gr.Dropdown(
|
387 |
+
sorted_month,
|
388 |
+
value=last_month,
|
389 |
+
label="Month",
|
390 |
+
interactive=True,
|
391 |
+
)
|
392 |
+
|
393 |
+
day_dd = gr.Dropdown(
|
394 |
+
sorted_day,
|
395 |
+
value=last_day,
|
396 |
+
label="Day",
|
397 |
+
interactive=True,
|
398 |
+
)
|
399 |
+
|
400 |
papers_dd = gr.Dropdown(
|
401 |
[paper["title"] for paper in last_papers],
|
402 |
value=selected_paper["title"],
|
|
|
503 |
arxiv_id_enter.submit(
|
504 |
add_arxiv_ids_to_queue,
|
505 |
[arxiv_queue, arxiv_id_enter],
|
506 |
+
[arxiv_queue, arxiv_id_enter]
|
507 |
)
|
508 |
|
509 |
|
|
|
512 |
"If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis) "
|
513 |
"Also, the generated dataset is hosted on Hugging Face 🤗 Dataset repository as well([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)). ")
|
514 |
|
515 |
+
search_r1.click(set_date, search_r1, [year_dd, month_dd, day_dd]).then(
|
516 |
set_papers,
|
517 |
+
inputs=[year_dd, month_dd, day_dd, search_r1],
|
518 |
outputs=[papers_dd, search_in]
|
519 |
)
|
520 |
|
521 |
+
search_r2.click(set_date, search_r2, [year_dd, month_dd, day_dd]).then(
|
522 |
set_papers,
|
523 |
+
inputs=[year_dd, month_dd, day_dd, search_r2],
|
524 |
outputs=[papers_dd, search_in]
|
525 |
)
|
526 |
|
527 |
+
search_r3.click(set_date, search_r3, [year_dd, month_dd, day_dd]).then(
|
528 |
set_papers,
|
529 |
+
inputs=[year_dd, month_dd, day_dd, search_r3],
|
530 |
outputs=[papers_dd, search_in]
|
531 |
)
|
532 |
|
533 |
+
search_r4.click(set_date, search_r4, [year_dd, month_dd, day_dd]).then(
|
534 |
set_papers,
|
535 |
+
inputs=[year_dd, month_dd, day_dd, search_r4],
|
536 |
outputs=[papers_dd, search_in]
|
537 |
)
|
538 |
|
539 |
+
search_r5.click(set_date, search_r5, [year_dd, month_dd, day_dd]).then(
|
540 |
set_papers,
|
541 |
+
inputs=[year_dd, month_dd, day_dd, search_r5],
|
542 |
outputs=[papers_dd, search_in]
|
543 |
)
|
544 |
|
545 |
+
search_r6.click(set_date, search_r6, [year_dd, month_dd, day_dd]).then(
|
546 |
set_papers,
|
547 |
+
inputs=[year_dd, month_dd, day_dd, search_r6],
|
548 |
outputs=[papers_dd, search_in]
|
549 |
)
|
550 |
|
551 |
+
search_r7.click(set_date, search_r7, [year_dd, month_dd, day_dd]).then(
|
552 |
set_papers,
|
553 |
+
inputs=[year_dd, month_dd, day_dd, search_r7],
|
554 |
outputs=[papers_dd, search_in]
|
555 |
)
|
556 |
|
557 |
+
search_r8.click(set_date, search_r8, [year_dd, month_dd, day_dd]).then(
|
558 |
set_papers,
|
559 |
+
inputs=[year_dd, month_dd, day_dd, search_r8],
|
560 |
outputs=[papers_dd, search_in]
|
561 |
)
|
562 |
|
563 |
+
search_r9.click(set_date, search_r9, [year_dd, month_dd, day_dd]).then(
|
564 |
set_papers,
|
565 |
+
inputs=[year_dd, month_dd, day_dd, search_r9],
|
566 |
outputs=[papers_dd, search_in]
|
567 |
)
|
568 |
|
569 |
+
search_r10.click(set_date, search_r10, [year_dd, month_dd, day_dd]).then(
|
570 |
set_papers,
|
571 |
+
inputs=[year_dd, month_dd, day_dd, search_r10],
|
572 |
outputs=[papers_dd, search_in]
|
573 |
)
|
574 |
|
575 |
+
year_dd.input(
|
576 |
+
get_paper_by_year,
|
577 |
+
inputs=[year_dd],
|
578 |
+
outputs=[month_dd, day_dd, papers_dd]
|
579 |
+
).then(
|
580 |
+
set_paper,
|
581 |
+
[year_dd, month_dd, day_dd, papers_dd],
|
582 |
+
[
|
583 |
+
title, summary,
|
584 |
+
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
585 |
+
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
586 |
+
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|
587 |
+
|
588 |
+
basic_q_1, basic_q_eli5_1, basic_q_expert_1,
|
589 |
+
depth_q_1, depth_q_eli5_1, depth_q_expert_1,
|
590 |
+
breath_q_1, breath_q_eli5_1, breath_q_expert_1,
|
591 |
+
|
592 |
+
basic_q_2, basic_q_eli5_2, basic_q_expert_2,
|
593 |
+
depth_q_2, depth_q_eli5_2, depth_q_expert_2,
|
594 |
+
breath_q_2, breath_q_eli5_2, breath_q_expert_2
|
595 |
+
]
|
596 |
+
)
|
597 |
+
|
598 |
+
month_dd.input(
|
599 |
+
get_paper_by_month,
|
600 |
+
inputs=[year_dd, month_dd],
|
601 |
+
outputs=[day_dd, papers_dd]
|
602 |
+
).then(
|
603 |
+
set_paper,
|
604 |
+
[year_dd, month_dd, day_dd, papers_dd],
|
605 |
+
[
|
606 |
+
title, summary,
|
607 |
+
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
608 |
+
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
609 |
+
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|
610 |
+
|
611 |
+
basic_q_1, basic_q_eli5_1, basic_q_expert_1,
|
612 |
+
depth_q_1, depth_q_eli5_1, depth_q_expert_1,
|
613 |
+
breath_q_1, breath_q_eli5_1, breath_q_expert_1,
|
614 |
+
|
615 |
+
basic_q_2, basic_q_eli5_2, basic_q_expert_2,
|
616 |
+
depth_q_2, depth_q_eli5_2, depth_q_expert_2,
|
617 |
+
breath_q_2, breath_q_eli5_2, breath_q_expert_2
|
618 |
+
]
|
619 |
+
)
|
620 |
+
|
621 |
+
day_dd.input(
|
622 |
+
get_paper_by_day,
|
623 |
+
inputs=[year_dd, month_dd, day_dd],
|
624 |
+
outputs=[papers_dd]
|
625 |
+
).then(
|
626 |
set_paper,
|
627 |
+
[year_dd, month_dd, day_dd, papers_dd],
|
628 |
[
|
629 |
+
title, summary,
|
630 |
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
631 |
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
632 |
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|
|
|
643 |
|
644 |
papers_dd.change(
|
645 |
set_paper,
|
646 |
+
[year_dd, month_dd, day_dd, papers_dd],
|
647 |
[
|
648 |
+
title, summary,
|
649 |
basic_q_0, basic_q_eli5_0, basic_q_expert_0,
|
650 |
depth_q_0, depth_q_eli5_0, depth_q_expert_0,
|
651 |
breath_q_0, breath_q_eli5_0, breath_q_expert_0,
|