chansung committed on
Commit
4572f9c
•
1 Parent(s): f4902e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -81
app.py CHANGED
@@ -5,6 +5,7 @@ import datasets
5
  import pandas as pd
6
  import gradio as gr
7
 
 
8
  from datetime import datetime, timedelta
9
  from datasets import Dataset
10
  from huggingface_hub import HfApi
@@ -24,6 +25,15 @@ from constants.js import UPDATE_SEARCH_RESULTS, UPDATE_IF_TYPE
24
 
25
  from apscheduler.schedulers.background import BackgroundScheduler
26
 
 
 
 
 
 
 
 
 
 
27
  gemini_api_key = os.getenv("GEMINI_API_KEY")
28
  hf_token = os.getenv("HF_TOKEN")
29
 
@@ -40,7 +50,42 @@ requested_arxiv_ids_df = pd.DataFrame({'Requested arXiv IDs': requested_arxiv_id
40
 
41
  title2qna = {}
42
  date2qna = {}
43
- longest_qans = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def filter_function(example, ids):
46
  ids_e = example['Requested arXiv IDs']
@@ -197,54 +242,38 @@ def add_arxiv_ids_to_queue(queue, arxiv_ids_str):
197
  else:
198
  gr.Warning(f"No valid arXiv IDs found...")
199
 
200
- return queue
201
-
202
- def count_nans(row):
203
- count = 0
204
-
205
- for _, (k, v) in enumerate(data.items()):
206
- if v is None:
207
- count = count + 1
208
-
209
- return count
210
-
211
- for data in ds["train"]:
212
- date = data["target_date"].strftime("%Y-%m-%d")
213
-
214
- if date in date2qna:
215
- papers = copy.deepcopy(date2qna[date])
216
- for paper in papers:
217
- if paper["title"] == data["title"]:
218
- if count_nans(paper) > count_nans(data):
219
- date2qna[date].remove(paper)
220
-
221
- date2qna[date].append(data)
222
- del papers
223
- else:
224
- date2qna[date] = [data]
225
-
226
- for date in date2qna:
227
- papers = date2qna[date]
228
- for paper in papers:
229
- title2qna[paper["title"]] = paper
230
-
231
- titles = title2qna.keys()
232
 
233
- sorted_dates = sorted(date2qna.keys())
234
- last_date = sorted_dates[-1]
235
- last_papers = date2qna[last_date]
236
- selected_paper = last_papers[0]
 
 
 
 
 
 
 
237
 
238
- def get_papers(date):
239
- papers = [paper["title"] for paper in date2qna[date]]
240
- return gr.Dropdown(
241
- papers,
242
- value=papers[0]
 
 
243
  )
244
 
245
- def set_paper(date, paper_title):
 
 
 
 
246
  selected_paper = None
247
- for paper in date2qna[date]:
248
  if paper["title"] == paper_title:
249
  selected_paper = paper
250
  break
@@ -331,11 +360,11 @@ def search(search_in, max_results=3):
331
 
332
  def set_date(title):
333
  paper = title2qna[title]
334
- date = paper["target_date"].strftime("%Y-%m-%d")
335
- return date
336
 
337
- def set_papers(date, title):
338
- papers = [paper["title"] for paper in date2qna[date]]
339
  return (
340
  gr.Dropdown(choices=papers, value=title),
341
  gr.Textbox("")
@@ -346,13 +375,28 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
346
 
347
  with gr.Column(elem_id="control-panel", elem_classes=["group"]):
348
  with gr.Row():
349
- date_dd = gr.Dropdown(
350
- sorted_dates,
351
- value=last_date,
352
- label="Select date",
353
- interactive=True,
354
- scale=3, filterable=False
355
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  papers_dd = gr.Dropdown(
357
  [paper["title"] for paper in last_papers],
358
  value=selected_paper["title"],
@@ -459,7 +503,7 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
459
  arxiv_id_enter.submit(
460
  add_arxiv_ids_to_queue,
461
  [arxiv_queue, arxiv_id_enter],
462
- arxiv_queue
463
  )
464
 
465
 
@@ -468,71 +512,121 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
468
  "If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis) "
469
  "Also, the generated dataset is hosted on Hugging Face 🤗 Dataset repository as well([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)). ")
470
 
471
- search_r1.click(set_date, search_r1, date_dd).then(
472
  set_papers,
473
- inputs=[date_dd, search_r1],
474
  outputs=[papers_dd, search_in]
475
  )
476
 
477
- search_r2.click(set_date, search_r2, date_dd).then(
478
  set_papers,
479
- inputs=[date_dd, search_r2],
480
  outputs=[papers_dd, search_in]
481
  )
482
 
483
- search_r3.click(set_date, search_r3, date_dd).then(
484
  set_papers,
485
- inputs=[date_dd, search_r3],
486
  outputs=[papers_dd, search_in]
487
  )
488
 
489
- search_r4.click(set_date, search_r4, date_dd).then(
490
  set_papers,
491
- inputs=[date_dd, search_r4],
492
  outputs=[papers_dd, search_in]
493
  )
494
 
495
- search_r5.click(set_date, search_r5, date_dd).then(
496
  set_papers,
497
- inputs=[date_dd, search_r5],
498
  outputs=[papers_dd, search_in]
499
  )
500
 
501
- search_r6.click(set_date, search_r6, date_dd).then(
502
  set_papers,
503
- inputs=[date_dd, search_r6],
504
  outputs=[papers_dd, search_in]
505
  )
506
 
507
- search_r7.click(set_date, search_r7, date_dd).then(
508
  set_papers,
509
- inputs=[date_dd, search_r7],
510
  outputs=[papers_dd, search_in]
511
  )
512
 
513
- search_r8.click(set_date, search_r8, date_dd).then(
514
  set_papers,
515
- inputs=[date_dd, search_r8],
516
  outputs=[papers_dd, search_in]
517
  )
518
 
519
- search_r9.click(set_date, search_r9, date_dd).then(
520
  set_papers,
521
- inputs=[date_dd, search_r9],
522
  outputs=[papers_dd, search_in]
523
  )
524
 
525
- search_r10.click(set_date, search_r10, date_dd).then(
526
  set_papers,
527
- inputs=[date_dd, search_r10],
528
  outputs=[papers_dd, search_in]
529
  )
530
 
531
- date_dd.input(get_papers, date_dd, papers_dd).then(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  set_paper,
533
- [date_dd, papers_dd],
534
  [
535
- title, arxiv_link, hf_paper_link, summary,
536
  basic_q_0, basic_q_eli5_0, basic_q_expert_0,
537
  depth_q_0, depth_q_eli5_0, depth_q_expert_0,
538
  breath_q_0, breath_q_eli5_0, breath_q_expert_0,
@@ -549,9 +643,9 @@ with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
549
 
550
  papers_dd.change(
551
  set_paper,
552
- [date_dd, papers_dd],
553
  [
554
- title, arxiv_link, hf_paper_link, summary,
555
  basic_q_0, basic_q_eli5_0, basic_q_expert_0,
556
  depth_q_0, depth_q_eli5_0, depth_q_expert_0,
557
  breath_q_0, breath_q_eli5_0, breath_q_expert_0,
 
5
  import pandas as pd
6
  import gradio as gr
7
 
8
+ from collections import defaultdict
9
  from datetime import datetime, timedelta
10
  from datasets import Dataset
11
  from huggingface_hub import HfApi
 
25
 
26
  from apscheduler.schedulers.background import BackgroundScheduler
27
 
28
def count_nans(row):
    """Return how many values in ``row`` are ``None``.

    Args:
        row: Mapping of field name to value.

    Returns:
        The number of entries whose value is ``None``.
    """
    # Count over the argument itself. The previous body iterated the
    # module-level ``data`` variable, so the result never depended on
    # which record was passed in.
    return sum(value is None for value in row.values())
36
+
37
  gemini_api_key = os.getenv("GEMINI_API_KEY")
38
  hf_token = os.getenv("HF_TOKEN")
39
 
 
50
 
51
  title2qna = {}
52
  date2qna = {}
53
# Nested index of paper records: year -> month -> day -> list of records.
date_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

# Group the training records by their "YYYY-MM-DD" target date.
for data in ds["train"]:
    date = data["target_date"].strftime("%Y-%m-%d")

    if date not in date2qna:
        date2qna[date] = [data]
        continue

    # Scan a snapshot so entries can be removed from the live list safely.
    for paper in copy.deepcopy(date2qna[date]):
        same_title = paper["title"] == data["title"]
        # Drop an already-stored record with the same title when it has
        # more missing (None) fields than the incoming one.
        if same_title and count_nans(paper) > count_nans(data):
            date2qna[date].remove(paper)

    date2qna[date].append(data)

# Build the title lookup and the year/month/day index from the grouped data.
for date, papers in date2qna.items():
    year, month, day = date.split("-")
    for paper in papers:
        title2qna[paper["title"]] = paper
        date_dict[year][month][day].append(paper)

titles = title2qna.keys()
78
+
79
# All "YYYY-MM-DD" date keys in ascending order.
sorted_dates = sorted(date2qna.keys())

# Walk the year -> month -> day index, taking the last key in sorted order
# at each level to find the day shown when the app starts.
sorted_year = sorted(date_dict.keys())
last_year = sorted_year[-1]
sorted_month = sorted(date_dict[last_year].keys())
last_month = sorted_month[-1]
sorted_day = sorted(date_dict[last_year][last_month].keys())
last_day = sorted_day[-1]
# Index with ``last_day``: the innermost level is keyed by day, and the old
# ``last_date`` name is no longer assigned here.
last_papers = date_dict[last_year][last_month][last_day]
selected_paper = last_papers[0]
89
 
90
  def filter_function(example, ids):
91
  ids_e = example['Requested arXiv IDs']
 
242
  else:
243
  gr.Warning(f"No valid arXiv IDs found...")
244
 
245
+ return (
246
+ queue, gr.Textbox("")
247
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
def get_paper_by_year(y):
    """Build month, day and paper dropdown updates for year ``y``.

    Picks the last month (in sorted order) recorded for the year, the last
    day recorded for that month, and preselects the first paper of that day.

    Args:
        y: Year key into ``date_dict``.

    Returns:
        A tuple of three ``gr.Dropdown`` objects: months, days, paper titles.
    """
    months = sorted(date_dict[y].keys())
    newest_month = months[-1]

    days = sorted(date_dict[y][newest_month].keys())
    newest_day = days[-1]

    paper_titles = [
        entry["title"] for entry in date_dict[y][newest_month][newest_day]
    ]

    month_update = gr.Dropdown(choices=months, value=newest_month)
    day_update = gr.Dropdown(choices=days, value=newest_day)
    paper_update = gr.Dropdown(choices=paper_titles, value=paper_titles[0])
    return month_update, day_update, paper_update
260
 
261
def get_paper_by_month(y, m):
    """Build day and paper dropdown updates for month ``m`` of year ``y``.

    Picks the last day (in sorted order) recorded for the month and
    preselects the first paper of that day.

    Args:
        y: Year key into ``date_dict``.
        m: Month key under that year.

    Returns:
        A tuple of two ``gr.Dropdown`` objects: days, paper titles.
    """
    days = sorted(date_dict[y][m].keys())
    newest_day = days[-1]

    paper_titles = [entry["title"] for entry in date_dict[y][m][newest_day]]

    day_update = gr.Dropdown(choices=days, value=newest_day)
    paper_update = gr.Dropdown(choices=paper_titles, value=paper_titles[0])
    return day_update, paper_update
269
 
270
def get_paper_by_day(y, m, d):
    """Build the paper dropdown update for the given year, month and day.

    Args:
        y: Year key into ``date_dict``.
        m: Month key under that year.
        d: Day key under that month.

    Returns:
        A ``gr.Dropdown`` listing that day's paper titles, with the first
        one preselected.
    """
    paper_titles = []
    for entry in date_dict[y][m][d]:
        paper_titles.append(entry["title"])

    first_title = paper_titles[0]
    return gr.Dropdown(choices=paper_titles, value=first_title)
273
+
274
+ def set_paper(y, m, d, paper_title):
275
  selected_paper = None
276
+ for paper in date_dict[y][m][d]:
277
  if paper["title"] == paper_title:
278
  selected_paper = paper
279
  break
 
360
 
361
  def set_date(title):
362
  paper = title2qna[title]
363
+ year, month, day = paper["target_date"].strftime("%Y-%m-%d").split("-")
364
+ return year, month, day
365
 
366
+ def set_papers(y, m, d, title):
367
+ papers = [paper["title"] for paper in date_dict[y][m][d]]
368
  return (
369
  gr.Dropdown(choices=papers, value=title),
370
  gr.Textbox("")
 
375
 
376
  with gr.Column(elem_id="control-panel", elem_classes=["group"]):
377
  with gr.Row():
378
+ with gr.Row():
379
+ year_dd = gr.Dropdown(
380
+ sorted_year,
381
+ value=last_year,
382
+ label="Year",
383
+ interactive=True,
384
+ )
385
+
386
+ month_dd = gr.Dropdown(
387
+ sorted_month,
388
+ value=last_month,
389
+ label="Month",
390
+ interactive=True,
391
+ )
392
+
393
+ day_dd = gr.Dropdown(
394
+ sorted_day,
395
+ value=last_day,
396
+ label="Day",
397
+ interactive=True,
398
+ )
399
+
400
  papers_dd = gr.Dropdown(
401
  [paper["title"] for paper in last_papers],
402
  value=selected_paper["title"],
 
503
  arxiv_id_enter.submit(
504
  add_arxiv_ids_to_queue,
505
  [arxiv_queue, arxiv_id_enter],
506
+ [arxiv_queue, arxiv_id_enter]
507
  )
508
 
509
 
 
512
  "If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis) "
513
  "Also, the generated dataset is hosted on Hugging Face 🤗 Dataset repository as well([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)). ")
514
 
515
+ search_r1.click(set_date, search_r1, [year_dd, month_dd, day_dd]).then(
516
  set_papers,
517
+ inputs=[year_dd, month_dd, day_dd, search_r1],
518
  outputs=[papers_dd, search_in]
519
  )
520
 
521
+ search_r2.click(set_date, search_r2, [year_dd, month_dd, day_dd]).then(
522
  set_papers,
523
+ inputs=[year_dd, month_dd, day_dd, search_r2],
524
  outputs=[papers_dd, search_in]
525
  )
526
 
527
+ search_r3.click(set_date, search_r3, [year_dd, month_dd, day_dd]).then(
528
  set_papers,
529
+ inputs=[year_dd, month_dd, day_dd, search_r3],
530
  outputs=[papers_dd, search_in]
531
  )
532
 
533
+ search_r4.click(set_date, search_r4, [year_dd, month_dd, day_dd]).then(
534
  set_papers,
535
+ inputs=[year_dd, month_dd, day_dd, search_r4],
536
  outputs=[papers_dd, search_in]
537
  )
538
 
539
+ search_r5.click(set_date, search_r5, [year_dd, month_dd, day_dd]).then(
540
  set_papers,
541
+ inputs=[year_dd, month_dd, day_dd, search_r5],
542
  outputs=[papers_dd, search_in]
543
  )
544
 
545
+ search_r6.click(set_date, search_r6, [year_dd, month_dd, day_dd]).then(
546
  set_papers,
547
+ inputs=[year_dd, month_dd, day_dd, search_r6],
548
  outputs=[papers_dd, search_in]
549
  )
550
 
551
+ search_r7.click(set_date, search_r7, [year_dd, month_dd, day_dd]).then(
552
  set_papers,
553
+ inputs=[year_dd, month_dd, day_dd, search_r7],
554
  outputs=[papers_dd, search_in]
555
  )
556
 
557
+ search_r8.click(set_date, search_r8, [year_dd, month_dd, day_dd]).then(
558
  set_papers,
559
+ inputs=[year_dd, month_dd, day_dd, search_r8],
560
  outputs=[papers_dd, search_in]
561
  )
562
 
563
+ search_r9.click(set_date, search_r9, [year_dd, month_dd, day_dd]).then(
564
  set_papers,
565
+ inputs=[year_dd, month_dd, day_dd, search_r9],
566
  outputs=[papers_dd, search_in]
567
  )
568
 
569
+ search_r10.click(set_date, search_r10, [year_dd, month_dd, day_dd]).then(
570
  set_papers,
571
+ inputs=[year_dd, month_dd, day_dd, search_r10],
572
  outputs=[papers_dd, search_in]
573
  )
574
 
575
+ year_dd.input(
576
+ get_paper_by_year,
577
+ inputs=[year_dd],
578
+ outputs=[month_dd, day_dd, papers_dd]
579
+ ).then(
580
+ set_paper,
581
+ [year_dd, month_dd, day_dd, papers_dd],
582
+ [
583
+ title, summary,
584
+ basic_q_0, basic_q_eli5_0, basic_q_expert_0,
585
+ depth_q_0, depth_q_eli5_0, depth_q_expert_0,
586
+ breath_q_0, breath_q_eli5_0, breath_q_expert_0,
587
+
588
+ basic_q_1, basic_q_eli5_1, basic_q_expert_1,
589
+ depth_q_1, depth_q_eli5_1, depth_q_expert_1,
590
+ breath_q_1, breath_q_eli5_1, breath_q_expert_1,
591
+
592
+ basic_q_2, basic_q_eli5_2, basic_q_expert_2,
593
+ depth_q_2, depth_q_eli5_2, depth_q_expert_2,
594
+ breath_q_2, breath_q_eli5_2, breath_q_expert_2
595
+ ]
596
+ )
597
+
598
+ month_dd.input(
599
+ get_paper_by_month,
600
+ inputs=[year_dd, month_dd],
601
+ outputs=[day_dd, papers_dd]
602
+ ).then(
603
+ set_paper,
604
+ [year_dd, month_dd, day_dd, papers_dd],
605
+ [
606
+ title, summary,
607
+ basic_q_0, basic_q_eli5_0, basic_q_expert_0,
608
+ depth_q_0, depth_q_eli5_0, depth_q_expert_0,
609
+ breath_q_0, breath_q_eli5_0, breath_q_expert_0,
610
+
611
+ basic_q_1, basic_q_eli5_1, basic_q_expert_1,
612
+ depth_q_1, depth_q_eli5_1, depth_q_expert_1,
613
+ breath_q_1, breath_q_eli5_1, breath_q_expert_1,
614
+
615
+ basic_q_2, basic_q_eli5_2, basic_q_expert_2,
616
+ depth_q_2, depth_q_eli5_2, depth_q_expert_2,
617
+ breath_q_2, breath_q_eli5_2, breath_q_expert_2
618
+ ]
619
+ )
620
+
621
+ day_dd.input(
622
+ get_paper_by_day,
623
+ inputs=[year_dd, month_dd, day_dd],
624
+ outputs=[papers_dd]
625
+ ).then(
626
  set_paper,
627
+ [year_dd, month_dd, day_dd, papers_dd],
628
  [
629
+ title, summary,
630
  basic_q_0, basic_q_eli5_0, basic_q_expert_0,
631
  depth_q_0, depth_q_eli5_0, depth_q_expert_0,
632
  breath_q_0, breath_q_eli5_0, breath_q_expert_0,
 
643
 
644
  papers_dd.change(
645
  set_paper,
646
+ [year_dd, month_dd, day_dd, papers_dd],
647
  [
648
+ title, summary,
649
  basic_q_0, basic_q_eli5_0, basic_q_expert_0,
650
  depth_q_0, depth_q_eli5_0, depth_q_expert_0,
651
  breath_q_0, breath_q_eli5_0, breath_q_expert_0,