chansung committed on
Commit
046ea77
‒
1 Parent(s): 26e4174
Files changed (3) hide show
  1. app.py +19 -18
  2. init.py +15 -0
  3. utils.py +9 -0
app.py CHANGED
@@ -29,6 +29,7 @@ sorted_day = sorted(date_dict[last_year][last_month].keys())
29
  last_day = sorted_day[-1]
30
  last_papers = date_dict[last_year][last_month][last_day]
31
  selected_paper = last_papers[0]
 
32
 
33
  with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
34
  cur_arxiv_id = gr.Textbox(selected_paper['arxiv_id'], visible=False)
@@ -54,7 +55,7 @@ with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
54
 
55
  gr.Markdown("# Let's explore papers with auto generated Q&As")
56
 
57
- with gr.Column(elem_id="control-panel", elem_classes=["group"]):
58
  with gr.Column():
59
  with gr.Row():
60
  year_dd = gr.Dropdown(sorted_year, value=last_year, label="Year", interactive=True, filterable=False)
@@ -82,7 +83,7 @@ with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
82
  search_r9 = gr.Button(visible=False, elem_id="search_r9", elem_classes=["no-radius"])
83
  search_r10 = gr.Button(visible=False, elem_id="search_r10", elem_classes=["no-radius"])
84
 
85
- with gr.Column(scale=7):
86
  title = gr.Markdown(f"# {selected_paper['title']}", elem_classes=["markdown-center"])
87
  # with gr.Row():
88
  with gr.Row():
@@ -150,21 +151,21 @@ with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
150
  breath_q_eli5_2 = gr.Markdown(f"β†ͺ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}", elem_classes=["small-font"])
151
  breath_q_expert_2 = gr.Markdown(f"β†ͺ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"])
152
 
153
- gr.Markdown("## Request any arXiv ids")
154
- arxiv_queue = gr.Dataframe(
155
- headers=["Requested arXiv IDs"], col_count=(1, "fixed"),
156
- value=requested_arxiv_ids_df,
157
- datatype=["str"],
158
- interactive=False,
159
- )
160
-
161
- arxiv_id_enter = gr.Textbox(placeholder="Enter comma separated arXiv IDs...", elem_classes=["textbox-no-label"])
162
- arxiv_id_enter.submit(
163
- add_arxiv_ids_to_queue,
164
- [arxiv_queue, arxiv_id_enter],
165
- [arxiv_queue, arxiv_id_enter],
166
- concurrency_limit=20,
167
- )
168
 
169
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
170
 
@@ -390,7 +391,7 @@ scheduler.add_job(
390
  ],
391
  start_date=start_date
392
  )
393
- # scheduler.start()
394
 
395
  demo.queue(
396
  default_concurrency_limit=20,
 
29
  last_day = sorted_day[-1]
30
  last_papers = date_dict[last_year][last_month][last_day]
31
  selected_paper = last_papers[0]
32
+ visible = True if len(sorted_year) > 0 else False
33
 
34
  with gr.Blocks(css="constants/styles.css", theme=gr.themes.Soft()) as demo:
35
  cur_arxiv_id = gr.Textbox(selected_paper['arxiv_id'], visible=False)
 
55
 
56
  gr.Markdown("# Let's explore papers with auto generated Q&As")
57
 
58
+ with gr.Column(elem_id="control-panel", elem_classes=["group"], visible=visible):
59
  with gr.Column():
60
  with gr.Row():
61
  year_dd = gr.Dropdown(sorted_year, value=last_year, label="Year", interactive=True, filterable=False)
 
83
  search_r9 = gr.Button(visible=False, elem_id="search_r9", elem_classes=["no-radius"])
84
  search_r10 = gr.Button(visible=False, elem_id="search_r10", elem_classes=["no-radius"])
85
 
86
+ with gr.Column(scale=7, visible=visible):
87
  title = gr.Markdown(f"# {selected_paper['title']}", elem_classes=["markdown-center"])
88
  # with gr.Row():
89
  with gr.Row():
 
151
  breath_q_eli5_2 = gr.Markdown(f"β†ͺ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}", elem_classes=["small-font"])
152
  breath_q_expert_2 = gr.Markdown(f"β†ͺ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"])
153
 
154
+ gr.Markdown("## Request any arXiv ids")
155
+ arxiv_queue = gr.Dataframe(
156
+ headers=["Requested arXiv IDs"], col_count=(1, "fixed"),
157
+ value=requested_arxiv_ids_df,
158
+ datatype=["str"],
159
+ interactive=False,
160
+ )
161
+
162
+ arxiv_id_enter = gr.Textbox(placeholder="Enter comma separated arXiv IDs...", elem_classes=["textbox-no-label"])
163
+ arxiv_id_enter.submit(
164
+ add_arxiv_ids_to_queue,
165
+ [arxiv_queue, arxiv_id_enter],
166
+ [arxiv_queue, arxiv_id_enter],
167
+ concurrency_limit=20,
168
+ )
169
 
170
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
171
 
 
391
  ],
392
  start_date=start_date
393
  )
394
+ scheduler.start()
395
 
396
  demo.queue(
397
  default_concurrency_limit=20,
init.py CHANGED
@@ -2,10 +2,12 @@ import os
2
  import copy
3
  import datasets
4
  import pandas as pd
 
5
  from collections import defaultdict
6
 
7
  from datetime import datetime, timedelta
8
  from background import process_arxiv_ids
 
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
 
11
  def _count_nans(row):
@@ -79,6 +81,19 @@ def update_dataframe(request_data_repo_id):
79
  request_ds = datasets.load_dataset(request_data_repo_id)
80
  return _initialize_requested_arxiv_ids(request_ds)
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def get_secrets():
83
  global gemini_api_key
84
  global hf_token
 
2
  import copy
3
  import datasets
4
  import pandas as pd
5
+ from datasets import Dataset
6
  from collections import defaultdict
7
 
8
  from datetime import datetime, timedelta
9
  from background import process_arxiv_ids
10
+ from utils import create_hf_hub
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
 
13
  def _count_nans(row):
 
81
  request_ds = datasets.load_dataset(request_data_repo_id)
82
  return _initialize_requested_arxiv_ids(request_ds)
83
 
84
def initialize_repos(
    source_data_repo_id, request_data_repo_id, hf_token
):
    """Ensure the source and request dataset repositories exist on the Hub.

    Each repository is created through ``create_hf_hub``. When that helper
    returns ``False`` a message is printed saying the repository already
    exists. When the request repository is newly created, it is seeded with
    a single placeholder row and pushed to the Hub.

    Args:
        source_data_repo_id: Repo id of the source dataset repository.
        request_data_repo_id: Repo id of the requested-arXiv-IDs repository.
        hf_token: Token forwarded to ``create_hf_hub`` and ``push_to_hub``.
    """
    if create_hf_hub(source_data_repo_id, hf_token) is False:
        print(f"{source_data_repo_id} repository already exists")

    if create_hf_hub(request_data_repo_id, hf_token) is False:
        print(f"{request_data_repo_id} repository already exists")
    else:
        # Seed the fresh request repo with one placeholder row.
        # NOTE(review): the cell value is the list ["top"], not the string
        # "top" -- confirm this matches what the request-queue readers expect.
        df = pd.DataFrame(data={"Requested arXiv IDs": [["top"]]})
        # `datasets.Dataset` has no `from_df`; `from_pandas` is the
        # constructor that accepts a pandas DataFrame.
        ds = Dataset.from_pandas(df)
        ds.push_to_hub(request_data_repo_id, token=hf_token)
96
+
97
  def get_secrets():
98
  global gemini_api_key
99
  global hf_token
utils.py CHANGED
@@ -4,6 +4,15 @@ import pandas as pd
4
  from huggingface_hub import create_repo
5
  from huggingface_hub.utils import HfHubHTTPError
6
 
 
 
 
 
 
 
 
 
 
7
  def push_to_hf_hub(
8
  ds, repo_id, hf_token, append=True
9
  ):
 
4
  from huggingface_hub import create_repo
5
  from huggingface_hub.utils import HfHubHTTPError
6
 
7
def create_hf_hub(
    repo_id, hf_token
):
    """Try to create a dataset repository on the Hugging Face Hub.

    Args:
        repo_id: Id of the repository to create.
        hf_token: Token passed to ``create_repo``.

    Returns:
        ``True`` when ``create_repo`` succeeds, ``False`` when it raises
        ``HfHubHTTPError``.
    """
    try:
        create_repo(repo_id, repo_type="dataset", token=hf_token)
    except HfHubHTTPError as err:
        # The return value stays False for every HTTP error so existing
        # callers keep working, but anything other than a 409 Conflict
        # (repository already exists) is surfaced instead of being
        # silently swallowed.
        response = getattr(err, "response", None)
        if getattr(response, "status_code", None) != 409:
            print(f"Failed to create {repo_id}: {err}")
        return False
    return True
15
+
16
  def push_to_hf_hub(
17
  ds, repo_id, hf_token, append=True
18
  ):