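# Gradio Space that lets readers browse auto-generated Q&As about arXiv papers.
# Papers come from Hugging Face Daily Papers and from user-requested arXiv IDs;
# the Q&As are generated with Gemini and stored in Hugging Face Dataset repos.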
import os
import re
import copy
import datasets
import pandas as pd
import gradio as gr

from datetime import datetime, timedelta
from datasets import Dataset
from huggingface_hub import HfApi
from huggingface_hub import create_repo
from huggingface_hub.utils import HfHubHTTPError

from paper.download import (
    download_pdf_from_arxiv,
    get_papers_from_hf_daily_papers,
    get_papers_from_arxiv_ids
)
from paper.parser import extract_text_and_figures
from gen.gemini import get_basic_qa, get_deep_qa
import utils

from apscheduler.schedulers.background import BackgroundScheduler
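# Custom CSS injected into the Gradio Blocks below: responsive width on small
# screens, hover zoom for the small-font answers, dashed "group" boxes, and
# hidden labels for a few controls.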
STYLE = """
@media only screen and (max-width: 700px) {
    .main {
        width: 80% !important;
        margin: 0 auto; /* Center the container */
    }
}
.small-font {
    font-size: 12pt !important;
}
.small-font:hover {
    font-size: 20px !important;
    transition: font-size 0.3s ease-out;
    transition-delay: 1.5s;
}
.group {
    padding-top: 10px;
    padding-left: 10px;
    padding-right: 10px;
    padding-bottom: 10px;
    border: 2px dashed gray;
    border-radius: 20px;
    box-shadow: 5px 3px 10px 1px rgba(0, 0, 0, 0.4) !important;
}
.accordion > button > span {
    font-size: 12pt !important;
}
.accordion {
    border-style: dashed !important;
    border-left-width: 2px !important;
    border-bottom-width: 2.5px !important;
    border-top: none !important;
    border-right: none !important;
    box-shadow: none !important;
}
.no-gap {
    gap: 0px;
}
.no-radius {
    border-radius: 0px;
}
.textbox-no-label > label > span {
    display: none;
}
.exp-type > span {
    display: none;
}
.conv-type > span {
    display: none;
}
.conv-type .wrap:nth-child(3) {
    width: 167px;
    margin: auto;
}
button {
    font-size: 10pt !important;
}
h3 {
    font-size: 13pt !important;
}
"""
gemini_api_key = os.getenv("GEMINI_API_KEY")
hf_token = os.getenv("HF_TOKEN")

dataset_repo_id = "chansung/auto-paper-qa2"
request_arxiv_repo_id = "chansung/requested-arxiv-ids-3"

ds = datasets.load_dataset(dataset_repo_id)
request_ds = datasets.load_dataset(request_arxiv_repo_id)

requested_arxiv_ids = []
for request_d in request_ds['train']:
    arxiv_ids = request_d['Requested arXiv IDs']
    requested_arxiv_ids = requested_arxiv_ids + arxiv_ids

requested_arxiv_ids_df = pd.DataFrame({'Requested arXiv IDs': requested_arxiv_ids})
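# In-memory indexes built at startup: map paper titles and target dates to their
# Q&A records so the UI callbacks can look them up without reloading the dataset.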
title2qna = {}
date2qna = {}
longest_qans = 0
def filter_function(example, ids):
    ids_e = example['Requested arXiv IDs']
    for iid in ids:
        if iid in ids_e:
            ids_e.remove(iid)

    example['Requested arXiv IDs'] = ids_e
    print(example)
    return example
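# Background job (scheduled at the bottom of this file): pull requested arXiv IDs
# from the queue repo, download each paper's PDF, generate Q&As with Gemini, push
# them to the dataset repo, drop the handled IDs from the queue, and restart this
# Space so it reloads the updated dataset.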
def process_arxiv_ids(gemini_api, hf_repo_id, req_hf_repo_id, hf_token, how_many=10):
    arxiv_ids = []

    ds1 = datasets.load_dataset(req_hf_repo_id)
    for d in ds1['train']:
        req_arxiv_ids = d['Requested arXiv IDs']
        if len(req_arxiv_ids) > 0 and req_arxiv_ids[0] != "top":
            arxiv_ids = arxiv_ids + req_arxiv_ids

    arxiv_ids = arxiv_ids[:how_many]

    if arxiv_ids is not None and len(arxiv_ids) > 0:
        print(f"1. Get metadata for the papers [{arxiv_ids}]")
        papers = get_papers_from_arxiv_ids(arxiv_ids)
        print("...DONE")

        print("2. Generating QAs for the paper")
        for paper in papers:
            try:
                title = paper['title']
                target_date = paper['target_date']
                abstract = paper['paper']['summary']
                arxiv_id = paper['paper']['id']
                authors = paper['paper']['authors']

                print(f"...PROCESSING ON[{arxiv_id}, {title}]")
                print("......Downloading the paper PDF")
                filename = download_pdf_from_arxiv(arxiv_id)
                print("......DONE")

                print("......Extracting text and figures")
                texts, figures = extract_text_and_figures(filename)
                text = ' '.join(texts)
                print("......DONE")

                print("......Generating the seed(basic) QAs")
                qnas = get_basic_qa(text, gemini_api_key=gemini_api, trucate=30000)
                qnas['title'] = title
                qnas['abstract'] = abstract
                qnas['authors'] = ','.join(authors)
                qnas['arxiv_id'] = arxiv_id
                qnas['target_date'] = target_date
                qnas['full_text'] = text
                print("......DONE")

                print("......Generating the follow-up QAs")
                qnas = get_deep_qa(text, qnas, gemini_api_key=gemini_api, trucate=30000)
                del qnas["qna"]
                print("......DONE")

                print(f"......Exporting to HF Dataset repo at [{hf_repo_id}]")
                utils.push_to_hf_hub(qnas, hf_repo_id, hf_token)
                print("......DONE")

                print(f"......Updating request arXiv HF Dataset repo at [{req_hf_repo_id}]")
                filtered_ds = ds1['train'].map(
                    lambda example: filter_function(example, [arxiv_id])
                ).filter(
                    lambda example: len(example['Requested arXiv IDs']) > 0
                )
                filtered_ds.push_to_hub(req_hf_repo_id, token=hf_token)
                # keep ds1 a DatasetDict so the next iteration can index ['train'] again
                ds1 = datasets.DatasetDict({'train': filtered_ds})
                print("......DONE")
            except Exception as e:
                print(f".......failed due to exception {e}")
                continue

        HfApi(token=hf_token).restart_space(
            repo_id="chansung/paper_qa", token=hf_token
        )
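# Append a DataFrame to a Hugging Face Dataset repo, creating the repo first if
# it does not exist yet.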
def push_to_hf_hub(
    df, repo_id, token, append=True
):
    exist = False
    ds = Dataset.from_pandas(df)

    try:
        # use the repo_id/token passed in rather than the module-level globals
        create_repo(repo_id, repo_type="dataset", token=token)
    except HfHubHTTPError:
        exist = True

    if exist and append:
        existing_ds = datasets.load_dataset(repo_id)
        ds = datasets.concatenate_datasets([existing_ds['train'], ds])

    ds.push_to_hub(repo_id, token=token)
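# Drop any IDs that are already queued or that already have generated Q&As.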
def _filter_duplicate_arxiv_ids(arxiv_ids_to_be_added):
    ds1 = datasets.load_dataset("chansung/requested-arxiv-ids-3")
    ds2 = datasets.load_dataset("chansung/auto-paper-qa2")

    unique_arxiv_ids = set()

    for d in ds1['train']:
        arxiv_ids = d['Requested arXiv IDs']
        unique_arxiv_ids.update(arxiv_ids)

    for d in ds2['train']:
        arxiv_id = d['arxiv_id']
        unique_arxiv_ids.add(arxiv_id)

    return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids)
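# Only new-style arXiv identifiers of the form YYMM.NNNNN (e.g. "2401.12345",
# a made-up example) pass this check; anything else is reported as invalid.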
def _is_arxiv_id_valid(arxiv_id):
    pattern = r"^\d{4}\.\d{5}$"
    return bool(re.match(pattern, arxiv_id))

def _get_valid_arxiv_ids(arxiv_ids_str):
    valid_arxiv_ids = []
    invalid_arxiv_ids = []

    for arxiv_id in arxiv_ids_str.split(","):
        arxiv_id = arxiv_id.strip()
        if _is_arxiv_id_valid(arxiv_id):
            valid_arxiv_ids.append(arxiv_id)
        else:
            invalid_arxiv_ids.append(arxiv_id)

    return valid_arxiv_ids, invalid_arxiv_ids
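# Handler for the arXiv ID request textbox: validate and de-duplicate the entered
# IDs, append the new ones to the on-screen queue, and persist them to the
# request dataset repo.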
def add_arxiv_ids_to_queue(queue, arxiv_ids_str):
    valid_arxiv_ids, invalid_arxiv_ids = _get_valid_arxiv_ids(arxiv_ids_str)

    if len(invalid_arxiv_ids) > 0:
        gr.Warning(f"Found invalid arXiv IDs: {invalid_arxiv_ids}")

    if len(valid_arxiv_ids) > 0:
        valid_arxiv_ids = _filter_duplicate_arxiv_ids(valid_arxiv_ids)

        if len(valid_arxiv_ids) > 0:
            valid_arxiv_ids = [[arxiv_id] for arxiv_id in valid_arxiv_ids]
            gr.Warning(f"Processing [{valid_arxiv_ids}]. Any other requested arXiv IDs have already been processed or are being processed...")

            valid_arxiv_ids = pd.DataFrame({'Requested arXiv IDs': valid_arxiv_ids})
            queue = pd.concat([queue, valid_arxiv_ids])
            queue = queue.reset_index(drop=True)

            push_to_hf_hub(valid_arxiv_ids, request_arxiv_repo_id, hf_token)
        else:
            gr.Warning("All requested arXiv IDs have already been processed or are being processed...")
    else:
        gr.Warning("No valid arXiv IDs found...")

    return queue
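# Number of missing (None) fields in a record; used to pick the most complete
# record when the dataset contains the same paper more than once.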
def count_nans(row):
    count = 0

    for v in row.values():
        if v is None:
            count = count + 1

    return count
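# Group records by date and, within a date, prefer the copy of a paper with the
# fewest missing fields.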
for data in ds["train"]:
    date = data["target_date"].strftime("%Y-%m-%d")

    if date in date2qna:
        papers = copy.deepcopy(date2qna[date])
        for paper in papers:
            if paper["title"] == data["title"]:
                if count_nans(paper) > count_nans(data):
                    date2qna[date].remove(paper)

        date2qna[date].append(data)
        del papers
    else:
        date2qna[date] = [data]
for date in date2qna:
    papers = date2qna[date]
    for paper in papers:
        title2qna[paper["title"]] = paper
titles = title2qna.keys()

sorted_dates = sorted(date2qna.keys())

last_date = sorted_dates[-1]
last_papers = date2qna[last_date]
selected_paper = last_papers[0]
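# UI callbacks: `get_papers` refreshes the title dropdown for a date, and
# `set_paper` re-renders the title, summary, and all Q&A Markdown blocks for the
# selected paper.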
def get_papers(date):
    papers = [paper["title"] for paper in date2qna[date]]
    return gr.Dropdown(
        papers,
        value=papers[0]
    )
def set_paper(date, paper_title):
    selected_paper = None
    for paper in date2qna[date]:
        if paper["title"] == paper_title:
            selected_paper = paper
            break

    return (
        gr.Markdown(f"# {selected_paper['title']}"), gr.Markdown(selected_paper["summary"]),

        gr.Markdown(f"### 🙋 {selected_paper['0_question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['0_answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_depth_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_depth_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_depth_q:answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_breath_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_breath_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_breath_q:answers:expert']}"),

        gr.Markdown(f"### 🙋 {selected_paper['1_question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['1_answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_depth_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_depth_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_depth_q:answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_breath_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_breath_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_breath_q:answers:expert']}"),

        gr.Markdown(f"### 🙋 {selected_paper['2_question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['2_answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_depth_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_depth_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_depth_q:answers:expert']}"),
        gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_breath_q:follow up question']}"),
        gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}"),
        gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}"),
    )
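# Toggle between the ELI5 and Technical answers by flipping the visibility of
# the paired Markdown components.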
def change_exp_type(exp_type):
    if exp_type == "ELI5":
        return (
            gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False),
            gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False),
            gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False),
        )
    else:
        return (
            gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True),
            gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True),
            gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True),
        )
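# Server-side keyword search over paper titles. It is not wired to any event
# below; the live search box is handled client-side by UPDATE_SEARCH_RESULTS.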
def search(search_in, max_results=3):
    results = []

    for title in titles:
        if len(results) >= max_results:
            break
        if search_in in title:
            results.append(title)

    return (
        gr.Textbox(
            visible=True if len(results) > 0 else False,
            value=results[0] if len(results) > 0 else ""
        ),
        gr.Textbox(
            visible=True if len(results) > 1 else False,
            value=results[1] if len(results) > 1 else ""
        ),
        gr.Textbox(
            visible=True if len(results) > 2 else False,
            value=results[2] if len(results) > 2 else ""
        )
    )
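# Client-side search: runs in the browser on every change of the search box.
# The full title list is baked into the script via the f-string below, and the
# ten hidden result buttons are shown or hidden to match the matches found.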
UPDATE_SEARCH_RESULTS = f"""
function search(searchIn, maxResults = 3) {{
    if (searchIn.trim().length > 0) {{
        const results = [];
        // the full list of paper titles is inlined here at startup via the Python f-string
        let titles = {list(titles)};

        for (const title of titles) {{
            if (results.length >= 10) {{
                break;
            }}
            if (title.toLowerCase().includes(searchIn.toLowerCase())) {{
                results.push(title);
            }}
        }}

        // pad to exactly ten entries, one per search result button
        const resultElements = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(index => {{
            return results[index - 1] || '';
        }});

        // show a button only when it has a result to display
        for (let i = 0; i < 10; i++) {{
            const button = document.getElementById('search_r' + (i + 1));
            button.style.display = resultElements[i] === '' ? 'none' : 'block';
        }}

        return resultElements;
    }} else {{
        // empty query: hide every search result button
        for (let i = 0; i < 10; i++) {{
            document.getElementById('search_r' + (i + 1)).style.display = 'none';
        }}
        return ['', '', '', '', '', '', '', '', '', ''];
    }}
}}
"""
UPDATE_IF_TYPE = f"""
function change_if_type(if_type) {{
    if (if_type == 'Q&As') {{
        document.getElementById('chat_block').style.display = 'none';
        document.getElementById('qna_block').style.display = 'block';
    }} else {{
        document.getElementById('chat_block').style.display = 'block';
        document.getElementById('qna_block').style.display = 'none';
    }}
}}
"""
def set_date(title):
    paper = title2qna[title]
    date = paper["target_date"].strftime("%Y-%m-%d")
    return date

def set_papers(date, title):
    papers = [paper["title"] for paper in date2qna[date]]
    return (
        gr.Dropdown(choices=papers, value=title),
        gr.Textbox("")
    )
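# UI layout: date/title selectors with keyword search on top, the selected
# paper's summary and three groups of Q&As in the middle, and the arXiv ID
# request queue at the bottom.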
with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Let's explore papers with auto generated Q&As")

    with gr.Column(elem_classes=["group"]):
        with gr.Row():
            date_dd = gr.Dropdown(
                sorted_dates,
                value=last_date,
                label="Select date",
                interactive=True,
                scale=3,
            )
            papers_dd = gr.Dropdown(
                [paper["title"] for paper in last_papers],
                value=selected_paper["title"],
                label="Select paper title",
                interactive=True,
                scale=7,
            )

        with gr.Column(elem_classes=["no-gap"]):
            search_in = gr.Textbox("", placeholder="Enter keywords to search...", elem_classes=["textbox-no-label"])
            search_r1 = gr.Button(visible=False, elem_id="search_r1", elem_classes=["no-radius"])
            search_r2 = gr.Button(visible=False, elem_id="search_r2", elem_classes=["no-radius"])
            search_r3 = gr.Button(visible=False, elem_id="search_r3", elem_classes=["no-radius"])
            search_r4 = gr.Button(visible=False, elem_id="search_r4", elem_classes=["no-radius"])
            search_r5 = gr.Button(visible=False, elem_id="search_r5", elem_classes=["no-radius"])
            search_r6 = gr.Button(visible=False, elem_id="search_r6", elem_classes=["no-radius"])
            search_r7 = gr.Button(visible=False, elem_id="search_r7", elem_classes=["no-radius"])
            search_r8 = gr.Button(visible=False, elem_id="search_r8", elem_classes=["no-radius"])
            search_r9 = gr.Button(visible=False, elem_id="search_r9", elem_classes=["no-radius"])
            search_r10 = gr.Button(visible=False, elem_id="search_r10", elem_classes=["no-radius"])

    conv_type = gr.Radio(choices=["Q&As", "Chat"], value="Q&As", interactive=True, visible=False, elem_classes=["conv-type"])

    with gr.Column(scale=7):
        title = gr.Markdown(f"# {selected_paper['title']}")
        summary = gr.Markdown(f"{selected_paper['summary']}", elem_classes=["small-font"])

        with gr.Column(elem_id="chat_block", visible=False):
            gr.Chatbot([("hello", "world"), ("how", "are you?")])

        with gr.Column(elem_id="qna_block", visible=True):
            with gr.Row():
                with gr.Column(scale=7):
                    gr.Markdown("## Auto generated Questions & Answers")
                exp_type = gr.Radio(choices=["ELI5", "Technical"], value="ELI5", elem_classes=["exp-type"], scale=3)

            # 1
            with gr.Column(elem_classes=["group"], visible=True) as q_0:
                basic_q_0 = gr.Markdown(f"### 🙋 {selected_paper['0_question']}")
                basic_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_answers:eli5']}", elem_classes=["small-font"])
                basic_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_0_0:
                    depth_q_0 = gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_depth_q:follow up question']}")
                    depth_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_depth_q:answers:eli5']}", elem_classes=["small-font"])
                    depth_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_0_1:
                    breath_q_0 = gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_breath_q:follow up question']}")
                    breath_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_breath_q:answers:eli5']}", elem_classes=["small-font"])
                    breath_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"])

            # 2
            with gr.Column(elem_classes=["group"], visible=True) as q_1:
                basic_q_1 = gr.Markdown(f"### 🙋 {selected_paper['1_question']}")
                basic_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_answers:eli5']}", elem_classes=["small-font"])
                basic_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_1_0:
                    depth_q_1 = gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_depth_q:follow up question']}")
                    depth_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_depth_q:answers:eli5']}", elem_classes=["small-font"])
                    depth_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_1_1:
                    breath_q_1 = gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_breath_q:follow up question']}")
                    breath_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_breath_q:answers:eli5']}", elem_classes=["small-font"])
                    breath_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"])

            # 3
            with gr.Column(elem_classes=["group"], visible=True) as q_2:
                basic_q_2 = gr.Markdown(f"### 🙋 {selected_paper['2_question']}")
                basic_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_answers:eli5']}", elem_classes=["small-font"])
                basic_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_2_0:
                    depth_q_2 = gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_depth_q:follow up question']}")
                    depth_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_depth_q:answers:eli5']}", elem_classes=["small-font"])
                    depth_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"])

                with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_2_1:
                    breath_q_2 = gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_breath_q:follow up question']}")
                    breath_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}", elem_classes=["small-font"])
                    breath_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"])

    gr.Markdown("## Request any arXiv IDs")
    arxiv_queue = gr.Dataframe(
        headers=["Requested arXiv IDs"], col_count=(1, "fixed"),
        value=requested_arxiv_ids_df,
        datatype=["str"],
        interactive=False
    )

    arxiv_id_enter = gr.Textbox(placeholder="Enter comma separated arXiv IDs...", elem_classes=["textbox-no-label"])
    arxiv_id_enter.submit(
        add_arxiv_ids_to_queue,
        [arxiv_queue, arxiv_id_enter],
        arxiv_queue
    )

    gr.Markdown("The target papers are collected from [Hugging Face 🤗 Daily Papers](https://huggingface.co/papers) on a daily basis. "
                "The entire dataset is generated by [Google's Gemini 1.0](https://deepmind.google/technologies/gemini/) Pro. "
                "If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis). "
                "The generated dataset is also hosted on the Hugging Face 🤗 Dataset hub ([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)).")
    for search_r in [
        search_r1, search_r2, search_r3, search_r4, search_r5,
        search_r6, search_r7, search_r8, search_r9, search_r10
    ]:
        search_r.click(set_date, search_r, date_dd).then(
            set_papers,
            inputs=[date_dd, search_r],
            outputs=[papers_dd, search_in]
        )
    paper_outputs = [
        title, summary,
        basic_q_0, basic_q_eli5_0, basic_q_expert_0,
        depth_q_0, depth_q_eli5_0, depth_q_expert_0,
        breath_q_0, breath_q_eli5_0, breath_q_expert_0,
        basic_q_1, basic_q_eli5_1, basic_q_expert_1,
        depth_q_1, depth_q_eli5_1, depth_q_expert_1,
        breath_q_1, breath_q_eli5_1, breath_q_expert_1,
        basic_q_2, basic_q_eli5_2, basic_q_expert_2,
        depth_q_2, depth_q_eli5_2, depth_q_expert_2,
        breath_q_2, breath_q_eli5_2, breath_q_expert_2
    ]

    date_dd.input(get_papers, date_dd, papers_dd).then(
        set_paper,
        [date_dd, papers_dd],
        paper_outputs
    )

    papers_dd.change(
        set_paper,
        [date_dd, papers_dd],
        paper_outputs
    )

    search_in.change(
        inputs=[search_in],
        outputs=[
            search_r1, search_r2, search_r3, search_r4, search_r5,
            search_r6, search_r7, search_r8, search_r9, search_r10
        ],
        js=UPDATE_SEARCH_RESULTS,
        fn=None
    )

    exp_type.select(
        change_exp_type,
        exp_type,
        [
            basic_q_eli5_0, basic_q_expert_0, depth_q_eli5_0, depth_q_expert_0, breath_q_eli5_0, breath_q_expert_0,
            basic_q_eli5_1, basic_q_expert_1, depth_q_eli5_1, depth_q_expert_1, breath_q_eli5_1, breath_q_expert_1,
            basic_q_eli5_2, basic_q_expert_2, depth_q_eli5_2, depth_q_expert_2, breath_q_eli5_2, breath_q_expert_2
        ]
    )

    conv_type.select(
        inputs=[conv_type],
        js=UPDATE_IF_TYPE,
        outputs=None,
        fn=None
    )
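# Run process_arxiv_ids on an hourly interval (anchored one minute after startup),
# then serve the app.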
start_date = datetime.now() + timedelta(minutes=1)

scheduler = BackgroundScheduler()
scheduler.add_job(
    process_arxiv_ids,
    trigger='interval',
    seconds=3600,
    args=[
        gemini_api_key,
        dataset_repo_id,
        request_arxiv_repo_id,
        hf_token
    ],
    start_date=start_date
)
scheduler.start()

demo.launch(share=True, debug=True)