#!/usr/bin/env python
# Hugging Face Spaces status banner captured alongside this file: "Running on CPU Upgrade".
import datetime | |
import gradio as gr | |
import pandas as pd | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from gradio_calendar import Calendar | |
from papers import PaperList, get_df | |
# Markdown heading rendered at the top of the page.
DESCRIPTION = "# [Daily Papers](https://huggingface.co/papers)"

# Markdown rendered below the results table. The backslash after the opening
# quotes suppresses the leading newline so the text starts on the first line.
FOOT_NOTE = """\
Related useful Spaces:
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
- [dailypapershackernews](https://huggingface.co/spaces/akhaliq/dailypapershackernews) by [akhaliq](https://huggingface.co/akhaliq)
"""
# Module-level paper index, built once at import time and replaced wholesale
# by update_paper_list().
paper_list = PaperList(get_df())


def update_paper_list() -> None:
    """Rebuild the module-level ``paper_list`` from a fresh ``get_df()`` result."""
    global paper_list
    fresh_df = get_df()
    paper_list = PaperList(fresh_df)
# Rebuild the paper list in a background thread on an hourly cron schedule (UTC).
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=update_paper_list,
    trigger="cron",
    hour="*",
    timezone="UTC",
    # Tolerate the job starting up to 60 seconds after its scheduled time.
    misfire_grace_time=60,
)
scheduler.start()
def update_df() -> gr.Dataframe:
    """Return a ``gr.Dataframe`` carrying the current ``paper_list.df_prettified``.

    Reads the module-level ``paper_list`` at call time, so it reflects whatever
    list is installed when the page loads.
    """
    current_table = paper_list.df_prettified
    return gr.Dataframe(value=current_table)
def update_num_papers(df: pd.DataFrame) -> str:
    """Format the paper counter as ``"<rows in df> / <rows in paper_list.df_raw>"``."""
    shown = len(df)
    total = len(paper_list.df_raw)
    return f"{shown} / {total}"
def search(
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    search_title: str,
    search_abstract: str,
    max_num_to_retrieve: int,
) -> pd.DataFrame:
    """Run a search on the current ``paper_list`` and return its result.

    All arguments are forwarded positionally, in declaration order, to
    ``paper_list.search``; this wrapper adds no filtering of its own.

    NOTE(review): the "End date" calendar is created without an initial value,
    so ``end_date`` may arrive unset — confirm ``paper_list.search`` handles it.
    """
    query_args = (start_date, end_date, search_title, search_abstract, max_num_to_retrieve)
    return paper_list.search(*query_args)
# Page layout and event wiring.
# NOTE(review): the nesting below is reconstructed from syntax because the
# original indentation was lost; confirm the placement of the date row and of
# the components after the group against the intended layout.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    # --- Search controls ---------------------------------------------------
    with gr.Group():
        search_title = gr.Textbox(label="Search title")
        with gr.Row():
            with gr.Column(scale=4):
                search_abstract = gr.Textbox(
                    label="Search abstract",
                    info="The result may not be accurate as the abstract does not contain all the information.",
                )
            with gr.Column(scale=1):
                max_num_to_retrieve = gr.Slider(
                    label="Max number to retrieve",
                    info="This is used only for search on abstracts.",
                    minimum=1,
                    # Upper bound is the raw paper count at the time the UI is built.
                    maximum=len(paper_list.df_raw),
                    step=1,
                    value=100,
                )
        with gr.Row():
            start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
            end_date = Calendar(label="End date", type="datetime")

    # --- Results -----------------------------------------------------------
    num_papers = gr.Textbox(
        label="Number of papers",
        value=update_num_papers(paper_list.df_raw),
        interactive=False,
    )
    df = gr.Dataframe(
        value=paper_list.df_prettified,
        datatype=paper_list.column_datatype,
        type="pandas",
        interactive=False,
        max_height=1000,
        elem_id="table",
        column_widths=["10%", "10%", "60%", "10%", "5%", "5%"],
        wrap=True,
    )

    gr.Markdown(FOOT_NOTE)

    # --- Event wiring ------------------------------------------------------
    search_triggers = [
        start_date.change,
        end_date.change,
        search_title.submit,
        search_abstract.submit,
    ]
    search_inputs = [start_date, end_date, search_title, search_abstract, max_num_to_retrieve]
    # Both event chains finish by recomputing the counter from the table contents.
    refresh_counter = dict(
        fn=update_num_papers,
        inputs=df,
        outputs=num_papers,
        queue=False,
        api_name=False,
    )

    # Re-run the search when a date changes or a search box is submitted.
    gr.on(
        triggers=search_triggers,
        fn=search,
        inputs=search_inputs,
        outputs=df,
        api_name=False,
    ).then(**refresh_counter)

    # On page load, show the current table rather than the one captured at build time.
    demo.load(
        fn=update_df,
        outputs=df,
        queue=False,
        api_name=False,
    ).then(**refresh_counter)
if __name__ == "__main__":
    # Configure the queue first, then launch whatever queue() returns.
    queued_demo = demo.queue(api_open=False)
    queued_demo.launch(show_api=False)