daily-papers / app.py
hysts's picture
hysts HF staff
gradio==5.0.1
aa632a5
raw
history blame
3.78 kB
#!/usr/bin/env python
import datetime
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_calendar import Calendar
from papers import PaperList, get_df
# Markdown heading shown as the first component of the UI.
DESCRIPTION = "# [Daily Papers](https://huggingface.co/papers)"
# Markdown list of related Spaces, rendered after the results table.
FOOT_NOTE = """\
Related useful Spaces:
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
- [dailypapershackernews](https://huggingface.co/spaces/akhaliq/dailypapershackernews) by [akhaliq](https://huggingface.co/akhaliq)
"""
# Module-level paper index, built once at import time and rebound later by
# update_paper_list(); the callbacks below read it through this global name.
paper_list = PaperList(get_df())
def update_paper_list() -> None:
    """Rebuild the module-level ``paper_list`` from a fresh ``get_df()`` result.

    The new ``PaperList`` is fully constructed before the global name is
    rebound, so readers of ``paper_list`` see either the old or the new object.
    """
    global paper_list
    refreshed = PaperList(get_df())
    paper_list = refreshed
# Background job that rebuilds the paper list on a cron schedule (hour="*", UTC).
scheduler = BackgroundScheduler()
scheduler.add_job(
    update_paper_list,
    "cron",
    hour="*",
    timezone="UTC",
    misfire_grace_time=60,
)
scheduler.start()
def update_df() -> gr.Dataframe:
    """Return a ``gr.Dataframe`` whose value is the current prettified table.

    Wired to ``demo.load`` so the table is repopulated from the current
    ``paper_list`` each time the page loads.
    """
    prettified = paper_list.df_prettified
    return gr.Dataframe(value=prettified)
def update_num_papers(df: pd.DataFrame) -> str:
    """Format the paper counter as ``"<rows in df> / <rows in paper_list.df_raw>"``.

    Args:
        df: The table currently displayed in the UI.

    Returns:
        The two row counts joined by ``" / "``.
    """
    shown = len(df)
    total = len(paper_list.df_raw)
    return f"{shown} / {total}"
def search(
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    search_title: str,
    search_abstract: str,
    max_num_to_retrieve: int,
) -> pd.DataFrame:
    """Forward the UI filter values to ``paper_list.search`` and return its result.

    The global ``paper_list`` is looked up at call time, so a list swapped in by
    ``update_paper_list`` is picked up by the next search.

    Args:
        start_date: Value of the "Start date" calendar.
        end_date: Value of the "End date" calendar.
        search_title: Text from the "Search title" box.
        search_abstract: Text from the "Search abstract" box.
        max_num_to_retrieve: Value of the "Max number to retrieve" slider.

    Returns:
        Whatever ``paper_list.search`` returns (annotated as a DataFrame).
    """
    matches = paper_list.search(
        start_date,
        end_date,
        search_title,
        search_abstract,
        max_num_to_retrieve,
    )
    return matches
# Build the Gradio UI: search controls, a paper counter, the results table,
# and the event wiring that keeps the table and counter in sync.
# NOTE(review): the container nesting below was reconstructed from a copy whose
# indentation had been stripped -- confirm which components belong inside the
# Group / Row / Column containers.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Group():
        search_title = gr.Textbox(label="Search title")
        with gr.Row():
            with gr.Column(scale=4):
                search_abstract = gr.Textbox(
                    label="Search abstract",
                    info="The result may not be accurate as the abstract does not contain all the information.",
                )
            with gr.Column(scale=1):
                # NOTE(review): `maximum` is evaluated once, when the UI is
                # built, while paper_list is rebuilt by the scheduler -- the
                # upper bound may lag behind the refreshed list.
                max_num_to_retrieve = gr.Slider(
                    label="Max number to retrieve",
                    info="This is used only for search on abstracts.",
                    minimum=1,
                    maximum=len(paper_list.df_raw),
                    value=100,
                    step=1,
                )
        with gr.Row():
            start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
            end_date = Calendar(label="End date", type="datetime")

    num_papers = gr.Textbox(
        label="Number of papers",
        value=update_num_papers(paper_list.df_raw),
        interactive=False,
    )
    df = gr.Dataframe(
        value=paper_list.df_prettified,
        datatype=paper_list.column_datatype,
        type="pandas",
        interactive=False,
        wrap=True,
        max_height=1000,
        column_widths=["10%", "10%", "60%", "10%", "5%", "5%"],
        elem_id="table",
    )

    gr.Markdown(FOOT_NOTE)

    # Both event chains end with the same follow-up step: recompute the
    # "shown / total" counter from whatever the table now holds.
    _refresh_count = {
        "fn": update_num_papers,
        "inputs": df,
        "outputs": num_papers,
        "queue": False,
        "api_name": False,
    }

    # Re-run the search when either date changes or a search box is submitted.
    gr.on(
        triggers=[
            start_date.change,
            end_date.change,
            search_title.submit,
            search_abstract.submit,
        ],
        fn=search,
        inputs=[
            start_date,
            end_date,
            search_title,
            search_abstract,
            max_num_to_retrieve,
        ],
        outputs=df,
        api_name=False,
    ).then(**_refresh_count)

    # On page load, repopulate the table from the current paper_list.
    demo.load(
        fn=update_df,
        outputs=df,
        queue=False,
        api_name=False,
    ).then(**_refresh_count)
if __name__ == "__main__":
    # Script entry point: enable the queue with the API closed, then launch
    # with the API page hidden. `queue()` configures `demo` itself, so the two
    # calls are issued as separate statements on the same object.
    demo.queue(api_open=False)
    demo.launch(show_api=False)