#!/usr/bin/env python

import datetime

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_calendar import Calendar

from papers import PaperList, get_df

DESCRIPTION = "# [Daily Papers](https://huggingface.co/papers)"

FOOT_NOTE = """\
Related useful Spaces:
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
- [dailypapershackernews](https://huggingface.co/spaces/akhaliq/dailypapershackernews) by [akhaliq](https://huggingface.co/akhaliq)
"""

# Module-level snapshot of the paper data. Every callback below reads this
# global, and the scheduled job rebinds it to a freshly built PaperList.
paper_list = PaperList(get_df())


def update_paper_list() -> None:
    """Rebind the module-level ``paper_list`` to a PaperList built from a new ``get_df()`` call."""
    global paper_list
    refreshed = PaperList(get_df())
    paper_list = refreshed


# Background job that re-runs update_paper_list on a cron trigger
# (hour="*", evaluated in UTC). The scheduler is started at import time.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=update_paper_list,
    trigger="cron",
    hour="*",
    timezone="UTC",
    misfire_grace_time=60,
)
scheduler.start()


def update_df() -> gr.Dataframe:
    """Return a ``gr.Dataframe`` whose value is the current ``paper_list.df_prettified``."""
    current_table = paper_list.df_prettified
    return gr.Dataframe(value=current_table)


def update_num_papers(df: pd.DataFrame) -> str:
    """Format the row count of ``df`` against the row count of ``paper_list.df_raw``.

    Args:
        df: The table currently displayed.

    Returns:
        A string of the form ``"<len(df)> / <len(paper_list.df_raw)>"``.
    """
    shown = len(df)
    total = len(paper_list.df_raw)
    return f"{shown} / {total}"


def search(
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    search_title: str,
    search_abstract: str,
    max_num_to_retrieve: int,
) -> pd.DataFrame:
    """Forward the current filter values, in order, to ``paper_list.search``.

    Args:
        start_date: Value of the "Start date" calendar.
        end_date: Value of the "End date" calendar.
        search_title: Text from the "Search title" box.
        search_abstract: Text from the "Search abstract" box.
        max_num_to_retrieve: Value of the "Max number to retrieve" slider.

    Returns:
        Whatever ``paper_list.search`` returns for these arguments.
    """
    criteria = (start_date, end_date, search_title, search_abstract, max_num_to_retrieve)
    return paper_list.search(*criteria)


with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    # --- Search controls ---
    with gr.Group():
        search_title = gr.Textbox(label="Search title")
        with gr.Row():
            with gr.Column(scale=4):
                search_abstract = gr.Textbox(
                    label="Search abstract",
                    info="The result may not be accurate as the abstract does not contain all the information.",
                )
            with gr.Column(scale=1):
                # The slider's upper bound is computed once, when the UI is built.
                max_num_to_retrieve = gr.Slider(
                    label="Max number to retrieve",
                    info="This is used only for search on abstracts.",
                    minimum=1,
                    maximum=len(paper_list.df_raw),
                    step=1,
                    value=100,
                )
        with gr.Row():
            start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
            end_date = Calendar(label="End date", type="datetime")

    # --- Results ---
    num_papers = gr.Textbox(
        label="Number of papers",
        value=update_num_papers(paper_list.df_raw),
        interactive=False,
    )
    df = gr.Dataframe(
        value=paper_list.df_prettified,
        datatype=paper_list.column_datatype,
        type="pandas",
        interactive=False,
        max_height=1000,
        elem_id="table",
        column_widths=["10%", "10%", "60%", "10%", "5%", "5%"],
        wrap=True,
    )

    gr.Markdown(FOOT_NOTE)

    # --- Event wiring ---
    # Both chains below end with the same follow-up step: recompute the
    # "shown / total" counter from whatever table is now displayed.
    refresh_count = {
        "fn": update_num_papers,
        "inputs": df,
        "outputs": num_papers,
        "queue": False,
        "api_name": False,
    }

    search_triggers = [
        start_date.change,
        end_date.change,
        search_title.submit,
        search_abstract.submit,
    ]
    search_inputs = [
        start_date,
        end_date,
        search_title,
        search_abstract,
        max_num_to_retrieve,
    ]

    # Any date change or text submit re-runs the search and replaces the table.
    gr.on(
        triggers=search_triggers,
        fn=search,
        inputs=search_inputs,
        outputs=df,
        api_name=False,
    ).then(**refresh_count)

    # On page load, show the table from the current paper_list.
    demo.load(
        fn=update_df,
        outputs=df,
        queue=False,
        api_name=False,
    ).then(**refresh_count)

if __name__ == "__main__":
    demo.queue(api_open=False)
    demo.launch(show_api=False)