# Hugging Face Spaces page header captured with this file (Space status: Sleeping).
import datetime
import html
import os

import gradio as gr
import huggingface_hub as hf
# Path of the dataset repository, in HfFileSystem notation, under which every
# paper (*.md) and every uploaded asset (uploads/) is stored.
datasetdir = "datasets/yoinked/blue-arxiv-papers/"

# Filesystem handle used for all reads and writes. The token comes from the
# HF_TOKEN environment variable; a missing variable raises KeyError here.
fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])

# Stylesheet passed to gr.Blocks: styles the paper cards and the flex
# container that wraps them.
basecss = """
.caaard-container {
width: 250px;
padding: 20px;
border: 3px solid black;
border-radius: 15px;
text-align: left;
}
.title {
font-size: 24px;
margin-bottom: 10px;
text-align: center;
}
.caaard-containers {
display: flex; gap: 20px; flex-wrap: wrap;
}
.extra-info {
font-size: 14px;
line-height: 1.5;
}
.extra-info-paperid {
font-size: 18px;
line-height: 1.75;
}"""
def get_papers():
    """Return the dataset's paper files in reverse glob order.

    Globs every ``*.md`` path directly under ``datasetdir`` and returns a
    ``reversed`` iterator over the matches, so the result can be consumed
    only once.
    """
    markdown_paths = fs.glob(datasetdir + "*.md")
    return reversed(markdown_paths)
def _parse_front_matter(front_matter):
    """Map each ``key: value`` line of a front-matter block to its value.

    Only the first occurrence of a key is kept, and keys are matched at the
    start of a line so text inside one field cannot be mistaken for another
    field.
    """
    fields = {}
    for line in front_matter.split("\n"):
        key, sep, value = line.partition(":")
        key = key.strip()
        if sep and key and key not in fields:
            fields[key] = value.strip()
    return fields


def get_papers_metadata(papiers=None):
    """Read the front matter of each paper and return it as a list of dicts.

    Args:
        papiers: iterable of paper file paths; defaults to ``get_papers()``.

    Returns:
        One dict per displayable paper with the keys ``fname``, ``metadata``
        (the raw front-matter text), ``author``, ``title``, ``tags`` (list of
        stripped, non-empty tags), ``abstract``, ``date_published`` and
        ``paperid``. Missing text fields fall back to ``"unknown"``.
        Papers without front matter or without a paper id are skipped,
        because they cannot be opened from the reader anyway.
    """
    if papiers is None:
        papiers = get_papers()
    metadatas = []
    for paper in papiers:
        with fs.open(paper, "r") as f:
            papertxt = f.read()
        # Expected layout: "---\n<front matter>\n---\n<body>". A file with no
        # delimiter at all is skipped instead of aborting the whole listing.
        sections = papertxt.split("---", 2)
        if len(sections) < 2:
            continue
        metadata = sections[1]
        fields = _parse_front_matter(metadata)
        paperid = fields.get("paperid")
        if not paperid:
            continue
        raw_tags = fields.get("tags", "")
        metadatas.append(
            {
                "fname": paper,
                "metadata": metadata,
                "author": fields.get("author", "unknown"),
                "title": fields.get("title", "unknown"),
                # Accept both "a, b" and "a,b"; drop empty entries.
                "tags": [tag.strip() for tag in raw_tags.split(",") if tag.strip()],
                "abstract": fields.get("abstract", "unknown"),
                "date_published": fields.get("date_published", "unknown"),
                "paperid": paperid,
            }
        )
    return metadatas
def make_paper_card(md):
    """Render one paper's metadata dict as an HTML card.

    Args:
        md: dict with the keys ``abstract``, ``title``, ``author``,
            ``date_published`` and ``paperid``.

    Returns:
        The card markup. The abstract becomes the card's ``title`` attribute
        (a hover tooltip). Every value is HTML-escaped, quotes included, so
        metadata text cannot break out of the attribute or inject markup.
    """
    abstract, title, author, published, paperid = (
        html.escape(str(md[key]), quote=True)
        for key in ("abstract", "title", "author", "date_published", "paperid")
    )
    card_lines = [
        "",
        f'<div class="caaard-container" title="{abstract}">',
        f'<div class="title">{title}</div>',
        "<br><br>",
        f'<div class="extra-info">author: {author}</div>',
        f'<div class="extra-info">published: {published}</div>',
        f'<div class="extra-info-paperid">id: {paperid}</div>',
        "</div>",
        "",
    ]
    return "\n".join(card_lines)
def make_paper_cards(tags=""):
    """Build the HTML for the paper cards, optionally filtered by tag.

    Args:
        tags: comma-separated tag query. Blank entries (from a trailing
            comma or a whitespace-only query) are ignored; if nothing is
            left, every paper is shown.

    Returns:
        A ``caaard-containers`` div holding one card per paper that carries
        at least one of the requested tags, each card followed by ``<br>``.
    """
    mds = get_papers_metadata()
    wanted = [tag.strip() for tag in (tags or "").split(",")]
    wanted = [tag for tag in wanted if tag]
    if wanted:
        mds = [md for md in mds if any(tag in md["tags"] for tag in wanted)]
    cards = "".join(make_paper_card(md) + "<br>" for md in mds)
    return "<div class='caaard-containers'>" + cards + "</div>"
def get_paper_markdown(paperid):
    """Return the markdown body of the paper with the given id.

    Args:
        paperid: id to look up; surrounding whitespace is ignored.

    Returns:
        Everything after the closing front-matter delimiter,
        ``"## paper not found"`` when no paper has that id, or an empty
        string when the file has no closing delimiter.
    """
    paperid = (paperid or "").strip()
    fname = next(
        (
            paper["fname"]
            for paper in get_papers_metadata()
            if paper["paperid"] == paperid
        ),
        None,
    )
    print(fname, paperid)
    if fname is None:
        return "## paper not found"
    with fs.open(fname, "r") as f:
        papertxt = f.read()
    # Split at most twice: a "---" inside the body (horizontal rule, table
    # separator) must stay part of the body rather than truncate it.
    sections = papertxt.split("---", 2)
    return sections[2] if len(sections) > 2 else ""
def _clean_metadata_value(value):
    """Make a user-supplied string safe to store on one front-matter line."""
    # Primitive anti-XSS sanitization: drop the same characters as before.
    value = (value or "").translate(str.maketrans("", "", "<>:|\\"))
    # A field must stay on one line, otherwise the rest is lost on read.
    value = value.replace("\r", " ").replace("\n", " ")
    # "---" is the front-matter delimiter; shorten any run of hyphens so a
    # field can never contain it.
    while "---" in value:
        value = value.replace("---", "--")
    return value


def publish_paper(title, authors, tags, abst, data):
    """Write a new paper (front matter + markdown body) to the dataset.

    The paper id is ``<year>-<month><day>.<n>``, using the first ``n`` in
    1..100 whose file does not exist yet.

    Args:
        title, authors, tags, abst: metadata fields; sanitized before being
            written to the front matter.
        data: markdown body of the paper, stored unchanged.

    Returns:
        A status message: the new paper id on success, or an explanation
        when all 100 ids for today are already taken.
    """
    # Take the timestamp once so the id and date_published always agree.
    now = datetime.datetime.now()
    id_prefix = now.strftime("%Y-%m%d")
    date_published = now.strftime("%Y-%m-%d")

    for idx in range(1, 101):
        paperid = f"{id_prefix}.{idx}"
        if not fs.exists(datasetdir + paperid + ".md"):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    title = _clean_metadata_value(title)
    authors = _clean_metadata_value(authors)
    tags = _clean_metadata_value(tags)
    abst = _clean_metadata_value(abst)

    metadata = (
        "---\n"
        f"title: {title}\n"
        f"author: {authors}\n"
        f"tags: {tags}\n"
        f"abstract: {abst}\n"
        f"date_published: {date_published}\n"
        f"paperid: {paperid}\n"
        "---\n"
    )
    with fs.open(datasetdir + paperid + ".md", "w") as f:
        f.write(metadata + (data or ""))
    return f"published as {paperid}"
def makepreview(x):
    """Return the given value unchanged (identity pass-through)."""
    preview_source = x
    return preview_source
def _safe_name_part(text):
    """Reduce ``text`` to ASCII letters, digits, ``-`` and ``_``.

    Every other character (path separators and dots included) becomes ``_``
    so the value cannot change the directory the file is written to.
    """
    return "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "-_" else "_"
        for ch in (text or "")
    )


def upload(prefix, fname, ext, file):
    """Store an uploaded file under ``uploads/`` in the dataset.

    Args:
        prefix: user-chosen prefix for the stored name.
        fname: user-chosen file name (without extension).
        ext: file extension; must be one of the supported types.
        file: raw bytes of the uploaded file.

    Returns:
        A status message: the public URL to embed on success, or a message
        starting with ``"upload failed"`` when the extension is unsupported
        or no file content was provided.
    """
    allowed_exts = ("png", "gif", "webp", "jpg", "wav", "mp3")
    if ext not in allowed_exts:
        return "upload failed: pick one of the supported file types"
    if not file:
        return "upload failed: no file selected"
    fname = _safe_name_part(prefix) + "-" + _safe_name_part(fname)
    with fs.open(datasetdir + "uploads/" + fname + "." + ext, "wb") as f:
        f.write(file)
    url = f"https://huggingface.co/datasets/yoinked/blue-arxiv-papers/resolve/main/uploads/{fname}.{ext}"
    return f"uploaded, use {url} to include in your paper (so like ![image]({url})) for inline img"
# Gradio UI: four tabs (search, read, publish, files) wired to the functions
# above, then the app is launched.
# NOTE(review): the nesting of rows below was reconstructed from the statement
# order; confirm the layout against the deployed Space.
with gr.Blocks(css=basecss, theme='NoCrypt/miku') as demo:
    gr.Image("./blue-arxiv.png", container=False, label=None, interactive=False, show_fullscreen_button=False, show_share_button=False, show_download_button=False)

    # Search tab: optional tag filter plus the rendered paper cards.
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("π")
        with gr.Row():
            papercards = gr.HTML("Click the π to load all papers!")

    # Read tab: look a paper up by id and render its markdown.
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()

    # Publish tab: metadata fields, markdown editor with live preview.
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is a line count, so pass an int rather than the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")

    # Files tab: upload an asset that papers can link to.
    with gr.Tab("files"):
        with gr.Row():
            prefix = gr.Textbox(label="prefix", lines=1, interactive=True)
            file_name = gr.Textbox(label="file name", lines=1, interactive=True)
        with gr.Row():
            file = gr.File(label="file", file_types=[".png", ".gif", ".webp", ".jpg", ".wav", ".mp3"], type="binary")
            fileext = gr.Dropdown(label="filetype", choices=["png", "gif", "webp", "jpg", "wav", "mp3"])
        uploadbutton = gr.Button("upload")
        statii = gr.Textbox(label="status", interactive=False)

    # Event wiring; listeners are registered inside the Blocks context.
    uploadbutton.click(fn=upload, inputs=[prefix, file_name, fileext, file], outputs=statii)
    markd.change(fn=makepreview, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)

demo.launch()