# NOTE: page-status residue from the capture ("Spaces: Sleeping Sleeping") was here; it is not part of the program.
import datetime
import html
import os

import gradio as gr
import huggingface_hub as hf
# Filesystem-style client for the Hugging Face Hub, authenticated with the
# HF_TOKEN environment variable (a missing variable raises KeyError here).
fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
# Hub path prefix under which each published paper is written as "<paperid>.md".
datasetdir = "datasets/yoinked/blue-arxiv-papers/"
# Stylesheet handed to gr.Blocks(css=...): styles the paper cards produced by
# make_paper_card and the flex container that make_paper_cards wraps them in.
basecss = """
.caaard-container {
    width: 250px;
    padding: 20px;
    border: 3px solid black;
    border-radius: 15px;
    text-align: left;
}
.title {
    font-size: 24px;
    margin-bottom: 10px;
    text-align: center;
}
.caaard-containers {
    display: flex; gap: 20px; flex-wrap: wrap;
}
.extra-info {
    font-size: 14px;
    line-height: 1.5;
}
.extra-info-paperid {
    font-size: 14px;
    line-height: 1.5;
    color: #222;
}"""
# JavaScript handed to gr.Blocks(js=...). Gradio executes the supplied function
# once when the page loads, so the function below only *registers* the click
# handler on ``window``; the cards' inline onclick="copyToClipboard(this)" then
# resolves it as a global. The handler copies the card's title text.
jscode = """
function registerCopyToClipboard() {
    window.copyToClipboard = function (container) {
        const titleElement = container.querySelector(".title");
        if (!titleElement) {
            return;
        }
        const tempTextArea = document.createElement("textarea");
        tempTextArea.value = titleElement.textContent;
        document.body.appendChild(tempTextArea);
        tempTextArea.select();
        document.execCommand("copy");
        document.body.removeChild(tempTextArea);
    };
}
"""
def get_papers():
    """Return the Hub paths under ``datasetdir`` that match ``**.md``."""
    pattern = datasetdir + "**.md"
    return fs.glob(pattern)
def _parse_front_matter(metadata):
    """Map each ``key: value`` line of a front-matter block to its value.

    Keys are matched at line level (not as substrings anywhere in the block)
    and only the first occurrence of a key is kept.
    """
    fields = {}
    for line in metadata.split("\n"):
        key, sep, value = line.partition(": ")
        if sep:
            fields.setdefault(key.strip(), value.strip())
    return fields


def get_papers_metadata(papiers=None):
    """Read the front matter of each paper file and return it as dicts.

    Args:
        papiers: iterable of Hub file paths; when ``None`` the list comes
            from ``get_papers()``.

    Returns:
        A list of dicts with the keys ``fname``, ``metadata`` (raw front-matter
        text), ``author``, ``title``, ``tags`` (list of str), ``abstract``,
        ``date_published`` and ``paperid``. Missing text fields default to
        ``"unknown"`` and missing tags to ``[]``. Files without a front-matter
        block or without a ``paperid`` line are skipped, since they cannot be
        looked up later.
    """
    if papiers is None:
        papiers = get_papers()
    metadatas = []
    for paper in papiers:
        sections = fs.read_text(paper, encoding="utf-8").split("---")
        if len(sections) < 2:
            # No "---" delimiter at all: not a paper file we can parse.
            continue
        metadata = sections[1]
        fields = _parse_front_matter(metadata)
        paperid = fields.get("paperid")
        if paperid is None:
            # Without an id there is no reason to display the paper.
            continue
        raw_tags = fields.get("tags")
        # Split on "," and strip so stored tags line up with how the search
        # query is tokenised ("a,b" and "a, b" both give ["a", "b"]).
        tags = [] if raw_tags is None else [tag.strip() for tag in raw_tags.split(",")]
        metadatas.append(
            {
                "fname": paper,
                "metadata": metadata,
                "author": fields.get("author", "unknown"),
                "title": fields.get("title", "unknown"),
                "tags": tags,
                "abstract": fields.get("abstract", "unknown"),
                "date_published": fields.get("date_published", "unknown"),
                "paperid": paperid,
            }
        )
    return metadatas
def make_paper_card(md):
    """Render one paper-metadata dict as an HTML card.

    Args:
        md: dict with the keys ``abstract``, ``title``, ``author``,
            ``date_published`` and ``paperid``.

    Returns:
        An HTML snippet: a ``caaard-container`` div whose tooltip is the
        abstract and whose click handler is ``copyToClipboard(this)``.
        Every metadata value is HTML-escaped (including quotes) because it
        originates from user-submitted text and is placed both in element
        content and inside the ``title`` attribute.
    """
    abstract, title, author, published, paperid = (
        html.escape(str(md[key]), quote=True)
        for key in ("abstract", "title", "author", "date_published", "paperid")
    )
    return f"""
<div class="caaard-container" onclick="copyToClipboard(this)" title="{abstract}">
<div class="title">{title}</div>
<br><br>
<div class="extra-info">author: {author}</div>
<div class="extra-info">published: {published}</div>
<div class="extra-info-paperid">id: {paperid}</div>
</div>
"""
def make_paper_cards(tags=None):
    """Build the HTML for the search results.

    Args:
        tags: comma-separated tag query, or ``None``/empty for no filtering.

    Returns:
        A ``caaard-containers`` div holding one card per paper (each followed
        by ``<br>``). With a non-empty query, only papers having at least one
        of the requested tags are included.
    """
    # Normalise before use: the query may be None (default) or blank, and
    # blank tokens must not act as a filter.
    wanted = [tag.strip() for tag in (tags or "").split(",")]
    wanted = [tag for tag in wanted if tag]
    mds = get_papers_metadata()
    if wanted:
        mds = [md for md in mds if any(tag in md["tags"] for tag in wanted)]
    cards = "".join(make_paper_card(md) + "<br>" for md in mds)
    return "<div class='caaard-containers'>" + cards + "</div>"
def get_paper_markdown(paperid):
    """Return the Markdown body of the paper whose id is ``paperid``.

    Args:
        paperid: paper id as typed by the user; surrounding whitespace is
            ignored.

    Returns:
        The text after the front-matter block, ``"## paper not found"`` when
        no paper has that id, or ``"## paper has no body"`` when the file has
        no closing front-matter delimiter.
    """
    wanted = (paperid or "").strip()
    fname = next(
        (
            entry["fname"]
            for entry in get_papers_metadata()
            if entry["paperid"].strip() == wanted
        ),
        None,
    )
    if fname is None:
        return "## paper not found"
    # maxsplit=2 keeps any later "---" (horizontal rules, table separators)
    # inside the body instead of cutting the paper short there.
    sections = fs.read_text(fname, encoding="utf-8").split("---", 2)
    if len(sections) < 3:
        return "## paper has no body"
    return sections[2]
def _clean_front_matter_value(value):
    """Make a user-supplied string safe to store on a single front-matter line."""
    bad_chars = "<>:|\\"  # primitive anti-xss sanitization
    value = (value or "").translate(str.maketrans("", "", bad_chars))
    # Newlines would start a new (forgeable) metadata line; fold all
    # whitespace runs into single spaces.
    value = " ".join(value.split())
    # "---" is the front-matter delimiter and must not appear inside a value.
    while "---" in value:
        value = value.replace("---", "--")
    return value


def publish_paper(title, authors, tags, abst, data):
    """Write a new paper file to the dataset and report the outcome.

    The paper id has the form ``<year>-<month><day>.<n>`` where ``n`` is the
    first value in 1..100 whose file does not exist yet.

    Args:
        title, authors, tags, abst: front-matter fields; each is sanitised
            and flattened to one line before being written.
        data: Markdown body, written unchanged after the front matter.

    Returns:
        A status message: the new paper id on success, or
        ``"could not generate paperid, try again tomorrow"`` when all 100
        ids for today are taken.
    """
    # Take the timestamp once so the id and the published date cannot
    # disagree if the call straddles midnight.
    now = datetime.datetime.now()
    day_stamp = now.strftime("%Y-%m%d")
    for idx in range(1, 101):
        paperid = f"{day_stamp}.{idx}"
        path = datasetdir + paperid + ".md"
        if not fs.exists(path):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    title, authors, tags, abst = (
        _clean_front_matter_value(value) for value in (title, authors, tags, abst)
    )
    metadata = (
        "---\n"
        f"title: {title}\n"
        f"author: {authors}\n"
        f"tags: {tags}\n"
        f"abstract: {abst}\n"
        f"date_published: {now.strftime('%Y-%m-%d')}\n"
        f"paperid: {paperid}\n"
        "---\n"
    )
    with fs.open(path, "w", encoding="utf-8") as f:
        f.write(metadata + (data or ""))
    return f"published as {paperid}"
def makepreview(x):
    """Return ``x`` unchanged (the Markdown text is previewed as typed)."""
    return x
# Gradio UI: three tabs (search / read / publish) wired to the module's
# handler functions, then launched.
# NOTE(review): the tab/row nesting below was reconstructed from a flattened
# capture of this file - confirm the layout against the running app.
with gr.Blocks(css=basecss, js=jscode, theme='NoCrypt/miku') as demo:
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("search")
        with gr.Row():
            papercards = gr.HTML()
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is a line count, so pass an int rather than the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")
    # Event wiring: live preview while typing, plus one handler per button.
    markd.change(fn=makepreview, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)
demo.launch()