Spaces:
Running
Running
File size: 7,034 Bytes
5ad8cbc acfde36 5ad8cbc acfde36 5ad8cbc e52f41a 5ad8cbc 1f871fc 5ad8cbc 3cc8ac8 5ad8cbc f1ef237 5ad8cbc b005dc6 5ad8cbc b005dc6 52fdc4e 5ad8cbc acfde36 5ad8cbc a7c0e1b 5ad8cbc 956ed72 c8821e9 5ad8cbc d6e3fde 5ad8cbc f602886 2cddf47 5cbff0e 2cddf47 96e61f3 8c3c798 5ad8cbc 2fa4815 5ad8cbc 2fa4815 5ad8cbc be6e01f 5ad8cbc be6e01f 5ad8cbc 2cddf47 220e98c 2cddf47 5cbff0e be6e01f 5ad8cbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
import huggingface_hub as hf
import gradio as gr
import os, datetime
# Hub filesystem handle shared by every function below. The token is read from
# the HF_TOKEN environment variable; a missing variable raises KeyError at
# import time.
fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
# Path prefix (inside `fs`) where paper files live. It must keep its trailing
# "/" because file paths are built from it by plain string concatenation.
datasetdir = "datasets/yoinked/blue-arxiv-papers/"
# Custom CSS handed to gr.Blocks(css=...). The class names here
# (caaard-container, caaard-containers, title, extra-info, extra-info-paperid)
# are the ones emitted by make_paper_card / make_paper_cards, so renaming a
# class requires changing both places.
basecss = """
.caaard-container {
width: 250px;
padding: 20px;
border: 3px solid black;
border-radius: 15px;
text-align: left;
}
.title {
font-size: 24px;
margin-bottom: 10px;
text-align: center;
}
.caaard-containers {
display: flex; gap: 20px; flex-wrap: wrap;
}
.extra-info {
font-size: 14px;
line-height: 1.5;
}
.extra-info-paperid {
font-size: 18px;
line-height: 1.75;
}"""
def get_papers():
    """Return a reverse iterator over the ``*.md`` paths found in the dataset directory.

    The paths come from ``fs.glob`` and are yielded in the opposite order to
    the one the glob returned them in.
    """
    pattern = f"{datasetdir}*.md"
    matches = fs.glob(pattern)
    return reversed(matches)
def _front_matter_field(metadata, key, default):
    """Return the value of the first ``key: value`` line in *metadata*.

    Matching is done per line (leading whitespace ignored) so that a key name
    occurring inside another field's value is not mistaken for the field
    itself. *default* is returned when no line starts with ``key: ``.
    """
    prefix = f"{key}: "
    for line in metadata.split("\n"):
        candidate = line.lstrip()
        if candidate.startswith(prefix):
            # Drop a stray carriage return left by CRLF line endings.
            return candidate[len(prefix):].rstrip("\r")
    return default


def get_papers_metadata(papiers=None):
    """Read the front matter of each paper file and return it as a list of dicts.

    Args:
        papiers: iterable of file paths to read. When ``None``, the paths
            returned by ``get_papers()`` are used.

    Returns:
        One dict per paper with the keys ``fname``, ``metadata`` (the raw
        front-matter text), ``author``, ``title``, ``tags`` (list of strings),
        ``abstract``, ``date_published`` and ``paperid``. Missing text fields
        fall back to ``"unknown"`` and missing tags to ``[]``. Files without a
        ``---`` front-matter block or without a ``paperid`` field are skipped,
        because a paper that cannot be addressed by id cannot be displayed.
    """
    if papiers is None:
        papiers = get_papers()
    metadatas = []
    for paper in papiers:
        # Only hold the remote file open for the read itself.
        with fs.open(paper, "r") as f:
            papertxt = f.read()
        sections = papertxt.split("---")
        if len(sections) < 2:
            # No front matter at all (e.g. a plain README): skip the file
            # instead of aborting the whole listing with an IndexError.
            continue
        metadata = sections[1]
        paperid = _front_matter_field(metadata, "paperid", None)
        if paperid is None:
            continue
        raw_tags = _front_matter_field(metadata, "tags", None)
        metadatas.append({
            "fname": paper,
            "metadata": metadata,
            "author": _front_matter_field(metadata, "author", "unknown"),
            "title": _front_matter_field(metadata, "title", "unknown"),
            "tags": raw_tags.split(", ") if raw_tags is not None else [],
            "abstract": _front_matter_field(metadata, "abstract", "unknown"),
            "date_published": _front_matter_field(metadata, "date_published", "unknown"),
            "paperid": paperid,
        })
    return metadatas
def make_paper_card(md):
    """Render one paper's metadata as an HTML card.

    Args:
        md: metadata dict with the string keys ``abstract``, ``title``,
            ``author``, ``date_published`` and ``paperid``.

    Returns:
        An HTML snippet: a ``caaard-container`` div whose tooltip (``title``
        attribute) is the abstract and whose body shows title, author,
        publication date and paper id.
    """
    from html import escape

    # Paper metadata is user-supplied, so every value is escaped (including
    # quotes) before it is placed in element text or in the title attribute;
    # otherwise a '"' in the abstract would close the attribute and allow
    # arbitrary attributes/markup to be injected.
    abstract = escape(md["abstract"], quote=True)
    title = escape(md["title"], quote=True)
    author = escape(md["author"], quote=True)
    published = escape(md["date_published"], quote=True)
    paperid = escape(md["paperid"], quote=True)
    return (
        "\n"
        f'<div class="caaard-container" title="{abstract}">\n'
        f'<div class="title">{title}</div>\n'
        "<br><br>\n"
        f'<div class="extra-info">author: {author}</div>\n'
        f'<div class="extra-info">published: {published}</div>\n'
        f'<div class="extra-info-paperid">id: {paperid}</div>\n'
        "</div>\n"
    )
def make_paper_cards(tags=""):
    """Build the HTML for the card grid, optionally filtered by tags.

    Args:
        tags: comma-separated tag names. A paper is kept when it carries at
            least one of them. An empty, whitespace-only or ``None`` value
            disables filtering and shows every paper.

    Returns:
        A ``caaard-containers`` div containing one card (followed by ``<br>``)
        per matching paper.
    """
    mds = get_papers_metadata()
    wanted = {tag.strip() for tag in (tags or "").split(",")}
    # Blank entries (from " ", "a," or ",,") must not count as a tag:
    # untagged papers are stored with a single empty tag and would otherwise
    # all match.
    wanted.discard("")
    if wanted:
        mds = [md for md in mds if wanted.intersection(md["tags"])]
    cards = "".join(make_paper_card(md) + "<br>" for md in mds)
    return "<div class='caaard-containers'>" + cards + "</div>"
def get_paper_markdown(paperid):
    """Return the markdown body of the paper with the given id.

    Args:
        paperid: the paper id as typed by the user; surrounding whitespace is
            ignored.

    Returns:
        Everything after the closing ``---`` of the front matter, or the
        string ``"## paper not found"`` when no paper has that id. A file
        whose front matter is never closed yields an empty body.
    """
    wanted = (paperid or "").strip()
    fname = None
    # NOTE: this reads the front matter of every paper to resolve one id.
    for paper in get_papers_metadata():
        if paper["paperid"].strip() == wanted:
            fname = paper["fname"]
            break
    print(fname, paperid)
    if fname is None:
        return "## paper not found"
    with fs.open(fname, "r") as f:
        papertxt = f.read()
    # Split only on the first two delimiters: the body itself may contain
    # "---" (horizontal rules, table separators) and must not be truncated.
    _, _, after_opening = papertxt.partition("---")
    _, _, body = after_opening.partition("---")
    return body
def publish_paper(title, authors, tags, abst, data):
    """Write a new paper file (front matter + markdown body) to the dataset.

    The paper id has the form ``YYYY-MMDD.N`` where ``N`` is the first index
    in 1..100 for which no file exists yet.

    Args:
        title: paper title.
        authors: author name(s).
        tags: comma-separated tags (may be empty).
        abst: short abstract, used as the card tooltip.
        data: markdown body of the paper.

    Returns:
        A status message: the assigned paper id on success, or an explanation
        when all 100 ids for today are already taken.
    """
    # Take the timestamp once so the id and the date cannot disagree if the
    # call happens to straddle midnight.
    now = datetime.datetime.now()
    id_prefix = f"{now.year}-{now.month:02d}{now.day:02d}"
    date_published = f"{now.year}-{now.month:02d}-{now.day:02d}"

    for idx in range(1, 101):
        paperid = f"{id_prefix}.{idx}"
        if not fs.exists(datasetdir + paperid + ".md"):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    strip_bad = str.maketrans("", "", "<>:|\\")  # primitive anti-xss sanitization

    def clean(value):
        # Front matter is one "key: value" per line delimited by "---", so a
        # newline or a "---" inside a value would truncate it or let the user
        # inject extra fields; flatten newlines and drop delimiter runs.
        flat = " ".join((value or "").splitlines())
        return flat.translate(strip_bad).replace("---", "")

    metadata = (
        "---\n"
        f"title: {clean(title)}\n"
        f"author: {clean(authors)}\n"
        f"tags: {clean(tags)}\n"
        f"abstract: {clean(abst)}\n"
        f"date_published: {date_published}\n"
        f"paperid: {paperid}\n"
        "---\n"
    )
    with fs.open(datasetdir + paperid + ".md", "w") as f:
        f.write(metadata + (data or ""))
    # Report the id so the status box is not left blank on success.
    return f"published as {paperid}"
def makepreview(x):
    """Return the given value unchanged (identity pass-through)."""
    preview_source = x
    return preview_source
def upload(prefix, fname, ext, file):
    """Store an uploaded file under ``uploads/`` in the dataset.

    The file is written as ``uploads/<prefix>-<fname>.<ext>``.

    Args:
        prefix: user-chosen prefix for the stored name.
        fname: user-chosen file name (without extension).
        ext: file extension (without the dot).
        file: raw file content as bytes, or ``None`` when nothing was selected.

    Returns:
        A status message: the public URL of the stored file on success, or an
        explanation of why nothing was uploaded.
    """
    if file is None:
        return "no file selected"
    prefix = prefix or ""
    fname = fname or ""
    # All three parts are user input and end up in a path: a path separator
    # would let the write land outside uploads/.
    if not ext or any("/" in part or "\\" in part for part in (prefix, fname, ext)):
        return "invalid prefix, file name or filetype"
    fname = prefix + "-" + fname
    with fs.open(datasetdir + "uploads/" + fname + "." + ext, "wb") as f:
        f.write(file)
    url = f"https://huggingface.co/datasets/yoinked/blue-arxiv-papers/resolve/main/uploads/{fname}.{ext}"
    return f"uploaded, use {url} to include in your paper (so like ![{fname}]({url})) for inline img"
# Gradio UI: four tabs (search, read, publish, files) followed by the event
# wiring that connects each button to its handler function.
# NOTE(review): the nesting below was reconstructed; the placement of
# markd/preview and uploadbutton/statii relative to the rows should be
# confirmed against the running app.
with gr.Blocks(css=basecss, theme='NoCrypt/miku') as demo:
    gr.Image("./blue-arxiv.png", container=False, label=None, interactive=False, show_fullscreen_button=False, show_share_button=False, show_download_button=False)

    # search: optional tag filter -> grid of paper cards
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            # NOTE(review): "π" here and in the hint below may be a
            # mis-decoded emoji — confirm the intended character.
            searchbutton = gr.Button("π")
        with gr.Row():
            papercards = gr.HTML("Click the π to load all papers!")

    # read: paper id -> rendered markdown
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()

    # publish: metadata fields + markdown body with a live preview
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is a line count, so pass an int rather than the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")

    # files: upload an image/audio asset that papers can link to
    with gr.Tab("files"):
        with gr.Row():
            prefix = gr.Textbox(label="prefix", lines=1, interactive=True)
            file_name = gr.Textbox(label="file name", lines=1, interactive=True)
        with gr.Row():
            file = gr.File(label="file", file_types=[".png", ".gif", ".webp", ".jpg", ".wav", ".mp3"], type="binary")
            fileext = gr.Dropdown(label="filetype", choices=["png", "gif", "webp", "jpg", "wav", "mp3"])
        uploadbutton = gr.Button("upload")
        statii = gr.Textbox(label="status", interactive=False)

    # Event wiring (must be registered inside the Blocks context).
    uploadbutton.click(fn=upload, inputs=[prefix, file_name, fileext, file], outputs=statii)
    markd.change(fn=makepreview, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)

demo.launch()