Spaces:
Sleeping
Sleeping
Commit
·
ea4d0fa
1
Parent(s):
eae6163
Add code for querying and displaying papers
Browse files
app.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import httpx
|
2 |
+
from cytoolz import groupby
|
3 |
+
from functools import lru_cache
|
4 |
+
from rich import print
|
5 |
+
from functools import partial
|
6 |
+
import gradio as gr
|
7 |
+
from typing import Optional
|
8 |
+
|
9 |
+
|
10 |
+
def query_author(author_name: str):
    """Search Semantic Scholar for authors matching *author_name*.

    Returns the list under the response's ``data`` key: one dict per
    candidate author, each carrying name/url/externalIds plus a ``papers``
    list with each paper's externalIds, title and year.

    Raises httpx.HTTPStatusError on a non-2xx response.
    """
    url = "https://api.semanticscholar.org/graph/v1/author/search"
    # Pass the query via `params=` so httpx percent-encodes it; interpolating
    # the raw name into the URL breaks for names containing '&', '#', '+', etc.
    params = {
        "query": author_name,
        "fields": "name,url,externalIds,papers.externalIds,papers.title,papers.year",
    }
    resp = httpx.get(url, params=params)
    resp.raise_for_status()
    return resp.json()["data"]
|
15 |
+
|
16 |
+
|
17 |
+
def get_arxiv_paper(papers):
    """Return the subset of *papers* that carry an ArXiv external id.

    Papers with a missing/None ``externalIds`` field, or whose
    ``externalIds`` lacks a truthy ``ArXiv`` entry, are dropped.
    """
    arxiv_papers = []
    for paper in papers:
        external_ids = paper.get("externalIds")
        if external_ids and external_ids.get("ArXiv"):
            arxiv_papers.append(paper)
    return arxiv_papers
|
22 |
+
|
23 |
+
|
24 |
+
def check_arxiv_in_papers(arxiv_ids, papers):
    """Return True if any paper in *papers* has an ArXiv id in *arxiv_ids*.

    Papers without ``externalIds`` or without an ``ArXiv`` entry are ignored.
    """
    for paper in papers:
        external_ids = paper.get("externalIds")
        if not external_ids:
            continue
        arxiv_id = external_ids.get("ArXiv")
        if arxiv_id and arxiv_id in arxiv_ids:
            return True
    return False
|
34 |
+
|
35 |
+
|
36 |
+
def get_author_from_options(potential_authors, positive_arxiv_ids):
    """Pick the candidate author whose papers contain a known ArXiv id.

    Returns the first author dict from *potential_authors* for which
    ``check_arxiv_in_papers`` matches one of *positive_arxiv_ids*,
    or None when no candidate matches.
    """
    for author in potential_authors:
        if check_arxiv_in_papers(set(positive_arxiv_ids), author["papers"]):
            return author
    return None
|
45 |
+
|
46 |
+
|
47 |
+
def sort_by_date(papers):
    """Return *papers* sorted newest-first by their ``year`` field.

    The Semantic Scholar API can return ``year: null`` for some papers;
    a None key would make ``sorted`` raise TypeError under Python 3, so a
    missing year is treated as 0 (sorts last).
    """
    return sorted(papers, key=lambda paper: paper["year"] or 0, reverse=True)
|
49 |
+
|
50 |
+
|
51 |
+
@lru_cache()
def lookup_hf_paper(arxiv_id):
    """Fetch the Hugging Face papers-API record for *arxiv_id*.

    Responses are memoized per id via lru_cache. No status check is done
    here on purpose: a non-indexed paper yields a JSON body with an
    ``error`` field, which callers inspect.
    """
    response = httpx.get(f"https://huggingface.co/api/papers/{arxiv_id}")
    return response.json()
|
56 |
+
|
57 |
+
|
58 |
+
def check_if_index_hf_paper(paper):
    """Return True when the paper's ArXiv id resolves on the HF papers API.

    A paper that Hugging Face has not indexed comes back with an ``error``
    field in the JSON body.
    """
    hf_record = lookup_hf_paper(paper["externalIds"]["ArXiv"])
    return not hf_record.get("error")
|
62 |
+
|
63 |
+
|
64 |
+
def groupby_indexed_by_hf_papers(papers):
    """Partition *papers* by whether Hugging Face already indexes them.

    Returns a cytoolz ``groupby`` dict keyed True/False; note a key is
    absent entirely when no paper falls in that partition. Each call hits
    the (cached) HF papers API once per paper.
    """
    return groupby(check_if_index_hf_paper, papers)
|
66 |
+
|
67 |
+
|
68 |
+
def check_hf_user_in_authors(paper, hf_user_name):
    """Return True if *hf_user_name* is among the paper's linked HF users.

    Authors without a truthy ``user`` entry (no linked HF account) are
    skipped before comparing usernames.
    """
    linked_authors = (a for a in paper["authors"] if a.get("user"))
    return any(a["user"]["user"] == hf_user_name for a in linked_authors)
|
72 |
+
|
73 |
+
|
74 |
+
def groupby_hf_user_papers(papers, hf_user_name):
    """Partition *papers* by whether *hf_user_name* has claimed them.

    Returns a cytoolz ``groupby`` dict keyed True (claimed) / False
    (unclaimed); a key is absent when its partition is empty.
    """
    return groupby(
        lambda paper: check_hf_user_in_authors(paper, hf_user_name), papers
    )
|
79 |
+
|
80 |
+
|
81 |
+
def get_papers(
    author_name, positive_arxiv_ids, hf_user_name: Optional[gr.OAuthProfile]
):
    """Build a Markdown list of HF-indexed papers the user hasn't claimed.

    Parameters
    ----------
    author_name : str
        Name to search on Semantic Scholar.
    positive_arxiv_ids : str
        Comma-separated ArXiv ids known to belong to the author, used to
        disambiguate between same-named authors.
    hf_user_name : Optional[gr.OAuthProfile]
        Injected by Gradio's OAuth; None when the visitor is not logged in.

    Returns a Markdown string; raises gr.Error when not logged in.
    """
    # Guard: the OAuth profile is None for anonymous visitors; the previous
    # unconditional attribute access raised AttributeError.
    if hf_user_name is None:
        raise gr.Error("Please log in with your Hugging Face account first.")
    hf_user_name = hf_user_name.preferred_username
    # Strip whitespace so "id1, id2" matches as well as "id1,id2".
    positive_arxiv_ids = [
        arxiv_id.strip()
        for arxiv_id in positive_arxiv_ids.split(",")
        if arxiv_id.strip()
    ]
    potential_authors = query_author(author_name)
    author = get_author_from_options(potential_authors, positive_arxiv_ids)
    if author is None:
        # No candidate author owned any of the supplied ArXiv ids.
        return "No matching author found for the given name and ArXiv IDs."
    papers = get_arxiv_paper(author["papers"])
    papers = sort_by_date(papers)
    papers_indexed_by_hf = groupby_indexed_by_hf_papers(papers)

    # cytoolz groupby omits a key entirely when its partition is empty,
    # so use .get with a default instead of direct indexing.
    indexed_papers = [
        lookup_hf_paper(paper["externalIds"]["ArXiv"])
        for paper in papers_indexed_by_hf.get(True, [])
    ]

    already_claimed = groupby_hf_user_papers(indexed_papers, hf_user_name)
    results = (
        "# Papers already indexed by Hugging Face which you haven't claimed\n"
        + "These papers are already indexed by Hugging Face, but you haven't claimed them yet. You can claim them by clicking on the link and then clicking on the 'Claim' button on the Hugging Face papers page.\n"
    )
    for paper in already_claimed.get(False, []):
        url = f"https://huggingface.co/papers/{paper['id']}"
        results += f"- [{paper['title']}]({url})\n"
    return results
|
107 |
+
|
108 |
+
|
109 |
+
# --- Gradio UI --------------------------------------------------------------
# Login/logout drive the OAuth profile that Gradio injects into get_papers.
with gr.Blocks() as demo:
    gr.LoginButton()
    gr.LogoutButton()
    author_name = gr.Textbox(label="Author name", interactive=True)
    positive_arxiv_ids = gr.Textbox("1910.01108", label="ArXiv IDs", interactive=True)
    btn = gr.Button("Get papers")
    # Name the output component instead of creating it anonymously inside
    # btn.click, so it is clearly part of the layout.
    results = gr.Markdown()
    btn.click(get_papers, [author_name, positive_arxiv_ids], results)

demo.launch(debug=True)
|