|
import requests |
|
import xml.etree.ElementTree as ET |
|
import re |
|
from fuzzywuzzy import fuzz |
|
from semanticscholar import SemanticScholar |
|
import gradio as gr |
|
|
|
def normalize_name(name):
    """Return ``name`` lower-cased with all non-word characters removed.

    Letters, digits and underscores are kept; every run of anything else
    (spaces, hyphens, apostrophes, ...) is dropped.
    """
    lowered = name.lower()
    return re.sub(r'\W+', '', lowered)
|
|
|
def create_bibtex_key(authors, year, title):
    """Build a citation key of the form ``surname_year_firstword``.

    Args:
        authors: Author names joined by ``" and "``. Only the first author is
            used; the last whitespace-separated token of that name is taken
            as the surname.
        year: Publication year, interpolated into the key as-is.
        title: Paper title. The first word that still has content after
            ``normalize_name`` is used.

    Returns:
        The key string. An empty author string yields the surname
        ``"unknown"`` and an empty title yields the word ``"untitled"``
        instead of raising ``IndexError``.
    """
    first_author_tokens = (authors or "").split(" and ")[0].split()
    surname = normalize_name(first_author_tokens[-1]) if first_author_tokens else ""

    # Title words are normalized as well: raw punctuation such as a trailing
    # comma or a brace would corrupt the "@article{key," header line.
    title_word = next(
        (word for word in map(normalize_name, (title or "").split()) if word),
        "",
    )

    return f"{surname or 'unknown'}_{year}_{title_word or 'untitled'}"
|
|
|
def is_title_match(input_title, db_title):
    """Tell whether two titles are close enough to be treated as the same paper.

    Both titles are lower-cased first; the match succeeds when
    ``fuzz.ratio`` scores them strictly above 90.
    """
    similarity = fuzz.ratio(input_title.lower(), db_title.lower())
    return similarity > 90
|
|
|
def get_crossref_bibtex(title, rows=1):
    """Search CrossRef by title and build a BibTeX entry for the first close match.

    Args:
        title: Paper title to search for.
        rows: Number of search results to request from CrossRef.

    Returns:
        A BibTeX ``@article`` string for the first returned record whose
        title passes ``is_title_match``, or ``None`` when the request fails,
        the response cannot be decoded, or no record matches.
    """
    url = "https://api.crossref.org/works"
    params = {"query.title": title, "rows": rows}

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    try:
        items = response.json()["message"]["items"]
    except (ValueError, KeyError, TypeError):
        return None

    for item in items or []:
        # Records without a title cannot be matched; skip them instead of raising.
        titles = item.get("title") or []
        if not titles:
            continue
        db_title = titles[0]
        if not is_title_match(title, db_title):
            continue

        doi = item["DOI"]

        # Contributors may lack "given"/"family" (e.g. entries that only carry
        # a "name"), so assemble each name from whatever parts are present.
        author_names = []
        for author in item.get("author") or []:
            name_parts = [author.get("given"), author.get("family")]
            full_name = " ".join(part for part in name_parts if part)
            full_name = full_name or author.get("name") or ""
            if full_name:
                author_names.append(full_name)
        # Fallback keeps the key builder from seeing an empty author string.
        authors = " and ".join(author_names) or "Unknown"

        # "or" also covers a present-but-empty container-title list.
        journal = (item.get("container-title") or ["Unknown"])[0]
        date_parts = item.get("issued", {}).get("date-parts") or [[None]]
        year = date_parts[0][0] if date_parts[0] else None

        bibtex_key = create_bibtex_key(authors, year, db_title)

        return f"""@article{{{bibtex_key},
    author = {{{authors}}},
    title = {{{db_title}}},
    journal = {{{journal}}},
    year = {{{year}}},
    doi = {{{doi}}},
    url = {{{'https://doi.org/' + doi}}}
}}"""

    return None
|
|
|
def get_arxiv_bibtex(title, max_results=1):
    """Search arXiv by title and build a BibTeX entry for the first close match.

    Args:
        title: Paper title to search for (sent as a ``ti:`` query).
        max_results: Maximum number of feed entries to request.

    Returns:
        A BibTeX ``@article`` string for the first entry whose title passes
        ``is_title_match``, or ``None`` when the request fails, the feed
        cannot be parsed, the feed is empty, or no entry matches. An empty
        feed is reported with a printed message rather than returned as a
        string, so callers that test the result for truthiness do not mistake
        the message for a BibTeX entry.
    """
    url = "http://export.arxiv.org/api/query"
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        print(f"Could not parse arXiv response: {exc}")
        return None
    namespace = "{http://www.w3.org/2005/Atom}"

    def _text(parent, tag):
        """Return the text of child ``tag`` of ``parent``, or '' when absent."""
        node = parent.find(f"{namespace}{tag}")
        return node.text if node is not None and node.text else ""

    entries = root.findall(f"{namespace}entry")
    if not entries:
        print("No entries found in arXiv.")
        return None

    for entry in entries:
        # Feed titles can be wrapped over several lines; collapse all runs of
        # whitespace so the fuzzy comparison sees a single-line title.
        arxiv_title = " ".join(_text(entry, "title").split())
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")

        if not is_title_match(title, arxiv_title):
            continue

        arxiv_id = _text(entry, "id").split('/abs/')[-1]
        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        authors = [
            name
            for name in (_text(author, "name") for author in entry.findall(f"{namespace}author"))
            if name
        ]
        # Fallback keeps the key builder from seeing an empty author string.
        author_str = " and ".join(authors) or "Unknown"
        published = _text(entry, "published")[:4]
        bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

        return f"""@article{{{bibtex_key},
    author = {{{author_str}}},
    title = {{{arxiv_title}}},
    journal = {{arXiv preprint arXiv:{arxiv_id}}},
    year = {{{published}}},
    url = {{{arxiv_url}}}
}}"""

    return None
|
|
|
def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and build a BibTeX entry for the first close match.

    Args:
        title: Paper title to search for.

    Returns:
        A BibTeX ``@article`` string for the first result whose title passes
        ``is_title_match``, or ``None`` when nothing matches or the lookup
        raises. Failures are printed rather than swallowed silently.
    """
    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): the result is indexed with a 'data' key and each paper
        # is read with .get(); confirm both match the return types of the
        # installed semanticscholar version.
        for paper in papers['data']:
            matched_title = paper['title']
            if not is_title_match(title, matched_title):
                continue

            # Fallback keeps the key builder from seeing an empty author string.
            authors = " and ".join(author['name'] for author in paper['authors']) or "Unknown"
            year = paper.get('year', 'Unknown')
            journal = paper.get('venue', 'Unknown')
            # "or" covers both a missing key and a present-but-null DOI, so the
            # entry never shows "None" and never links to https://doi.org/Unknown.
            doi = paper.get('doi') or 'Unknown'
            url = 'https://doi.org/' + doi if doi != 'Unknown' else 'N/A'
            # Key is derived from the matched record's title, like the other sources.
            bibtex_key = create_bibtex_key(authors, year, matched_title)

            return f"""@article{{{bibtex_key},
    author = {{{authors}}},
    title = {{{matched_title}}},
    journal = {{{journal}}},
    year = {{{year}}},
    doi = {{{doi}}},
    url = {{{url}}}
}}"""
        return None
    except Exception as exc:
        # Best-effort source: report the problem and let the caller fall
        # through to the next provider instead of failing the whole lookup.
        print(f"Semantic Scholar lookup failed: {exc}")
        return None
|
|
|
def get_crossref_bibtex_by_doi(doi):
    """Fetch a single CrossRef record by DOI and format it as BibTeX.

    Args:
        doi: The DOI to look up; surrounding whitespace is ignored.

    Returns:
        A BibTeX ``@article`` string on success, otherwise the message
        ``"CrossRef request by DOI failed."`` (network error, non-200 status,
        or an undecodable response).
    """
    failure_message = "CrossRef request by DOI failed."

    # Pasted identifiers often carry stray spaces or a trailing newline.
    doi = doi.strip()
    url = f"https://api.crossref.org/works/{doi}"

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    try:
        item = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        return failure_message

    # Contributors may lack "given"/"family" (e.g. entries that only carry a
    # "name"), so assemble each name from whatever parts are present.
    author_names = []
    for author in item.get("author") or []:
        name_parts = [author.get("given"), author.get("family")]
        full_name = " ".join(part for part in name_parts if part)
        full_name = full_name or author.get("name") or ""
        if full_name:
            author_names.append(full_name)
    # Fallbacks keep the key builder from seeing empty author/title strings.
    authors = " and ".join(author_names) or "Unknown"
    title = (item.get("title") or ["Unknown"])[0]

    # "or" also covers a present-but-empty container-title list.
    journal = (item.get("container-title") or ["Unknown"])[0]
    date_parts = item.get("issued", {}).get("date-parts") or [[None]]
    year = date_parts[0][0] if date_parts[0] else None

    bibtex_key = create_bibtex_key(authors, year, title)

    return f"""@article{{{bibtex_key},
    author = {{{authors}}},
    title = {{{title}}},
    journal = {{{journal}}},
    year = {{{year}}},
    doi = {{{doi}}},
    url = {{{'https://doi.org/' + doi}}}
}}"""
|
|
|
def get_arxiv_bibtex_by_id(arxiv_id):
    """Fetch a single arXiv record by identifier and format it as BibTeX.

    Args:
        arxiv_id: The arXiv identifier; surrounding whitespace is ignored.

    Returns:
        A BibTeX ``@article`` string on success, otherwise the message
        ``"ArXiv request by ID failed."`` (network error, non-200 status,
        unparsable feed, no entry, or an entry without a title or
        publication date).
    """
    failure_message = "ArXiv request by ID failed."

    # Pasted identifiers often carry stray spaces or a trailing newline.
    arxiv_id = arxiv_id.strip()
    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
    url = "http://export.arxiv.org/api/query"

    try:
        # Passing the id through ``params`` lets requests escape it; a timeout
        # keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, params={"id_list": arxiv_id}, timeout=10)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        print(f"Could not parse arXiv response: {exc}")
        return failure_message
    namespace = "{http://www.w3.org/2005/Atom}"

    def _text(parent, tag):
        """Return the text of child ``tag`` of ``parent``, or '' when absent."""
        node = parent.find(f"{namespace}{tag}")
        return node.text if node is not None and node.text else ""

    entry = root.find(f"{namespace}entry")
    if entry is None:
        return failure_message

    # Feed titles can be wrapped over several lines; collapse all runs of
    # whitespace into single spaces so the BibTeX title stays on one line.
    arxiv_title = " ".join(_text(entry, "title").split())
    published = _text(entry, "published")[:4]
    if not arxiv_title or not published:
        # Such an entry cannot be turned into a usable citation.
        return failure_message

    authors = [
        name
        for name in (_text(author, "name") for author in entry.findall(f"{namespace}author"))
        if name
    ]
    # Fallback keeps the key builder from seeing an empty author string.
    author_str = " and ".join(authors) or "Unknown"
    bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

    return f"""@article{{{bibtex_key},
    author = {{{author_str}}},
    title = {{{arxiv_title}}},
    journal = {{arXiv preprint arXiv:{arxiv_id}}},
    year = {{{published}}},
    url = {{{arxiv_url}}}
}}"""
|
|
|
def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Resolve a paper to a BibTeX entry from whichever identifier is given.

    Precedence is DOI, then arXiv ID, then title. A DOI or arXiv ID is
    handed straight to the matching by-identifier lookup and its result is
    returned unchanged. A title is tried against CrossRef, Semantic Scholar
    and arXiv in that order; the first truthy result wins and each miss is
    reported on stdout.

    Args:
        title: Paper title, used only when no DOI or arXiv ID is given.
        doi: DOI of the paper.
        arxiv_id: arXiv identifier of the paper.
        crossref_rows: Number of rows requested from the CrossRef title search.
        arxiv_max_results: Maximum results requested from the arXiv title search.

    Returns:
        The lookup result, or ``"No BibTeX entry found for this paper."``
        when no identifier was supplied or every title source came up empty.
    """
    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)

    if title:
        # Sources in fallback order, each paired with the message printed on a miss.
        title_sources = (
            (lambda: get_crossref_bibtex(title, crossref_rows),
             "No peer-reviewed version found in CrossRef."),
            (lambda: get_semanticscholar_bibtex(title),
             "No BibTeX entry found in Semantic Scholar."),
            (lambda: get_arxiv_bibtex(title, arxiv_max_results),
             "No arXiv preprint found."),
        )
        for fetch, miss_message in title_sources:
            found = fetch()
            if found:
                return found
            print(miss_message)

    return "No BibTeX entry found for this paper."
|
|
|
|
|
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Gradio callback: pass the three form fields on to ``get_bibtex_for_paper``.

    The row and result limits of the underlying lookup keep their defaults.
    """
    form_fields = {"title": title, "doi": doi, "arxiv_id": arxiv_id}
    return get_bibtex_for_paper(**form_fields)
|
|
|
|
|
# Web form: three free-text inputs (title, DOI, arXiv ID) routed through
# gradio_app, with the resulting BibTeX shown in a code box.
interface = gr.Interface(
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    fn=gradio_app,
    inputs=[
        gr.Textbox(label="Title", placeholder="Enter Title"),
        gr.Textbox(label="DOI", placeholder="Enter DOI"),
        gr.Textbox(label="ArXiv ID", placeholder="Enter ArXiv ID"),
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    # One example row per lookup mode; columns follow the order of ``inputs``.
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1016/S0021-9258(19)52451-6", None],
        [None, None, "1706.03762"],
    ],
)


# Start the web server only when the file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|
|