bib-me / app.py
Moreno La Quatra
Updated examples
6f41a56
import requests
import xml.etree.ElementTree as ET
import re
from fuzzywuzzy import fuzz
from semanticscholar import SemanticScholar
import gradio as gr
def normalize_name(name):
    """Return ``name`` lower-cased with all non-word characters removed.

    Every run of characters outside the regex "word" class is deleted,
    so spaces, hyphens, apostrophes and other punctuation disappear
    while underscores are kept.
    """
    lowered = name.lower()
    return re.sub(r'\W+', '', lowered)
def create_bibtex_key(authors, year, title):
    """Build a citation key of the form ``surname_year_firstword``.

    Args:
        authors: Author names joined by " and "; only the first author is
            used, and the last whitespace-separated token of that name is
            taken as the surname.
        year: Publication year; interpolated into the key unchanged.
        title: Paper title; the first word that still has word characters
            after normalization is used.

    Returns:
        The key string. ``unknown`` replaces a missing surname and
        ``untitled`` replaces a missing title word, so empty inputs no
        longer raise ``IndexError``.
    """
    first_author = (authors or "").split(" and ")[0]
    name_tokens = first_author.split()
    surname = normalize_name(name_tokens[-1]) if name_tokens else ""

    # Normalize title words as well: raw punctuation such as ',' or '{'
    # would otherwise end up inside the citation key.
    normalized_words = (normalize_name(word) for word in (title or "").split())
    first_word = next((word for word in normalized_words if word), "")

    return f"{surname or 'unknown'}_{year}_{first_word or 'untitled'}"
def is_title_match(input_title, db_title, threshold=90):
    """Tell whether two titles are close enough to be the same paper.

    Args:
        input_title: Title supplied by the user.
        db_title: Title returned by a bibliographic database.
        threshold: Fuzzy match threshold; the ``fuzz.ratio`` score must be
            strictly greater than this value (default 90).

    Returns:
        ``True`` when the case-insensitive similarity exceeds
        ``threshold``; ``False`` otherwise, including when either title
        is empty or ``None``.
    """
    if not input_title or not db_title:
        return False
    # Collapse whitespace so titles that differ only in spacing or line
    # wrapping are compared on their actual wording.
    wanted = " ".join(input_title.lower().split())
    found = " ".join(db_title.lower().split())
    return fuzz.ratio(wanted, found) > threshold
def get_crossref_bibtex(title, rows=1, timeout=10):
    """Search CrossRef by title and format the first matching work as BibTeX.

    Args:
        title: Paper title to search for.
        rows: Number of candidate works requested from CrossRef.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        An ``@article`` BibTeX entry for the first returned work whose
        title passes ``is_title_match``, or ``None`` when nothing matches,
        the request fails, or the response is not in the expected shape.
    """
    url = "https://api.crossref.org/works"
    params = {"query.title": title, "rows": rows}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        items = response.json()["message"]["items"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network trouble or an unexpected payload is reported as
        # "not found" so the caller can move on to another source.
        return None

    for item in items or []:
        titles = item.get("title") or []
        if not titles or not is_title_match(title, titles[0]):
            continue
        db_title = titles[0]
        doi = item["DOI"]

        # Author records are not guaranteed to carry both name parts;
        # use whatever is present and fall back to a plain "name" field.
        author_names = []
        for author in item.get("author") or []:
            given_family = (author.get("given"), author.get("family"))
            full_name = " ".join(part for part in given_family if part)
            full_name = full_name or author.get("name")
            if full_name:
                author_names.append(full_name)
        authors = " and ".join(author_names) or "Unknown"

        # An empty container-title list must not raise IndexError.
        journal = (item.get("container-title") or ["Unknown"])[0]
        date_parts = (item.get("issued") or {}).get("date-parts") or [[None]]
        year = date_parts[0][0] if date_parts[0] else None

        bibtex_key = create_bibtex_key(authors, year, db_title)
        return "\n".join([
            f"@article{{{bibtex_key},",
            f"author = {{{authors}}},",
            f"title = {{{db_title}}},",
            f"journal = {{{journal}}},",
            f"year = {{{year}}},",
            f"doi = {{{doi}}},",
            f"url = {{{'https://doi.org/' + doi}}}",
            "}",
        ])
    return None
def get_arxiv_bibtex(title, max_results=1, timeout=10):
    """Search arXiv by title and format the first matching preprint as BibTeX.

    Args:
        title: Paper title to search for.
        max_results: Maximum number of feed entries requested.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        An ``@article`` BibTeX entry for the first entry whose title passes
        ``is_title_match``; the string "No entries found in arXiv." when
        the feed holds no entries at all; ``None`` when no entry matches,
        the request fails, or the response is not parseable XML.
    """
    url = "http://export.arxiv.org/api/query"
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")
    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        # Parse the raw bytes so the XML declaration, not the HTTP
        # charset guess, decides how the feed is decoded.
        root = ET.fromstring(response.content)
    except (requests.RequestException, ET.ParseError):
        return None

    namespace = "{http://www.w3.org/2005/Atom}"
    # Find all the entries returned
    entries = root.findall(f"{namespace}entry")
    if not entries:
        return "No entries found in arXiv."

    for entry in entries:
        raw_title = entry.findtext(f"{namespace}title")
        if not raw_title:
            continue
        # Feed titles may be wrapped over several lines; collapse the
        # whitespace so the BibTeX title stays on a single line.
        arxiv_title = " ".join(raw_title.split())
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")
        if not is_title_match(title, arxiv_title):
            continue

        # Extract relevant information if title matches
        id_text = entry.findtext(f"{namespace}id")
        published_text = entry.findtext(f"{namespace}published")
        if not id_text or not published_text:
            # Without an id or a date the entry cannot be cited.
            continue
        arxiv_id = id_text.split('/abs/')[-1]
        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        author_names = (
            author.findtext(f"{namespace}name")
            for author in entry.findall(f"{namespace}author")
        )
        author_str = " and ".join(name for name in author_names if name) or "Unknown"
        published = published_text[:4]
        bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

        # Build the BibTeX entry
        return "\n".join([
            f"@article{{{bibtex_key},",
            f"author = {{{author_str}}},",
            f"title = {{{arxiv_title}}},",
            f"journal = {{arXiv preprint arXiv:{arxiv_id}}},",
            f"year = {{{published}}},",
            f"url = {{{arxiv_url}}}",
            "}",
        ])
    return None
def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and format the first match as BibTeX.

    Args:
        title: Paper title to search for.

    Returns:
        An ``@article`` BibTeX entry for the first result whose title
        passes ``is_title_match``, or ``None`` when nothing matches or the
        lookup raises. Missing year, venue or DOI are written as
        ``Unknown``; without a DOI the url field is ``N/A``.
    """
    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): assumes the search result can be indexed with
        # 'data'; confirm against the installed semanticscholar version.
        for paper in papers['data']:
            paper_title = paper['title']
            if not is_title_match(title, paper_title):
                continue
            author_names = [author['name'] for author in paper['authors'] if author['name']]
            authors = " and ".join(author_names) or "Unknown"
            # ``or`` also covers keys that are present but hold None/"".
            year = paper.get('year') or 'Unknown'
            journal = paper.get('venue') or 'Unknown'
            doi = paper.get('doi')
            # Only build a doi.org link when a real DOI is available.
            url = 'https://doi.org/' + doi if doi else 'N/A'
            # Key is derived from the matched title, like the other sources.
            bibtex_key = create_bibtex_key(authors, year, paper_title)
            return "\n".join([
                f"@article{{{bibtex_key},",
                f"author = {{{authors}}},",
                f"title = {{{paper_title}}},",
                f"journal = {{{journal}}},",
                f"year = {{{year}}},",
                f"doi = {{{doi or 'Unknown'}}},",
                f"url = {{{url}}}",
                "}",
            ])
        return None
    except Exception as exc:
        # Best-effort source: report the failure instead of hiding it,
        # then signal "not found" so the caller can try the next source.
        print(f"Semantic Scholar lookup failed: {exc}")
        return None
def get_crossref_bibtex_by_doi(doi, timeout=10):
    """Fetch a work from CrossRef by DOI and format it as BibTeX.

    Args:
        doi: DOI of the work; also written to the doi and url fields.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        An ``@article`` BibTeX entry, or the string
        "CrossRef request by DOI failed." when the request fails, returns
        a non-200 status, or the payload is not in the expected shape.
    """
    failure_message = "CrossRef request by DOI failed."
    url = f"https://api.crossref.org/works/{doi}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return failure_message
        item = response.json()["message"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        return failure_message

    # Author records are not guaranteed to carry both name parts; use
    # whatever is present and fall back to a plain "name" field.
    author_names = []
    for author in item.get("author") or []:
        given_family = (author.get("given"), author.get("family"))
        full_name = " ".join(part for part in given_family if part)
        full_name = full_name or author.get("name")
        if full_name:
            author_names.append(full_name)
    authors = " and ".join(author_names) or "Unknown"

    # Empty lists must not raise IndexError.
    journal = (item.get("container-title") or ["Unknown"])[0]
    date_parts = (item.get("issued") or {}).get("date-parts") or [[None]]
    year = date_parts[0][0] if date_parts[0] else None
    titles = item.get("title") or []
    title = (titles[0] if titles else "") or ""
    title = title.strip() or "Untitled"

    bibtex_key = create_bibtex_key(authors, year, title)
    return "\n".join([
        f"@article{{{bibtex_key},",
        f"author = {{{authors}}},",
        f"title = {{{title}}},",
        f"journal = {{{journal}}},",
        f"year = {{{year}}},",
        f"doi = {{{doi}}},",
        f"url = {{{'https://doi.org/' + doi}}}",
        "}",
    ])
def get_arxiv_bibtex_by_id(arxiv_id, timeout=10):
    """Fetch a preprint from arXiv by identifier and format it as BibTeX.

    Args:
        arxiv_id: arXiv identifier; also used for the journal and url
            fields of the entry.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        An ``@article`` BibTeX entry, or the string
        "ArXiv request by ID failed." when the request fails, returns a
        non-200 status, cannot be parsed, or holds no usable entry.
    """
    failure_message = "ArXiv request by ID failed."
    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
    url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return failure_message
        # Parse the raw bytes so the XML declaration, not the HTTP
        # charset guess, decides how the feed is decoded.
        root = ET.fromstring(response.content)
    except (requests.RequestException, ET.ParseError):
        return failure_message

    namespace = "{http://www.w3.org/2005/Atom}"
    entry = root.find(f"{namespace}entry")
    if entry is None:
        return failure_message

    raw_title = entry.findtext(f"{namespace}title")
    published_text = entry.findtext(f"{namespace}published")
    if not raw_title or not published_text:
        # An entry lacking a title or a publication date cannot be
        # turned into a citation.
        return failure_message

    # Feed titles may be wrapped over several lines; collapse the
    # whitespace so the BibTeX title stays on a single line.
    arxiv_title = " ".join(raw_title.split())
    author_names = (
        author.findtext(f"{namespace}name")
        for author in entry.findall(f"{namespace}author")
    )
    author_str = " and ".join(name for name in author_names if name) or "Unknown"
    published = published_text[:4]
    bibtex_key = create_bibtex_key(author_str, published, arxiv_title)
    return "\n".join([
        f"@article{{{bibtex_key},",
        f"author = {{{author_str}}},",
        f"title = {{{arxiv_title}}},",
        f"journal = {{arXiv preprint arXiv:{arxiv_id}}},",
        f"year = {{{published}}},",
        f"url = {{{arxiv_url}}}",
        "}",
    ])
def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Resolve a paper to a BibTeX entry from a DOI, an arXiv ID or a title.

    Precedence is DOI, then arXiv ID, then title. A title is looked up in
    CrossRef, then Semantic Scholar, then arXiv; the first source that
    returns a truthy result wins.

    Args:
        title: Paper title.
        doi: DOI of the paper.
        arxiv_id: arXiv identifier of the paper.
        crossref_rows: Number of candidates requested from CrossRef.
        arxiv_max_results: Number of candidates requested from arXiv.

    Returns:
        The string produced by the selected lookup, or
        "No BibTeX entry found for this paper." when no identifier was
        given or no source produced a result.
    """
    # Blank or whitespace-only values (e.g. a stray space in a text box)
    # count as "not provided" so they cannot hijack the dispatch below.
    title, doi, arxiv_id = (
        str(value).strip() if value else ""
        for value in (title, doi, arxiv_id)
    )

    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)
    if title:
        # Try CrossRef
        bibtex = get_crossref_bibtex(title, crossref_rows)
        if bibtex:
            return bibtex
        print("No peer-reviewed version found in CrossRef.")

        # Try Semantic Scholar
        bibtex = get_semanticscholar_bibtex(title)
        if bibtex:
            return bibtex
        print("No BibTeX entry found in Semantic Scholar.")

        # Try arXiv
        bibtex = get_arxiv_bibtex(title, arxiv_max_results)
        if bibtex:
            return bibtex
        print("No arXiv preprint found.")
    return "No BibTeX entry found for this paper."
# Gradio Interface
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Gradio callback that forwards the three inputs to ``get_bibtex_for_paper``.

    The remaining lookup options keep their default values, and the
    lookup's return value is passed back unchanged.
    """
    lookup_args = {"title": title, "doi": doi, "arxiv_id": arxiv_id}
    return get_bibtex_for_paper(**lookup_args)
# Build the Gradio UI: three text inputs (title, DOI, arXiv ID) feeding
# ``gradio_app`` and one code pane that shows the returned BibTeX entry.
interface = gr.Interface(
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    fn=gradio_app,
    inputs=[
        gr.Textbox(label=field_label, placeholder=f"Enter {field_label}")
        for field_label in ("Title", "DOI", "ArXiv ID")
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    # One example per input, in the same order as the inputs above.
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1016/S0021-9258(19)52451-6", None],
        [None, None, "1706.03762"],
    ],
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    interface.launch()