"""Look up BibTeX entries for papers via CrossRef, Semantic Scholar and arXiv."""
import re
import xml.etree.ElementTree as ET

import gradio as gr
import requests
from fuzzywuzzy import fuzz
from semanticscholar import SemanticScholar

CROSSREF_WORKS_URL = "https://api.crossref.org/works"
ARXIV_QUERY_URL = "http://export.arxiv.org/api/query"
ATOM_NS = "{http://www.w3.org/2005/Atom}"
# Seconds to wait for a remote API; without a timeout requests can block forever.
REQUEST_TIMEOUT = 15


def normalize_name(name):
    """Return *name* lower-cased with every non-word character removed."""
    return re.sub(r"\W+", "", name.lower())


def create_bibtex_key(authors, year, title):
    """Build a citation key of the form ``surname_year_firstword``.

    Args:
        authors: Author names joined by ``" and "``; the last word of the
            first author is used as the surname.
        year: Publication year (inserted verbatim).
        title: Paper title; its first word is used.

    Returns:
        The key string. ``"unknown"`` / ``"untitled"`` are substituted when
        the author list or the title yields no usable word.
    """
    author_words = authors.split(" and ")[0].split() if authors else []
    surname = normalize_name(author_words[-1]) if author_words else ""
    title_words = title.split() if title else []
    # Normalise the title word too so punctuation such as "," or "{" cannot
    # end up inside the key.
    first_word = normalize_name(title_words[0]) if title_words else ""
    return f"{surname or 'unknown'}_{year}_{first_word or 'untitled'}"


def is_title_match(input_title, db_title, threshold=90):
    """Return True when the two titles are a close fuzzy match.

    The comparison is case-insensitive and uses ``fuzz.ratio``; the score
    must be strictly greater than *threshold*. Empty or missing titles never
    match.
    """
    if not input_title or not db_title:
        return False
    return fuzz.ratio(input_title.lower(), db_title.lower()) > threshold


def _http_get(url, params=None):
    """GET *url* and return the response on HTTP 200, otherwise None."""
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Treat a network failure as a miss so other sources can still be tried.
        print(f"Request to {url} failed: {exc}")
        return None
    if response.status_code != 200:
        return None
    return response


def _format_bibtex(key, fields):
    """Render an ``@article`` entry from *key* and ``(name, value)`` pairs."""
    body = ",\n".join(f"  {name} = {{{value}}}" for name, value in fields)
    return f"@article{{{key},\n{body}\n}}"


def _crossref_authors(item):
    """Join the author names of a CrossRef work item with ``" and "``."""
    names = []
    for author in item.get("author") or []:
        # "given"/"family" are optional; fall back to "name" when both are absent.
        parts = [author.get("given"), author.get("family")]
        full_name = " ".join(part for part in parts if part) or author.get("name", "")
        if full_name:
            names.append(full_name)
    return " and ".join(names)


def _crossref_item_to_bibtex(item, doi=None):
    """Convert a CrossRef work item into a BibTeX entry string.

    *doi* overrides the item's own ``DOI`` field when given.
    """
    doi = doi or item.get("DOI", "")
    authors = _crossref_authors(item)
    journal = (item.get("container-title") or ["Unknown"])[0]
    date_parts = item.get("issued", {}).get("date-parts") or [[None]]
    year = date_parts[0][0] if date_parts[0] else None
    title = (item.get("title") or [""])[0]
    return _format_bibtex(
        create_bibtex_key(authors, year, title),
        [
            ("author", authors),
            ("title", title),
            ("journal", journal),
            ("year", year),
            ("doi", doi),
            ("url", "https://doi.org/" + doi),
        ],
    )


def get_crossref_bibtex(title, rows=1):
    """Search CrossRef by title and return BibTeX for the first close match.

    Args:
        title: Title to search for.
        rows: Number of results to request from CrossRef.

    Returns:
        A BibTeX entry string, or None when the request fails or no returned
        item's title fuzzy-matches *title*.
    """
    response = _http_get(CROSSREF_WORKS_URL, {"query.title": title, "rows": rows})
    if response is None:
        return None
    try:
        items = response.json()["message"]["items"]
    except (ValueError, KeyError, TypeError):
        return None
    for item in items or []:
        # Use a separate name so the query title is not overwritten mid-loop.
        db_title = (item.get("title") or [""])[0]
        if is_title_match(title, db_title):
            return _crossref_item_to_bibtex(item)
    return None


def _atom_text(element, tag):
    """Return the whitespace-collapsed text of an Atom child, or ``""``."""
    node = element.find(f"{ATOM_NS}{tag}")
    text = node.text if node is not None else None
    # Collapsing whitespace also removes line breaks embedded in long titles.
    return " ".join(text.split()) if text else ""


def _parse_arxiv_entries(xml_text):
    """Parse an arXiv Atom feed and return its ``entry`` elements."""
    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError as exc:
        print(f"Could not parse arXiv response: {exc}")
        return []
    return root.findall(f"{ATOM_NS}entry")


def _arxiv_entry_to_bibtex(entry, arxiv_id=None):
    """Convert an arXiv Atom entry into a BibTeX entry string.

    When *arxiv_id* is not given it is taken from the part of the entry's
    ``id`` element that follows ``/abs/``.
    """
    if arxiv_id is None:
        arxiv_id = _atom_text(entry, "id").split("/abs/")[-1]
    title = _atom_text(entry, "title")
    author_names = (_atom_text(author, "name") for author in entry.findall(f"{ATOM_NS}author"))
    authors = " and ".join(name for name in author_names if name)
    year = _atom_text(entry, "published")[:4]
    return _format_bibtex(
        create_bibtex_key(authors, year, title),
        [
            ("author", authors),
            ("title", title),
            ("journal", f"arXiv preprint arXiv:{arxiv_id}"),
            ("year", year),
            ("url", f"https://arxiv.org/abs/{arxiv_id}"),
        ],
    )


def get_arxiv_bibtex(title, max_results=1):
    """Search arXiv by title and return BibTeX for the first close match.

    Args:
        title: Title to search for.
        max_results: Maximum number of entries to request.

    Returns:
        A BibTeX entry string, or None when the request fails, nothing is
        returned, or no entry's title fuzzy-matches *title*.
    """
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")
    response = _http_get(ARXIV_QUERY_URL, params)
    if response is None:
        return None
    entries = _parse_arxiv_entries(response.text)
    if not entries:
        # Return None rather than a message: callers treat any truthy result
        # as a found BibTeX entry.
        print("No entries found in arXiv.")
        return None
    for entry in entries:
        arxiv_title = _atom_text(entry, "title")
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")
        if is_title_match(title, arxiv_title):
            return _arxiv_entry_to_bibtex(entry)
    return None


def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and return BibTeX for the first match.

    Returns:
        A BibTeX entry string, or None when nothing matches or the lookup
        raises.
    """
    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): indexing the result with 'data' assumes a dict-like
        # response; confirm against the installed semanticscholar version.
        for paper in papers['data']:
            if not is_title_match(title, paper['title']):
                continue
            authors = " and ".join(author['name'] for author in paper['authors'])
            year = paper.get('year') or 'Unknown'
            journal = paper.get('venue') or 'Unknown'
            # A missing or null DOI must not be turned into a doi.org link.
            doi = paper.get('doi')
            return _format_bibtex(
                create_bibtex_key(authors, year, paper['title']),
                [
                    ("author", authors),
                    ("title", paper['title']),
                    ("journal", journal),
                    ("year", year),
                    ("doi", doi or 'Unknown'),
                    ("url", 'https://doi.org/' + doi if doi else 'N/A'),
                ],
            )
        return None
    except Exception as exc:
        # Best-effort source: report the failure and let the caller fall back.
        print(f"Semantic Scholar lookup failed: {exc}")
        return None


def get_crossref_bibtex_by_doi(doi):
    """Fetch the CrossRef record for *doi* and return it as BibTeX.

    Returns:
        A BibTeX entry string, or the message
        ``"CrossRef request by DOI failed."`` when the record cannot be
        retrieved or decoded.
    """
    failure = "CrossRef request by DOI failed."
    response = _http_get(f"{CROSSREF_WORKS_URL}/{doi}")
    if response is None:
        return failure
    try:
        item = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        return failure
    return _crossref_item_to_bibtex(item, doi=doi)


def get_arxiv_bibtex_by_id(arxiv_id):
    """Fetch the arXiv record for *arxiv_id* and return it as BibTeX.

    Returns:
        A BibTeX entry string, or the message
        ``"ArXiv request by ID failed."`` when no usable entry is returned.
    """
    failure = "ArXiv request by ID failed."
    response = _http_get(ARXIV_QUERY_URL, {"id_list": arxiv_id})
    if response is None:
        return failure
    entries = _parse_arxiv_entries(response.text)
    if not entries or not _atom_text(entries[0], "title"):
        return failure
    return _arxiv_entry_to_bibtex(entries[0], arxiv_id)


def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Return a BibTeX entry for a paper identified by DOI, arXiv ID or title.

    A DOI takes precedence over an arXiv ID, which takes precedence over a
    title. A title is tried against CrossRef, then Semantic Scholar, then
    arXiv, and the first hit is returned.

    Args:
        title: Paper title to search for.
        doi: DOI to resolve through CrossRef.
        arxiv_id: arXiv identifier to resolve through the arXiv API.
        crossref_rows: Number of CrossRef results to inspect for a title.
        arxiv_max_results: Number of arXiv results to inspect for a title.

    Returns:
        The BibTeX entry, or a human-readable failure message.
    """
    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)
    if title:
        # Try CrossRef
        bibtex = get_crossref_bibtex(title, crossref_rows)
        if bibtex:
            return bibtex
        print("No peer-reviewed version found in CrossRef.")

        # Try Semantic Scholar
        bibtex = get_semanticscholar_bibtex(title)
        if bibtex:
            return bibtex
        print("No BibTeX entry found in Semantic Scholar.")

        # Try arXiv
        bibtex = get_arxiv_bibtex(title, arxiv_max_results)
        if bibtex:
            return bibtex
        print("No arXiv preprint found.")
    return "No BibTeX entry found for this paper."
# Gradio Interface
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Handle one Gradio request and return the BibTeX text to display.

    Each input is stripped of surrounding whitespace and a blank value is
    passed on as None, so a field containing only spaces is not mistaken for
    a DOI or arXiv ID.
    """
    title, doi, arxiv_id = (
        (value or "").strip() or None for value in (title, doi, arxiv_id)
    )
    return get_bibtex_for_paper(title=title, doi=doi, arxiv_id=arxiv_id)


# Create the Gradio interface
interface = gr.Interface(
    fn=gradio_app,
    inputs=[
        gr.Textbox(placeholder="Enter Title", label="Title"),
        gr.Textbox(placeholder="Enter DOI", label="DOI"),
        gr.Textbox(placeholder="Enter ArXiv ID", label="ArXiv ID"),
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1016/S0021-9258(19)52451-6", None],
        [None, None, "1706.03762"],
    ],
)

if __name__ == "__main__":
    interface.launch()