Spaces:

morenolq
/

bib-me

Runtime error

File size: 8,119 Bytes

import requests
import xml.etree.ElementTree as ET
import re
from fuzzywuzzy import fuzz
from semanticscholar import SemanticScholar
import gradio as gr

def normalize_name(name):
    """Return *name* lower-cased with every run of non-word characters removed."""
    lowered = name.lower()
    non_word_runs = re.compile(r'\W+')
    return non_word_runs.sub('', lowered)

def create_bibtex_key(authors, year, title):
    """Build a BibTeX citation key of the form ``surname_year_firstword``.

    Args:
        authors: Author names joined with ``" and "``. Only the first author
            is used, and the last whitespace-separated token of that name is
            taken as the surname.
        year: Publication year; interpolated into the key as-is.
        title: Paper title. The first word that is non-empty after
            normalization is used.

    Returns:
        The key string. ``"unknown"`` replaces a missing surname and
        ``"untitled"`` a missing title word, so empty or ``None`` inputs no
        longer raise ``IndexError``.
    """
    author_tokens = (authors or "").split(" and ")[0].split()
    surname = normalize_name(author_tokens[-1]) if author_tokens else ""

    # Normalize the title word too: raw punctuation (",", ":", "{") in a
    # citation key would produce an entry BibTeX cannot parse.
    normalized_words = (normalize_name(word) for word in (title or "").split())
    first_word = next((word for word in normalized_words if word), "")

    return f"{surname or 'unknown'}_{year}_{first_word or 'untitled'}"

def is_title_match(input_title, db_title, threshold=90):
    """Tell whether two paper titles are close enough to be the same work.

    Args:
        input_title: Title supplied by the caller.
        db_title: Title returned by a bibliographic source.
        threshold: Score that ``fuzz.ratio`` must strictly exceed. Defaults to
            90, the value that used to be hard-coded.

    Returns:
        ``True`` when the case-insensitive ``fuzz.ratio`` of the two titles is
        greater than ``threshold``; ``False`` otherwise, including when either
        title is empty or ``None``.
    """
    # A record without a title can never match; guard here so callers need
    # not special-case a None title coming back from a remote source.
    if not input_title or not db_title:
        return False
    return fuzz.ratio(input_title.lower(), db_title.lower()) > threshold

def get_crossref_bibtex(title, rows=1):
    """Search CrossRef by title and build a BibTeX entry for the first match.

    Args:
        title: Paper title to search for.
        rows: Number of candidate records to request from CrossRef.

    Returns:
        An ``@article`` BibTeX string for the first returned record whose
        title passes ``is_title_match``, or ``None`` when the request fails,
        the response status is not 200, or no record matches.
    """
    url = "https://api.crossref.org/works"
    params = {"query.title": title, "rows": rows}

    try:
        # Bounded wait so an unresponsive API cannot hang the caller forever.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    items = response.json().get("message", {}).get("items") or []
    for item in items:
        # Records may lack "title" or carry an empty list; skip those
        # instead of raising KeyError/IndexError.
        titles = item.get("title") or []
        db_title = titles[0] if titles else ""
        if not db_title or not is_title_match(title, db_title):
            continue

        doi = item["DOI"]

        author_names = []
        for author in item.get("author") or []:
            name_parts = [author[key] for key in ("given", "family") if author.get(key)]
            # NOTE(review): organisational authors appear to carry "name"
            # instead of given/family - confirm against the CrossRef schema.
            full_name = " ".join(name_parts) or author.get("name", "")
            if full_name:
                author_names.append(full_name)
        # Fall back to a placeholder so key generation always has a surname.
        authors = " and ".join(author_names) or "Unknown"

        journal = (item.get("container-title") or ["Unknown"])[0]
        date_parts = item.get("issued", {}).get("date-parts") or [[None]]
        year = date_parts[0][0] if date_parts[0] else None
        bibtex_key = create_bibtex_key(authors, year, db_title)

        return f"""@article{{{bibtex_key},
  author = {{{authors}}},
  title = {{{db_title}}},
  journal = {{{journal}}},
  year = {{{year}}},
  doi = {{{doi}}},
  url = {{{'https://doi.org/' + doi}}}
}}"""
    return None

def get_arxiv_bibtex(title, max_results=1):
    """Search arXiv by title and build a BibTeX entry for the first match.

    Args:
        title: Paper title to search for (sent as a ``ti:`` query).
        max_results: Maximum number of entries to request from arXiv.

    Returns:
        An ``@article`` BibTeX string for the first entry whose title passes
        ``is_title_match``, or ``None`` when the request fails, the response
        status is not 200, or nothing matches. "No entries" also yields
        ``None`` now (it used to return a message string), because callers
        treat any truthy return value as a BibTeX entry.
    """
    url = "http://export.arxiv.org/api/query"
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")

    try:
        # Bounded wait so an unresponsive API cannot hang the caller forever.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    root = ET.fromstring(response.text)
    namespace = "{http://www.w3.org/2005/Atom}"

    for entry in root.findall(f"{namespace}entry"):
        # Collapse internal whitespace: a title containing line breaks would
        # otherwise lower the fuzzy score and leak newlines into the entry.
        raw_title = entry.findtext(f"{namespace}title") or ""
        arxiv_title = " ".join(raw_title.split())
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")

        if not is_title_match(title, arxiv_title):
            continue

        entry_id = entry.findtext(f"{namespace}id") or ""
        arxiv_id = entry_id.split('/abs/')[-1]
        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        author_names = [
            author.findtext(f"{namespace}name")
            for author in entry.findall(f"{namespace}author")
        ]
        # Fall back to a placeholder so key generation always has a surname.
        author_str = " and ".join(name for name in author_names if name) or "Unknown"
        # The first four characters of <published> are taken as the year.
        published = (entry.findtext(f"{namespace}published") or "")[:4]
        bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

        return f"""@article{{{bibtex_key},
  author = {{{author_str}}},
  title = {{{arxiv_title}}},
  journal = {{arXiv preprint arXiv:{arxiv_id}}},
  year = {{{published}}},
  url = {{{arxiv_url}}}
}}"""

    return None

def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and build a BibTeX entry for the first match.

    Args:
        title: Paper title to search for.

    Returns:
        An ``@article`` BibTeX string for the first result whose title passes
        ``is_title_match``, or ``None`` when nothing matches or the lookup
        raises. This source is best-effort: any exception is reported with
        ``print`` and turned into ``None``.
    """
    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): assumes the result is indexable by 'data' and yields
        # dict-like papers with .get() - confirm against the installed
        # semanticscholar version.
        for paper in papers['data']:
            paper_title = paper['title']
            if not is_title_match(title, paper_title):
                continue

            authors = " and ".join(author['name'] for author in paper['authors']) or "Unknown"
            # "or" also covers keys that are present with a None value.
            year = paper.get('year') or 'Unknown'
            journal = paper.get('venue') or 'Unknown'
            doi = paper.get('doi')
            # Only build a doi.org link from a real DOI; the old default
            # produced "https://doi.org/Unknown" when the key was absent.
            url = f"https://doi.org/{doi}" if doi else 'N/A'
            # Key is derived from the matched record's title, like the
            # other sources do.
            bibtex_key = create_bibtex_key(authors, year, paper_title)

            return f"""@article{{{bibtex_key},
  author = {{{authors}}},
  title = {{{paper_title}}},
  journal = {{{journal}}},
  year = {{{year}}},
  doi = {{{doi or 'Unknown'}}},
  url = {{{url}}}
}}"""
        return None
    except Exception as exc:
        # Deliberately broad: a failure here must not stop callers from
        # trying other sources, but it should at least be visible.
        print(f"Semantic Scholar lookup failed: {exc}")
        return None

def get_crossref_bibtex_by_doi(doi):
    """Fetch a CrossRef record by DOI and format it as a BibTeX entry.

    Args:
        doi: The DOI to look up; surrounding whitespace is ignored.

    Returns:
        An ``@article`` BibTeX string on success, or the message
        ``"CrossRef request by DOI failed."`` when the request raises or the
        response status is not 200.
    """
    from urllib.parse import quote

    failure_message = "CrossRef request by DOI failed."
    doi = doi.strip()
    # Percent-encode the DOI so characters such as "#" or "?" stay part of
    # the path instead of being read as a fragment or query string.
    url = f"https://api.crossref.org/works/{quote(doi, safe='/')}"

    try:
        # Bounded wait so an unresponsive API cannot hang the caller forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    item = response.json()["message"]

    author_names = []
    for author in item.get("author") or []:
        name_parts = [author[key] for key in ("given", "family") if author.get(key)]
        # NOTE(review): organisational authors appear to carry "name"
        # instead of given/family - confirm against the CrossRef schema.
        full_name = " ".join(name_parts) or author.get("name", "")
        if full_name:
            author_names.append(full_name)
    # Placeholders keep key generation working on sparse records.
    authors = " and ".join(author_names) or "Unknown"

    journal = (item.get("container-title") or ["Unknown"])[0]
    date_parts = item.get("issued", {}).get("date-parts") or [[None]]
    year = date_parts[0][0] if date_parts[0] else None
    title = (item.get("title") or ["Unknown"])[0]
    bibtex_key = create_bibtex_key(authors, year, title)

    return f"""@article{{{bibtex_key},
  author = {{{authors}}},
  title = {{{title}}},
  journal = {{{journal}}},
  year = {{{year}}},
  doi = {{{doi}}},
  url = {{{'https://doi.org/' + doi}}}
}}"""

def get_arxiv_bibtex_by_id(arxiv_id):
    """Fetch an arXiv record by identifier and format it as a BibTeX entry.

    Args:
        arxiv_id: arXiv identifier (e.g. ``"1706.03762"``); surrounding
            whitespace is ignored.

    Returns:
        An ``@article`` BibTeX string on success, or the message
        ``"ArXiv request by ID failed."`` when the request raises, the
        response status is not 200, or the feed contains no entry.
    """
    failure_message = "ArXiv request by ID failed."
    arxiv_id = arxiv_id.strip()
    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
    url = "http://export.arxiv.org/api/query"

    try:
        # Let requests encode the query string; the bounded wait keeps an
        # unresponsive API from hanging the caller forever.
        response = requests.get(url, params={"id_list": arxiv_id}, timeout=30)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    root = ET.fromstring(response.text)
    namespace = "{http://www.w3.org/2005/Atom}"
    entry = root.find(f"{namespace}entry")
    if entry is None:
        return failure_message

    # Collapse internal whitespace so a title containing line breaks does
    # not leak newlines into the BibTeX entry.
    raw_title = entry.findtext(f"{namespace}title") or ""
    arxiv_title = " ".join(raw_title.split())
    author_names = [
        author.findtext(f"{namespace}name")
        for author in entry.findall(f"{namespace}author")
    ]
    # Fall back to a placeholder so key generation always has a surname.
    author_str = " and ".join(name for name in author_names if name) or "Unknown"
    # The first four characters of <published> are taken as the year.
    published = (entry.findtext(f"{namespace}published") or "")[:4]
    bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

    return f"""@article{{{bibtex_key},
  author = {{{author_str}}},
  title = {{{arxiv_title}}},
  journal = {{arXiv preprint arXiv:{arxiv_id}}},
  year = {{{published}}},
  url = {{{arxiv_url}}}
}}"""

def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Resolve a paper to a BibTeX entry from a DOI, an arXiv id, or a title.

    A DOI takes precedence over an arXiv id, which takes precedence over a
    title. Title lookups try CrossRef, then Semantic Scholar, then arXiv, and
    print a message for each source that yields nothing.

    Args:
        title: Paper title for a search-based lookup.
        doi: DOI for a direct CrossRef lookup.
        arxiv_id: arXiv identifier for a direct arXiv lookup.
        crossref_rows: Number of CrossRef candidates to request.
        arxiv_max_results: Number of arXiv candidates to request.

    Returns:
        Whatever the selected lookup returns, or
        ``"No BibTeX entry found for this paper."`` when no identifier is
        given or every title source comes back empty.
    """
    not_found = "No BibTeX entry found for this paper."

    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)
    if not title:
        return not_found

    # Ordered fallback chain: (lookup thunk, message printed on a miss).
    title_sources = (
        (lambda: get_crossref_bibtex(title, crossref_rows),
         "No peer-reviewed version found in CrossRef."),
        (lambda: get_semanticscholar_bibtex(title),
         "No BibTeX entry found in Semantic Scholar."),
        (lambda: get_arxiv_bibtex(title, arxiv_max_results),
         "No arXiv preprint found."),
    )
    for run_lookup, miss_message in title_sources:
        entry = run_lookup()
        if entry:
            return entry
        print(miss_message)

    return not_found

# Gradio callback
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Forward the three form fields to ``get_bibtex_for_paper`` and return its result."""
    form_values = {"title": title, "doi": doi, "arxiv_id": arxiv_id}
    return get_bibtex_for_paper(**form_values)

# Assemble the Gradio UI: three text inputs (title, DOI, arXiv id) feeding
# gradio_app, with the result shown in a code box.
interface = gr.Interface(
    fn=gradio_app,
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    inputs=[
        gr.Textbox(placeholder=f"Enter {field_label}", label=field_label)
        for field_label in ("Title", "DOI", "ArXiv ID")
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    # One example per input field; the other two fields stay empty.
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1016/S0021-9258(19)52451-6", None],
        [None, None, "1706.03762"],
    ],
)

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()