"""Gradio app that turns a paper title into a structured citation.

The title is looked up on Semantic Scholar for metadata and on DBLP for a
BibTeX record; the results are rendered as a single Markdown document.
"""
import json
import os
import pprint
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Upper bound (seconds) for every outgoing HTTP request so that a stalled
# remote server cannot hang the UI callback forever.
REQUEST_TIMEOUT = 30

# Authors beyond this count are collapsed into "et al." in the citation.
MAX_LISTED_AUTHORS = 5


def get_dblp_bibref(title: str) -> Tuple[str, Optional[str]]:
    """Fetch the BibTeX record of the first DBLP search hit for ``title``.

    Args:
        title: Paper title used as the DBLP search query.

    Returns:
        ``(bibtex, page_url)`` on success. On any failure the first element
        is a human-readable error message and the second is ``None``.
    """
    print(f'DBLP query: {title}')
    try:
        # Pass the query through ``params`` so requests URL-encodes it;
        # hand-built URLs break on titles containing '&', '#', '?', ...
        response = requests.get(
            'https://dblp.org/search/publ/api',
            params={'q': title, 'format': 'xml'},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')

        # The first <url> element belongs to the first paper in the results.
        first_hit = soup.select_one('url')
        if first_hit is None:
            return 'Error during get bibref from DBLP: no matching publication found', None
        record_url = first_hit.text

        bib_response = requests.get(record_url + '.bib', timeout=REQUEST_TIMEOUT)
        # Without this check an HTTP error page would be shown as BibTeX.
        bib_response.raise_for_status()
        return bib_response.text, record_url + '.html'
    except Exception as e:
        # Deliberate best-effort boundary: a DBLP failure must not prevent
        # the Semantic Scholar part of the answer from being displayed.
        return f'Error during get bibref from DBLP: {e}', None


# set pprint width
pp = pprint.PrettyPrinter(width=128)

# NOTE(security): a credential is committed in source. It is kept only as a
# backward-compatible fallback; prefer setting SEMANTIC_SCHOLAR_API_KEY in the
# environment, and rotate the embedded key.
API_KEY = os.environ.get('SEMANTIC_SCHOLAR_API_KEY', 'eRLnjZeWSs4gHjSemy5af1X7IbugACFg1tSX6F3R')
FIELDS = "paperId,title,url,year,authors,venue,abstract,citationCount,openAccessPdf,fieldsOfStudy,publicationDate,citations,references"

# Snippet that was used to generate name_mapping.json (kept for reference):
#
# def get_name_mapping(venues_data='/nfs/delong/data/s2orc/s2ag_full/publication-venues'):
#     name_mapping = {} # from full name to abbreviated name
#     for file in os.listdir(venues_data):
#         with open(os.path.join(venues_data, file), 'r') as f:
#             venues = [json.loads(line) for line in f.readlines()]
#         print(f"Total number of venues in {file}: {len(venues)}")
#         for venue in venues:
#             if len(venue['alternate_names'])>0:
#                 # name_mapping[venue['name']] = venue['alternate_names'][0]
#                 # instead of using the first alternate name, use the shortest one
#                 name_mapping[venue['name']] = min(venue['alternate_names'], key=len)
#     name_mapping['Neural Information Processing Systems'] = 'NeurIPS'
#     print(f'loaded {len(name_mapping)} venues from {venues_data}')
#     return name_mapping
# name_mapping = get_name_mapping()
# json.dump(name_mapping, open('name_mapping.json', 'w'), indent=4)

# Mapping from full venue name to abbreviated name, loaded once at import.
with open('name_mapping.json', 'r', encoding='utf-8') as _mapping_file:
    name_mapping = json.load(_mapping_file)
print(f'loaded {len(name_mapping)} venues from name_mapping.json')


def search_paper_title_semanticscholar(title: str) -> Optional[Dict[str, Any]]:
    """Look up ``title`` on Semantic Scholar and return the top hit's details.

    Two requests are made: a search limited to one result, then a detail
    request for that paper asking for ``FIELDS``.

    Args:
        title: Free-text paper title.

    Returns:
        The decoded JSON detail record, or ``None`` when nothing matched,
        the API answered with a non-200 status, or the request failed.
    """
    headers = {"Accept": "application/json", "x-api-key": API_KEY}
    try:
        response = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            headers=headers,
            params={"query": title, "limit": 1},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        data = response.json()
        hits = data.get('data') or []
        if not data.get('total') or not hits:
            print("No paper found with the given title.")
            return None

        paper_id = hits[0]['paperId']
        response = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
            headers=headers,
            params={"fields": FIELDS},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure or a body that is not valid JSON: report it the
        # same way as the HTTP error paths above instead of crashing the UI.
        print(f"Error: {e}")
        return None


def get_abbreviated_venue(name):
    """Return the abbreviation of venue ``name``, or ``name`` itself if unknown."""
    return name_mapping.get(name, name)


def _format_authors(authors: List[Dict[str, Any]]) -> str:
    """Render the leading author list of a citation, including its trailing separator."""
    names = [author['name'] for author in authors[:MAX_LISTED_AUTHORS]]
    if not names:
        return ''
    if len(authors) > MAX_LISTED_AUTHORS:
        return ', '.join(names) + ', *et al.* '
    return ', '.join(names) + '. '


def _format_publication_date(paper_info: Dict[str, Any]) -> str:
    """Return the publication date as ``YYYY.MM``, the bare year, or ``''``."""
    publication_date = paper_info.get('publicationDate')
    if publication_date:
        # Drop the trailing "-DD" and switch to dotted notation.
        return publication_date[:-3].replace('-', '.')
    # 'publicationDate' can be missing/null; fall back to 'year'.
    year = paper_info.get('year')
    return str(year) if year else ''


def get_md_citation(paper_info: Dict[str, Any]) -> str:
    """Build a one-line Markdown citation for a Semantic Scholar record.

    Layout: ``Authors. [Title](url). *Venue* (YYYY.MM).`` The venue and the
    date are each omitted when the record does not provide them.

    Args:
        paper_info: Record as returned by
            ``search_paper_title_semanticscholar``; ``title`` and ``url``
            are required, the other fields are optional.

    Returns:
        The citation as a Markdown string.
    """
    citation = _format_authors(paper_info.get('authors') or [])
    citation += f"[{paper_info['title']}]({paper_info['url']})."

    tail = []
    venue = paper_info.get('venue')
    if venue:
        tail.append(f"*{get_abbreviated_venue(venue)}*")
    date_str = _format_publication_date(paper_info)
    if date_str:
        tail.append(f"({date_str})")
    if tail:
        citation += ' ' + ' '.join(tail) + '.'
    return citation


def summarize_paper_info(paper_info: Dict[str, Any]) -> str:
    """Render authors, abstract and citation count as a Markdown fragment.

    Authors that carry an ``authorId`` are linked to their Semantic Scholar
    page; authors without one are shown as plain text.
    """
    author_entries = []
    for author in paper_info.get('authors') or []:
        author_id = author.get('authorId')
        if author_id:
            author_entries.append(
                f"[{author['name']}](https://www.semanticscholar.org/author/{author_id})"
            )
        else:
            author_entries.append(f"{author['name']}")
    author_str = ', '.join(author_entries)

    abstract = paper_info.get('abstract') or 'N/A'

    return (
        f"**Authors**:\n\n{author_str}\n\n"
        f"\n\n> **Abstract**: {abstract}\n\n"
        f"**Citation Count**: {paper_info['citationCount']}\n\n"
    )


def get_output(title: str) -> str:
    """Gradio callback: produce the full Markdown answer for a title query.

    Args:
        title: Title typed by the user.

    Returns:
        Markdown containing the rendered citation, its Markdown source, the
        DBLP BibTeX entry (or the DBLP error message), a summary of the
        paper and external links; or a short message when Semantic Scholar
        returned no paper.
    """
    print(f"Title query: {title}")
    paper_info = search_paper_title_semanticscholar(title)
    if paper_info is None:
        # Nothing to render; returning here avoids indexing into None below.
        return "No paper found with that title."

    md_citation = get_md_citation(paper_info)
    bibtex, dblp_link = get_dblp_bibref(paper_info['title'])

    links = f"🔗 [[Open in Semantic Scholar]](https://www.semanticscholar.org/paper/{paper_info['paperId']})"
    if dblp_link:
        # Only advertise the DBLP page when the lookup actually found one.
        links += f" | [[DBLP Page]]({dblp_link})"

    # Blank lines precede each '---' so Markdown renders a horizontal rule
    # rather than turning the preceding line into a heading.
    citation_str = '\n'.join([
        '',
        '```text',
        f"{paper_info['title']}",
        '```',
        md_citation,
        '',
        '---',
        '',
        '**Markdown source code**',
        '```markdown',
        md_citation,
        '```',
        '**BibTex**',
        '```bibtex',
        f'{bibtex}',
        '```',
        summarize_paper_info(paper_info),
        '',
        '---',
        '',
        links,
        '',
    ])
    print(citation_str)
    return citation_str


def main():
    """Build the Gradio interface around ``get_output`` and launch it."""
    iface = gr.Interface(
        fn=get_output,
        inputs=gr.components.Textbox(
            lines=1,
            label="Please input the title of the paper to get its citation.",
            placeholder="Your title here",
            autofocus=True,
        ),
        outputs="markdown",
        allow_flagging='never',
        title="Citation Tool",
        description="### Search paper title from [Semantic Scholar](https://www.semanticscholar.org/) and [DBLP](http://dblp.org/), and get structured citation.",
    )
    iface.launch()


if __name__ == "__main__":
    main()