Spaces:

Gaborandi
/

PubMed_Downloader

Running

App Files Files Community

PubMed_Downloader / app.py

Gaborandi

Update app.py

cfd6947 over 2 years ago

raw

history blame

2.57 kB

	import logging
	import pandas as pd
	import gradio as gr
	from pymed import PubMed
	import urllib.parse
	import urllib.request
	import ipywidgets as widgets

	def search_pubmed(search_term, keywords, max_results, tool=None, email=None):
	    """Query PubMed and return the requested fields of the hits as a DataFrame.

	    Args:
	        search_term: Query string passed to ``PubMed.query``.
	        keywords: Column label(s) to keep in the result; the available
	            columns are ``"pubmed_id"``, ``"title"`` and ``"abstract"``.
	        max_results: Maximum number of articles to request. Must convert to
	            an integer that is at least 1.
	        tool: Optional tool name forwarded to the ``PubMed`` client. When
	            omitted, the client's own default is used.
	        email: Optional contact e-mail forwarded to the ``PubMed`` client.
	            When omitted, the client's own default is used.

	    Returns:
	        The result of selecting ``keywords`` from a DataFrame holding one
	        row per article.

	    Raises:
	        ValueError: If ``max_results`` is missing, not numeric, or below 1.
	        KeyError: If ``keywords`` names a column that does not exist.
	        Exception: Anything raised while iterating the query results is
	            logged and re-raised unchanged.
	    """
	    # Validate the input. UI widgets such as sliders may hand over floats,
	    # so normalise to int before the value reaches the PubMed client.
	    try:
	        max_results = int(max_results)
	    except (TypeError, ValueError, OverflowError):
	        max_results = 0
	    if max_results < 1:
	        raise ValueError("Max Results must be a positive integer")

	    # Connect to the PubMed database, forwarding only the identification
	    # values the caller actually supplied.
	    client_options = {}
	    if tool is not None:
	        client_options["tool"] = tool
	    if email is not None:
	        client_options["email"] = email
	    pubmed = PubMed(**client_options)
	    results = pubmed.query(search_term, max_results=max_results)

	    # The results are consumed lazily, so failures surface while iterating.
	    try:
	        records = [article.toDict() for article in results]
	    except Exception as e:
	        # Log the error if it occurs, then let the caller see it.
	        logging.error("Error while processing articles: {}".format(e))
	        raise

	    columns = ["pubmed_id", "title", "abstract"]
	    rows = []
	    for record in records:
	        # Keep only the text before the first newline of the id field
	        # (the field apparently can carry several ids); tolerate a missing id.
	        raw_id = record.get("pubmed_id") or ""
	        rows.append({
	            "pubmed_id": raw_id.partition("\n")[0],
	            "title": record.get("title"),
	            "abstract": record.get("abstract"),
	        })

	    # Passing the column list explicitly keeps the frame well-formed even
	    # when the search produced no hits, so the selection below still works.
	    frame = pd.DataFrame(rows, columns=columns)

	    # Keep only the columns the caller asked for.
	    return frame[keywords]

	def download_csv(b):
	    """Button callback: run the current search and save it as ``pubmed_results.csv``.

	    The button is relabelled and disabled while the work runs, and is always
	    restored afterwards, including when the search or the CSV write fails.

	    Args:
	        b: The widget instance supplied by ``on_click``; not used.
	    """
	    download_button.description = "Downloading..."
	    download_button.disabled = True
	    try:
	        # NOTE(review): assumes ``interface.process(raw=False)`` returns a
	        # mapping keyed by input label - confirm against the installed
	        # Gradio version.
	        input_dict = interface.process(raw=False)
	        search_term = input_dict["Search Term"]
	        keywords = input_dict["Keywords"]
	        max_results = input_dict["Max Results"]
	        # NOTE(review): only three arguments are passed here, so
	        # search_pubmed must accept calls without ``tool`` and ``email``.
	        dataframe = search_pubmed(search_term, keywords, max_results)
	        dataframe.to_csv("pubmed_results.csv", index=False)
	    finally:
	        # Restore the button even on failure so it never stays stuck in the
	        # disabled "Downloading..." state.
	        download_button.description = "Download CSV"
	        download_button.disabled = False

	# Input widgets, listed in the positional order search_pubmed receives them.
	inputs = [
	    gr.inputs.Textbox(label="Search Term"),
	    # CheckboxGroup (not Checkbox): the user picks any subset of columns and
	    # the callback receives the selected labels as a list. A plain Checkbox
	    # is a single on/off toggle and would deliver a bool instead.
	    gr.inputs.CheckboxGroup(["pubmed_id", "title", "abstract"], label="Keywords"),
	    gr.inputs.Slider(minimum=1, maximum=10000, default=100, label="Max Results"),
	]

	# The search result is rendered as a table.
	outputs = [gr.outputs.Dataframe(type="pandas")]

	interface = gr.Interface(search_pubmed, inputs, outputs, title="PubMed Search")

	# NOTE(review): outside a notebook, launch() presumably blocks until the
	# server stops, in which case the widget code below never runs - confirm.
	result = interface.launch(share=True)

	# NOTE(review): ``display`` is not imported in this file; it is only
	# available implicitly inside an IPython/Jupyter session - confirm the
	# intended runtime.
	download_button = widgets.Button(description="Download CSV")
	download_button.on_click(download_csv)
	display(download_button)