# Spaces: Running
import logging | |
import pandas as pd | |
import gradio as gr | |
from pymed import PubMed | |
import urllib.parse | |
import urllib.request | |
import ipywidgets as widgets | |
def search_pubmed(search_term, keywords, max_results,
                  tool="my_tool", email="my_email@example.com"):
    """Query PubMed and return the selected fields of each hit as a DataFrame.

    Args:
        search_term: Query string passed to ``PubMed.query``.
        keywords: Column label(s) used to index the result frame. The frame
            has the columns ``"pubmed_id"``, ``"title"`` and ``"abstract"``.
        max_results: Maximum number of articles to request. Must be a number
            >= 1; non-integer values (e.g. a float coming from a UI slider)
            are truncated to ``int`` before the query is issued.
        tool: Tool name handed to the ``PubMed`` client. The default is a
            placeholder so that three-argument callers keep working.
        email: Contact e-mail handed to the ``PubMed`` client. The default is
            a placeholder; TODO replace with a real contact address.

    Returns:
        The article table indexed by ``keywords`` (``frame[keywords]``).

    Raises:
        ValueError: If ``max_results`` is ``None`` or smaller than 1.
        Exception: Any error raised while iterating the query results is
            logged and re-raised unchanged.
    """
    # Validate before touching the network.
    if max_results is None or max_results < 1:
        raise ValueError("Max Results must be a positive integer")
    max_results = int(max_results)

    # Connect to the PubMed database and issue the query.
    pubmed = PubMed(tool=tool, email=email)
    results = pubmed.query(search_term, max_results=max_results)

    # Iterating the results may fail part-way, so it is guarded and logged.
    try:
        articles = [article.toDict() for article in results]
    except Exception as e:
        logging.error("Error while processing articles: %s", e)
        raise

    columns = ["pubmed_id", "title", "abstract"]
    rows = [
        {
            # Keep only the text before the first newline; a missing or None
            # id becomes "" instead of raising.
            "pubmed_id": (article.get("pubmed_id") or "").partition("\n")[0],
            "title": article.get("title"),
            "abstract": article.get("abstract"),
        }
        for article in articles
    ]

    # Passing the columns explicitly keeps them present even when there are
    # no rows, so the selection below cannot fail on an empty result set.
    cardio_abstract = pd.DataFrame(rows, columns=columns)

    # Restrict the frame to the requested columns.
    return cardio_abstract[keywords]
def download_csv(b):
    """Button click handler: run the search and save it as ``pubmed_results.csv``.

    Reads the current "Search Term", "Keywords" and "Max Results" values from
    the module-level ``interface``, runs ``search_pubmed`` with them and
    writes the resulting frame to ``pubmed_results.csv`` (without the index).
    The module-level ``download_button`` is disabled and relabelled while the
    work is running and is always restored afterwards, including on failure.

    Args:
        b: The object passed by the button's click callback; unused.
    """
    download_button.description = "Downloading..."
    download_button.disabled = True
    try:
        # NOTE(review): confirm that `interface.process(raw=False)` exists and
        # returns a dict keyed by input label in the installed Gradio version.
        input_dict = interface.process(raw=False)
        search_term = input_dict["Search Term"]
        keywords = input_dict["Keywords"]
        max_results = input_dict["Max Results"]

        # search_pubmed declares five parameters; tool and email are passed
        # explicitly so the call matches that signature.
        # TODO: replace the placeholder contact details with real ones.
        dataframe = search_pubmed(
            search_term,
            keywords,
            max_results,
            "my_tool",
            "my_email@example.com",
        )
        dataframe.to_csv("pubmed_results.csv", index=False)
    finally:
        # Restore the button even if the search or the write raised, so it
        # never stays stuck in the disabled "Downloading..." state.
        download_button.description = "Download CSV"
        download_button.disabled = False
def _search_from_ui(search_term, keywords, max_results):
    """Adapt the three UI inputs to the five-parameter ``search_pubmed``."""
    # TODO: replace the placeholder contact details with real ones.
    return search_pubmed(
        search_term,
        keywords,
        int(max_results),  # the slider value may arrive as a float
        "my_tool",
        "my_email@example.com",
    )


# Input widgets, in the order the search function receives them.
inputs = [
    gr.inputs.Textbox(label="Search Term"),
    # A multi-select group: the chosen labels are used as DataFrame columns.
    # A single Checkbox would only yield one boolean and takes no choices.
    gr.inputs.CheckboxGroup(["pubmed_id", "title", "abstract"], label="Keywords"),
    gr.inputs.Slider(minimum=1, maximum=10000, step=1, default=100,
                     label="Max Results"),
]
outputs = [gr.outputs.Dataframe(type="pandas")]

interface = gr.Interface(_search_from_ui, inputs, outputs, title="PubMed Search")
result = interface.launch(share=True)

# Notebook-style download button wired to download_csv.
download_button = widgets.Button(description="Download CSV")
download_button.on_click(download_csv)
# NOTE(review): `display` is not imported in this file; presumably it is the
# IPython/Jupyter builtin, so this line only works inside a notebook.
display(download_button)