# Source: Hugging Face Space "Gaborandi/PubMed_Downloader" (status: Running)
# File size: 2,565 bytes

# Commit hashes shown in the page's blame gutter for this file: dcb44a1, cfd6947

import logging
import pandas as pd
import gradio as gr
from pymed import PubMed
import urllib.parse
import urllib.request
import ipywidgets as widgets

def search_pubmed(search_term, keywords, max_results,
                  tool="PubMed_Downloader", email="my_email@example.com"):
    """Query PubMed and return the requested fields of the matching articles.

    Args:
        search_term: Query string handed to ``PubMed.query``.
        keywords: Column labels to keep in the returned frame. The frame only
            ever has the columns ``"pubmed_id"``, ``"title"`` and
            ``"abstract"``, so every label must be one of those.
        max_results: Upper bound on the number of articles to fetch; must be
            at least 1. Non-integer numbers (e.g. from a UI slider) are
            truncated to ``int`` before being passed on.
        tool: Tool name passed to the ``PubMed`` client. The default is a
            placeholder so that three-argument callers keep working.
        email: Contact e-mail passed to the ``PubMed`` client. The default is
            a placeholder; supply a real address for production use.

    Returns:
        A ``pandas.DataFrame`` with one row per retrieved article, restricted
        to the columns named in ``keywords``. When the query matches nothing
        the frame is empty but still has the requested columns.

    Raises:
        ValueError: If ``max_results`` is ``None`` or smaller than 1.
        KeyError: If ``keywords`` names a column that does not exist.
        Exception: Anything raised while querying or converting articles is
            logged and re-raised unchanged.
    """
    columns = ["pubmed_id", "title", "abstract"]

    # Validate before touching the network.
    if max_results is None or max_results < 1:
        raise ValueError("Max Results must be a positive integer")
    max_results = int(max_results)

    pubmed = PubMed(tool=tool, email=email)

    # The query itself and the iteration over its results are both wrapped so
    # that any failure while talking to PubMed is logged before propagating.
    try:
        results = pubmed.query(search_term, max_results=max_results)
        articles = [article.toDict() for article in results]
    except Exception as e:
        logging.error("Error while processing articles: %s", e)
        raise

    rows = [
        {
            # Keep only the text before the first newline -- presumably the
            # field can hold several newline-separated IDs and the first is
            # the article's own (TODO confirm against pymed).
            "pubmed_id": (article.get("pubmed_id") or "").partition("\n")[0],
            "title": article.get("title"),
            "abstract": article.get("abstract"),
        }
        for article in articles
    ]

    # Passing the columns explicitly keeps them present even when ``rows`` is
    # empty, so the selection below does not fail on a query with no hits.
    frame = pd.DataFrame(rows, columns=columns)

    return frame[keywords]

def download_csv(b):
    """Button click handler: run the current search and save it as a CSV file.

    Reads the search term, keywords and result limit from the Gradio
    interface, runs ``search_pubmed`` with them and writes the resulting
    frame to ``pubmed_results.csv`` (without the index column). While the
    work is in progress the button is disabled and relabelled.

    Args:
        b: The widget that triggered the callback (unused).
    """
    download_button.description = "Downloading..."
    download_button.disabled = True
    try:
        # NOTE(review): confirm that the installed gradio version exposes
        # ``Interface.process(raw=False)`` returning a dict keyed by label.
        input_dict = interface.process(raw=False)
        search_term = input_dict["Search Term"]
        keywords = input_dict["Keywords"]
        max_results = input_dict["Max Results"]
        # search_pubmed also needs a tool name and contact e-mail; these are
        # placeholders -- replace them with real values for production use.
        dataframe = search_pubmed(
            search_term,
            keywords,
            max_results,
            "PubMed_Downloader",
            "my_email@example.com",
        )
        dataframe.to_csv("pubmed_results.csv", index=False)
    finally:
        # Always restore the button, otherwise a failed search would leave it
        # permanently disabled and stuck on "Downloading...".
        download_button.description = "Download CSV"
        download_button.disabled = False

# Input components, in the same order as search_pubmed's parameters:
# search_term, keywords, max_results, tool, email.
inputs = [
    gr.inputs.Textbox(label="Search Term"),
    # A multi-select is required here: search_pubmed uses the value as a list
    # of column labels, whereas a plain Checkbox only yields a single boolean.
    gr.inputs.CheckboxGroup(
        ["pubmed_id", "title", "abstract"],
        default=["pubmed_id", "title", "abstract"],
        label="Keywords",
    ),
    # step=1 keeps the slider on whole numbers.
    gr.inputs.Slider(minimum=1, maximum=10000, step=1, default=100,
                     label="Max Results"),
    # Placeholder identification values for the PubMed client; users should
    # overwrite them with their own details.
    gr.inputs.Textbox(default="PubMed_Downloader", label="Tool"),
    gr.inputs.Textbox(default="my_email@example.com", label="Email"),
]

outputs = [gr.outputs.Dataframe(type="pandas")]

interface = gr.Interface(search_pubmed, inputs, outputs, title="PubMed Search")

# NOTE(review): outside a notebook, launch() may block, in which case the
# button below is never created -- confirm the intended runtime.
result = interface.launch(share=True)

download_button = widgets.Button(description="Download CSV")
download_button.on_click(download_csv)
# NOTE(review): ``display`` is only predefined inside IPython/Jupyter; this
# line raises NameError when the file is run as a plain script.
display(download_button)