Spaces:

drvikasgaur
/

Pubmed-search-app

Running

File size: 6,695 Bytes

5d3056e

import io
import os
import subprocess
import sys
import tempfile

import gradio as gr
import pandas as pd
import requests

try:
    from docx import Document
except ImportError:
    # python-docx is missing: install it for the interpreter that is running
    # this script, then retry. The install has to happen BEFORE the import to
    # be of any use, and is skipped entirely when the package is already there.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "python-docx"])
    from docx import Document


# Endpoint of the PubMed search backend; it is called with `query`,
# `max_results` and `page` as query-string parameters.
API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"

# Module-level slot for a results DataFrame (None until something is stored).
# NOTE(review): module globals are shared by everyone using the running app,
# not kept per browser session.
global_df = None  # Global variable to store search results for export

def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
    """
    Fetches PubMed articles from the search API, then filters, sorts and formats them.

    Args:
        query: Free-text search string. It is sent URL-encoded, so characters
            such as "&", "#" or "+" are safe.
        max_results: Number of results requested per page (coerced to int).
        page: Page number forwarded to the API (coerced to int).
        sort_by: "Year" (newest first), "Title" or "Journal" (both
            case-insensitive, ascending). Any other value keeps the API order.
        filter_journal: Case-insensitive substring that must occur in the
            article's journal name. "All" or an empty value disables the filter.
        min_year: Inclusive lower bound on the publication year; a falsy value
            disables it.
        max_year: Inclusive upper bound on the publication year; a falsy value
            disables it.

    Returns:
        A ``(text, df)`` tuple. On success ``text`` is the formatted result
        list and ``df`` is a DataFrame of the filtered, sorted articles. On any
        failure, or when nothing is left to show, ``text`` is a human-readable
        message and ``df`` is None. The function never raises.
    """
    if not query or not str(query).strip():
        return "⚠️ Please enter a search query.", None

    try:
        # Let requests build the query string so the search text is properly
        # percent-encoded. The timeout is deliberately generous (the backend
        # may be slow to respond) but keeps the UI from hanging forever.
        response = requests.get(
            API_BASE_URL,
            params={
                "query": query,
                "max_results": int(max_results),
                "page": int(page),
            },
            timeout=120,
        )

        if response.status_code != 200:
            return f"⚠️ API Error: {response.status_code} - {response.text}", None

        articles = response.json()

        if not articles:
            return "No articles found for this query.", None

        # Normalise the year to an int; missing or unparseable years become 0
        # so that they sort last and can still be compared numerically.
        for article in articles:
            try:
                article["Year"] = int(article["Year"])
            except (KeyError, TypeError, ValueError):
                article["Year"] = 0

        # Apply journal filtering (tolerates a missing or null journal field)
        if filter_journal and filter_journal != "All":
            needle = filter_journal.strip().lower()
            articles = [
                a for a in articles
                if needle in str(a.get("Journal") or "").lower()
            ]

        # Apply year filtering
        if min_year:
            lower_bound = int(min_year)
            articles = [a for a in articles if a["Year"] >= lower_bound]
        if max_year:
            upper_bound = int(max_year)
            articles = [a for a in articles if a["Year"] <= upper_bound]

        if not articles:
            return "No articles match the selected filters.", None

        # Apply sorting; text keys are case-folded and null-safe so that
        # mixed-case or missing values cannot break or skew the order.
        if sort_by == "Year":
            articles.sort(key=lambda a: a["Year"], reverse=True)
        elif sort_by == "Title":
            articles.sort(key=lambda a: str(a.get("Title") or "").casefold())
        elif sort_by == "Journal":
            articles.sort(key=lambda a: str(a.get("Journal") or "").casefold())

        # Format results
        formatted_results = [
            f"## 📰 {article['Title']}\n"
            f"📖 **<span style='color:blue'>{article['Journal']}</span>** ({article['Year']})\n"
            f"👨‍🔬 **<span style='color:gray'>{article['Authors']}</span>**\n"
            f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
            f"<details><summary>📄 **Show Abstract**</summary>\n{article['Abstract']}\n</details>"
            f"\n---\n"
            for article in articles
        ]

        df = pd.DataFrame(articles)
        return "\n\n".join(formatted_results), df

    except Exception as e:
        # UI boundary: report every failure (network, JSON, unexpected
        # payload shape) as a message instead of crashing the handler.
        return f"⚠️ Error fetching data: {str(e)}", None



def export_results(df, format_type):
    """
    Exports search results as a CSV or DOCX file and returns the file path.

    A path is returned (rather than an in-memory buffer) because that is what
    the Gradio file output in this app is given.

    Args:
        df: DataFrame of articles. The DOCX export reads the "Title",
            "Journal", "Year", "Authors", "PubMed_URL" and "Abstract" columns.
        format_type: "CSV" or "DOCX" (case-insensitive).

    Returns:
        Path of a newly created temporary file, or None when ``df`` is None or
        empty. The file is not deleted automatically.

    Raises:
        ValueError: If ``format_type`` is not a supported format.
    """
    if df is None or df.empty:
        return None

    export_format = str(format_type).strip().upper()
    if export_format not in ("CSV", "DOCX"):
        raise ValueError(f"Unsupported export format: {format_type!r}")

    # Reserve a unique path and close the handle straight away: the writers
    # below reopen the file by name, which is not possible on every platform
    # while the original handle is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as temp_file:
        temp_file_path = temp_file.name

    try:
        if export_format == "CSV":
            df.to_csv(temp_file_path, index=False)
        else:
            doc = Document()
            doc.add_heading("PubMed Search Results", level=1)
            for _, row in df.iterrows():
                # str() guards against non-string titles (e.g. NaN)
                doc.add_heading(str(row["Title"]), level=2)
                doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
                doc.add_paragraph(f"👨‍🔬 Authors: {row['Authors']}")
                doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
                doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
                doc.add_paragraph("---")
            doc.save(temp_file_path)
    except Exception:
        # Do not leave an empty or half-written file behind on failure.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        raise

    return temp_file_path  # Return file path instead of BytesIO


# Gradio UI: search form, rendered results, and CSV/DOCX export.
with gr.Blocks() as app:
    gr.Markdown("""
    # 🔍 **PubMed Search Tool with Advanced Features**

    ## 📖 **How to Use This App**
    1️⃣ **Enter a Search Query** *(e.g., "Deep Learning in Psychiatry")*
    2️⃣ **Set the Number of Results & Page Number** *(Default: 10 results per page)*
    3️⃣ **Choose Sorting Option** *(Year, Title, or Journal - Default: Year)*
    4️⃣ **(Optional) Filter by Journal Name** *(e.g., "Nature", "JAMA")*
    5️⃣ **(Optional) Filter by Year Range** *(Set min & max year, e.g., 2015 - 2023)*
    6️⃣ **Click "🔍 Search" to fetch results**
    7️⃣ **Click "📂 Export as CSV" or "📄 Export as Word DOCX" to save articles**
    8️⃣ **Click "📄 Show Abstract" under each result to expand full abstract**

    ## ⚠️ **Important Notes**
    - **Sorting & Filtering can be combined** *(e.g., show only "Nature" articles from 2020-2024, sorted by Title)*
    
    """)

    with gr.Row():
        query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')", lines=1)

    with gr.Row():
        max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
        page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")

    with gr.Row():
        sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
        journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")

    with gr.Row():
        min_year_input = gr.Number(label="📅 Min Year", value=None)
        max_year_input = gr.Number(label="📅 Max Year", value=None)

    with gr.Row():
        search_button = gr.Button("🔍 Search")
        export_csv_button = gr.Button("📂 Export as CSV")
        export_docx_button = gr.Button("📄 Export as Word DOCX")

    # NOTE(review): fetch_pubmed_articles returns Markdown-flavoured text
    # (headings, bold, links) but this component is gr.HTML - confirm it
    # renders as intended; gr.Markdown may be the better fit.
    results_output = gr.HTML()
    export_csv_output = gr.File(label="Download CSV")
    export_docx_output = gr.File(label="Download Word DOCX")

    # Per-session holder for the latest results DataFrame. A module-level
    # global would be shared by every visitor of the running app, so one
    # user's export could contain another user's search.
    results_state = gr.State(value=None)

    def search_and_display(query, max_results, page, sort_by, journal_filter, min_year, max_year):
        """Run the search; return the text to show and the DataFrame to keep for export."""
        result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
        return result_text, df

    def export_csv(df):
        """Return the path of a CSV export of this session's results, or None if there are none."""
        return export_results(df, "CSV")

    def export_docx(df):
        """Return the path of a DOCX export of this session's results, or None if there are none."""
        return export_results(df, "DOCX")

    search_button.click(search_and_display,
                        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
                        outputs=[results_output, results_state])

    export_csv_button.click(export_csv, inputs=results_state, outputs=export_csv_output)
    export_docx_button.click(export_docx, inputs=results_state, outputs=export_docx_output)

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    # inbrowser=True asks Gradio to open the app in a browser tab on launch.
    app.launch(inbrowser=True)