Spaces:

drvikasgaur
/

Pubmed-search-app

Running

App Files Files Community

drvikasgaur committed on May 17

Commit

53a1484

verified ·

1 Parent(s): 4fe5fbb

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -139

app.py CHANGED Viewed

@@ -1,164 +1,169 @@
 import gradio as gr
-import requests
 import pandas as pd
-import io
 from docx import Document
-import tempfile
-import os
-os.system("pip install python-docx")
-API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"
-global_df = None  # Global variable to store search results for export
-def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
-    """
-    Fetches PubMed articles and applies sorting and filtering.
-    """
-    try:
-        url = f"{API_BASE_URL}?query={query}&max_results={max_results}&page={page}"
-        response = requests.get(url)
-        if response.status_code != 200:
-            return f"⚠️ API Error: {response.status_code} - {response.text}", None
-        articles = response.json()
-        if not articles:
-            return "No articles found for this query.", None
-        for article in articles:
-            try:
-                article["Year"] = int(article["Year"])
-            except:
-                article["Year"] = 0
-        if filter_journal and filter_journal != "All":
-            articles = [a for a in articles if filter_journal.lower() in a['Journal'].lower()]
-        if min_year:
-            articles = [a for a in articles if a["Year"] >= int(min_year)]
-        if max_year:
-            articles = [a for a in articles if a["Year"] <= int(max_year)]
-        if sort_by == "Year":
-            articles.sort(key=lambda x: x["Year"], reverse=True)
-        elif sort_by == "Title":
-            articles.sort(key=lambda x: x["Title"])
-        elif sort_by == "Journal":
-            articles.sort(key=lambda x: x["Journal"])
-        formatted_results = []
-        for article in articles:
-            formatted_results.append(
-                f"## 📰 {article['Title']}\n"
-                f"📖 **<span style='color:blue'>{article['Journal']}</span>** ({article['Year']})\n"
-                f"👨‍🔬 **<span style='color:gray'>{article['Authors']}</span>**\n"
-                f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
-                f"<details><summary>📄 **Show Abstract**</summary>\n{article['Abstract']}\n</details>"
-                f"\n---\n"
-            )
-        df = pd.DataFrame(articles)
-        return "\n\n".join(formatted_results), df
-    except Exception as e:
-        return f"⚠️ Error fetching data: {str(e)}", None
-def export_results(df, format_type):
     if df is None or df.empty:
         return None
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{format_type.lower()}")
-    temp_file_path = temp_file.name
-    if format_type == "CSV":
-        df.to_csv(temp_file_path, index=False)
-    elif format_type == "DOCX":
         doc = Document()
-        doc.add_heading("PubMed Search Results", level=1)
         for _, row in df.iterrows():
-            doc.add_heading(row["Title"], level=2)
-            doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
-            doc.add_paragraph(f"👨‍🔬 Authors: {row['Authors']}")
-            doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
-            doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
-            doc.add_paragraph("---")
-        doc.save(temp_file_path)
-    temp_file.close()
-    return temp_file_path
-with gr.Blocks() as app:
-    gr.Markdown("""
-    # 🔍 **PubMed Search Tool with Advanced Features**
-    ## 📖 **How to Use This App**
-    1️⃣ Enter a Search Query (e.g., "Deep Learning in Psychiatry")
-    2️⃣ Set Number of Results & Page
-    3️⃣ Choose Sort Option (Year, Title, Journal)
-    4️⃣ (Optional) Filter by Journal or Year
-    5️⃣ Click **Search** to fetch articles
-    6️⃣ Click **Export as CSV/DOCX** to download
-    """)
-    with gr.Row():
-        query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')")
-    with gr.Row():
-        max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
-        page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")
-    with gr.Row():
-        sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
-        journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")
     with gr.Row():
-        min_year_input = gr.Number(label="📅 Min Year", value=None)
-        max_year_input = gr.Number(label="📅 Max Year", value=None)
     with gr.Row():
-        search_button = gr.Button("🔍 Search")
-        export_csv_button = gr.Button("📂 Export as CSV")
-        export_docx_button = gr.Button("📄 Export as Word DOCX")
-    results_output = gr.HTML()
-    status_output = gr.Text(label="🔄 Status", interactive=False)
-    export_csv_output = gr.File(label="Download CSV")
-    export_docx_output = gr.File(label="Download Word DOCX")
-    def search_and_display_wrapper(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-        global global_df
-        yield gr.update(value="🔄 Searching PubMed..."), gr.update(value="")
-        result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
-        global_df = df
-        if df is not None:
-            status = "✅ Done"
-        else:
-            status = "❌ No Results Found"
-        yield gr.update(value=status), gr.update(value=result_text)
-    def export_csv():
-        if global_df is not None:
-            return export_results(global_df, "CSV")
-    def export_docx():
-        if global_df is not None:
-            return export_results(global_df, "DOCX")
     search_button.click(
         fn=search_and_display_wrapper,
         inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
-        outputs=[status_output, results_output]
     )
-    export_csv_button.click(export_csv, outputs=export_csv_output)
-    export_docx_button.click(export_docx, outputs=export_docx_output)
-if __name__ == "__main__":
-    app.launch()

 import gradio as gr
 import pandas as pd
+import requests
 from docx import Document
+from datetime import datetime
# Functions that call the NCBI E-utilities API and return results
def _element_text(element):
    """Return all text inside *element*, including text nested in inline markup.

    ``Element.text`` stops at the first child tag, so a title such as
    ``Role of <i>BRCA1</i> in ...`` would be cut short; ``itertext`` keeps it whole.
    """
    if element is None:
        return ""
    return "".join(element.itertext()).strip()


def _parse_pubmed_articles(root):
    """Convert a parsed efetch XML tree into a list of article dicts.

    Each dict has the keys ``Title``, ``Abstract``, ``Journal``, ``Year``
    (``int`` or ``None``) and ``PMID``.
    """
    articles = []
    for article in root.findall(".//PubmedArticle"):
        title = _element_text(article.find(".//ArticleTitle")) or "No Title"
        abstract_parts = (_element_text(node) for node in article.findall(".//AbstractText"))
        abstract = " ".join(part for part in abstract_parts if part)
        journal = article.findtext(".//Journal/Title", default="No Journal")

        year_text = (article.findtext(".//PubDate/Year") or "").strip()
        if not year_text:
            # Some records carry a free-form <MedlineDate> (e.g. "2020 Jan-Feb")
            # instead of <Year>; its leading four characters are the year.
            year_text = (article.findtext(".//PubDate/MedlineDate") or "").strip()[:4]
        year = int(year_text) if year_text.isdigit() else None

        articles.append({
            "Title": title,
            "Abstract": abstract,
            "Journal": journal,
            "Year": year,
            "PMID": article.findtext(".//PMID", default=""),
        })
    return articles


def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
    """Search PubMed and return the matching articles as Markdown plus a DataFrame.

    The search runs in two steps against the NCBI E-utilities: ``esearch``
    resolves the query to a page of PubMed IDs, then ``efetch`` downloads the
    records for those IDs as XML. Journal/year filtering and sorting are applied
    locally, i.e. only to the page of results that was fetched.

    Args:
        query: PubMed search term.
        max_results: Page size (sent as ``retmax``); coerced to an integer >= 1.
        page: 1-based page number; coerced to an integer >= 1.
        sort_by: ``"Year"`` (newest first), ``"Title"`` or ``"Journal"``
            (both ascending). Any other value leaves the order unchanged.
        journal_filter: Case-insensitive substring the journal name must
            contain; empty/``None`` disables the filter.
        min_year: Lowest publication year to keep, or ``None``.
        max_year: Highest publication year to keep, or ``None``.

    Returns:
        ``(markdown_text, dataframe)`` on success, or ``(error_message, None)``
        when the input is invalid, the request fails, or nothing matches.
        Records without a known year are dropped whenever a year bound is set.
    """
    from xml.etree import ElementTree as ET

    if not query or not query.strip():
        return "❌ Please enter a search query.", None

    # Numeric UI fields can deliver floats or None; the API needs whole numbers.
    try:
        max_results = max(1, int(max_results))
        page = max(1, int(page))
    except (TypeError, ValueError):
        return "❌ Max results and page number must be whole numbers.", None

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    api_key = ""  # optional: insert your NCBI API key
    timeout_seconds = 30

    search_params = {
        "db": "pubmed",
        "term": query.strip(),
        "retmax": max_results,
        "retstart": max_results * (page - 1),
        "retmode": "json",
    }
    fetch_params = {"db": "pubmed", "retmode": "xml"}
    if api_key:
        # Only send the key when one is configured; an empty value is not a valid key.
        search_params["api_key"] = api_key
        fetch_params["api_key"] = api_key

    try:
        response = requests.get(base_url, params=search_params, timeout=timeout_seconds)
    except requests.RequestException as exc:
        return f"❌ Error: could not reach PubMed ({exc})", None
    if response.status_code != 200:
        return f"❌ Error: {response.status_code} - {response.reason}", None

    try:
        id_list = response.json().get("esearchresult", {}).get("idlist", [])
    except ValueError:
        return "❌ Error: PubMed returned an unreadable search response.", None
    if not id_list:
        return "❌ No results found for this query.", None

    fetch_params["id"] = ",".join(id_list)
    try:
        fetch_response = requests.get(fetch_url, params=fetch_params, timeout=timeout_seconds)
    except requests.RequestException as exc:
        return f"❌ Error fetching details: could not reach PubMed ({exc})", None
    if fetch_response.status_code != 200:
        return f"❌ Error fetching details: {fetch_response.status_code} - {fetch_response.reason}", None

    try:
        root = ET.fromstring(fetch_response.content)
    except ET.ParseError:
        return "❌ No results could be parsed.", None

    df = pd.DataFrame(_parse_pubmed_articles(root))
    if df.empty:
        return "❌ No results could be parsed.", None

    # Nullable integers keep unknown years as <NA> without turning known
    # years into floats, and make the comparisons below safe.
    df["Year"] = pd.to_numeric(df["Year"], errors="coerce").astype("Int64")

    # Filter by journal and year
    if journal_filter:
        # regex=False: the filter is user text, not a pattern.
        df = df[df["Journal"].str.contains(journal_filter, case=False, na=False, regex=False)]
    if min_year is not None:
        df = df[(df["Year"] >= min_year).fillna(False)]
    if max_year is not None:
        df = df[(df["Year"] <= max_year).fillna(False)]
    if df.empty:
        return "❌ No results matched filters.", None

    # Sort: column name -> ascending?
    sort_orders = {"Year": False, "Title": True, "Journal": True}
    if sort_by in sort_orders:
        df = df.sort_values(by=sort_by, ascending=sort_orders[sort_by])

    # Markdown result text
    sections = []
    for _, row in df.iterrows():
        pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
        year_label = "n/a" if pd.isna(row["Year"]) else int(row["Year"])
        sections.append(
            f"### [{row['Title']}]({pmid_url})\n"
            f"**Journal:** {row['Journal']}  \n"
            f"**Year:** {year_label}  \n"
            f"**Abstract:** {row['Abstract']}\n\n---\n"
        )
    return "".join(sections), df
# Export function
def export_results(df, file_type):
    """Write the search results to a CSV or DOCX file and return its path.

    Each call writes into its own freshly created temporary directory, so
    exports made within the same second (e.g. by two users) cannot overwrite
    each other and the application directory is not filled with result files.

    Args:
        df: DataFrame with the columns ``Title``, ``Journal``, ``Year``,
            ``PMID`` and ``Abstract``; may be ``None``.
        file_type: ``"CSV"`` or ``"DOCX"``.

    Returns:
        Path of the written file, or ``None`` when there is nothing to export
        or ``file_type`` is not supported.
    """
    import os
    import tempfile

    if df is None or df.empty:
        return None
    if file_type not in ("CSV", "DOCX"):
        return None

    now = datetime.now().strftime("%Y%m%d_%H%M%S")
    export_dir = tempfile.mkdtemp(prefix="pubmed_export_")
    path = os.path.join(export_dir, f"pubmed_results_{now}.{file_type.lower()}")

    if file_type == "CSV":
        df.to_csv(path, index=False)
        return path

    doc = Document()
    doc.add_heading("PubMed Search Results", 0)
    for _, row in df.iterrows():
        year = row["Year"]
        if pd.isna(year):
            year_label = "n/a"
        elif isinstance(year, float) and year.is_integer():
            # A column holding missing years is stored as floats; show 2020, not 2020.0.
            year_label = str(int(year))
        else:
            year_label = str(year)

        doc.add_heading(row["Title"], level=1)
        doc.add_paragraph(f"Journal: {row['Journal']}")
        doc.add_paragraph(f"Year: {year_label}")
        doc.add_paragraph(f"PMID: {row['PMID']}")
        doc.add_paragraph(f"Abstract: {row['Abstract']}")
        doc.add_paragraph("--------")
    doc.save(path)
    return path
# Gradio logic
def search_and_display_wrapper(query, max_results, page, sort_by, journal_filter, min_year, max_year):
    """Run a PubMed search as a two-step generator for the UI.

    Yields a "searching" placeholder first so the page updates immediately,
    then the final ``(result_text, dataframe)`` pair. The arguments are passed
    unchanged to ``fetch_pubmed_articles``.

    Yields:
        ``(text, dataframe_or_None)`` tuples; the DataFrame is ``None`` for the
        placeholder and whenever the search produced no usable results.
    """
    yield "🔄 Searching PubMed, please wait...", None
    try:
        result_text, df = fetch_pubmed_articles(
            query, max_results, page, sort_by, journal_filter, min_year, max_year
        )
    except Exception as exc:
        # UI boundary: without this, a failed search would leave the
        # "Searching..." placeholder on screen. Report the failure instead.
        result_text, df = f"❌ Search failed: {exc}", None
    yield result_text, df
# UI layout: search inputs, a results area, and two export buttons that read
# the most recent result table from per-session state.
with gr.Blocks() as app:
    gr.Markdown("# 🔍 PubMed Search Tool\nEnter a biomedical research topic to fetch recent articles from PubMed.")

    with gr.Row():
        query_input = gr.Textbox(label="Search Query", placeholder="e.g., breast cancer treatment", lines=2)
        # precision=0 makes these fields deliver whole numbers to the search function.
        max_results_input = gr.Number(label="Max Results", value=20, precision=0)
        page_input = gr.Number(label="Page Number", value=1, precision=0)

    with gr.Row():
        sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
        journal_filter_input = gr.Textbox(label="Journal Filter (optional)", placeholder="e.g., Nature")
        min_year_input = gr.Number(label="Min Year (optional)", value=2000, precision=0)
        max_year_input = gr.Number(label="Max Year (optional)", value=2025, precision=0)

    # Holds the DataFrame of the latest search so the export buttons can use it.
    df_state = gr.State()

    search_button = gr.Button("🔍 Search PubMed")
    # NOTE(review): status_output is not connected to any event here; the
    # progress message is shown in results_output instead.
    status_output = gr.Markdown()
    results_output = gr.Markdown()

    search_button.click(
        fn=search_and_display_wrapper,
        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
        outputs=[results_output, df_state]
    )

    with gr.Row():
        export_csv_button = gr.Button("⬇️ Export CSV")
        export_docx_button = gr.Button("⬇️ Export DOCX")
        export_csv_output = gr.File()
        export_docx_output = gr.File()

    export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[df_state], outputs=[export_csv_output])
    export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[df_state], outputs=[export_docx_output])

# Start the server only when run as a script, so importing this module
# (e.g. for testing) does not launch the app.
if __name__ == "__main__":
    app.launch()