drvikasgaur committed on
Commit 21eada5 · verified · 1 Parent(s): 9460907

Update app.py

Files changed (1)
  1. app.py +144 -137
app.py CHANGED
@@ -1,160 +1,167 @@
 import gradio as gr
-import pandas as pd
 import requests
 from docx import Document
-from datetime import datetime
-from xml.etree import ElementTree as ET

-# Fetch articles from PubMed
-def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-    if not query or query.strip() == "":
-        return "❌ Please enter a search query.", "", pd.DataFrame()
     try:
-        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
-        params = {
-            "db": "pubmed",
-            "term": query,
-            "retmax": int(max_results),
-            "retstart": int(max_results) * (int(page) - 1),
-            "retmode": "json"
-        }
-        response = requests.get(base_url, params=params, timeout=15)
         if response.status_code != 200:
-            return f"❌ Error: {response.status_code}", "", pd.DataFrame()
-        id_list = response.json().get("esearchresult", {}).get("idlist", [])
-        if not id_list:
-            return "❌ No results found.", "", pd.DataFrame()
-        ids = ",".join(id_list)
-        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
-        fetch_params = {
-            "db": "pubmed",
-            "id": ids,
-            "retmode": "xml",
-        }
-        fetch_response = requests.get(fetch_url, params=fetch_params, timeout=15)
-        if fetch_response.status_code != 200:
-            return f"❌ Error fetching details.", "", pd.DataFrame()
-        root = ET.fromstring(fetch_response.content)
-        articles = []
-        for article in root.findall(".//PubmedArticle"):
             try:
-                title = article.findtext(".//ArticleTitle", default="No Title")
-                abstract = " ".join([abst.text for abst in article.findall(".//AbstractText") if abst.text])
-                journal = article.findtext(".//Journal/Title", default="No Journal")
-                year = article.findtext(".//PubDate/Year")
-                pmid = article.findtext(".//PMID", default="")
-                year = int(year) if year and year.isdigit() else None
-                articles.append({
-                    "Title": title,
-                    "Abstract": abstract,
-                    "Journal": journal,
-                    "Year": year,
-                    "PMID": pmid
-                })
-            except Exception as e:
-                print(f"Error processing article: {e}")  # Important: Log the error. Don't just 'pass'
-                continue
-        df = pd.DataFrame(articles)
-        if df.empty:
-            return "❌ No results could be parsed.", "", pd.DataFrame()
-        # Filter
-        if journal_filter:
-            df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
-        if min_year and str(min_year).isdigit():
-            df = df[df["Year"] >= int(min_year)]
-        if max_year and str(max_year).isdigit():
-            df = df[df["Year"] <= int(max_year)]
-        if df.empty:
-            return "❌ No results matched your filters.", "", pd.DataFrame()
-        # Sort
         if sort_by == "Year":
-            df = df.sort_values(by="Year", ascending=False, na_position='last')
         elif sort_by == "Title":
-            df = df.sort_values(by="Title", ascending=True, na_position='last')
         elif sort_by == "Journal":
-            df = df.sort_values(by="Journal", ascending=True, na_position='last')
-        # Markdown rendering
-        result_text = f"**Showing {len(df)} results:**\n\n"
-        for _, row in df.iterrows():
-            pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
-            result_text += f"#### [{row['Title']}]({pmid_url})\n"
-            result_text += f"**Journal:** {row['Journal']} \n"
-            result_text += f"**Year:** {row['Year']} \n"
-            result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
-        return "✅ Search complete!", result_text, df
     except Exception as e:
-        return f"❌ An unexpected error occurred: {e}", "", pd.DataFrame()

-# Export results
-def export_results(df, file_type):
     if df is None or df.empty:
         return None
-    now = datetime.now().strftime("%Y%m%d_%H%M%S")
-    if file_type == "CSV":
-        path = f"pubmed_results_{now}.csv"
-        df.to_csv(path, index=False, encoding="utf-8")  # Specify encoding
-        return path
-    elif file_type == "DOCX":
-        path = f"pubmed_results_{now}.docx"
         doc = Document()
-        doc.add_heading("PubMed Search Results", 0)
         for _, row in df.iterrows():
-            doc.add_heading(str(row["Title"]), level=1)
-            doc.add_paragraph(f"Journal: {row['Journal']}")
-            doc.add_paragraph(f"Year: {row['Year']}")
-            doc.add_paragraph(f"PMID: {row['PMID']}")
-            doc.add_paragraph(f"Abstract: {row['Abstract']}")
-            doc.add_paragraph("--------")
-        doc.save(path)
-        return path
-    return None
-
-# Gradio Interface
-with gr.Blocks(theme="soft") as app:
     gr.Markdown("""
-# 🔬 PubMed Article Search Tool
-**Instructions:**
-- Enter a biomedical search term (e.g., `autism risk factors`)
-- Adjust optional filters below
-- Click **Search PubMed** to retrieve articles
-- Use export buttons to download results
     """)

     with gr.Row():
-        with gr.Column():
-            query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
-            max_results_input = gr.Number(label="Max Results (1–100)", value=20, minimum=1, maximum=100)
-            page_input = gr.Number(label="Page Number", value=1, minimum=1)
-            sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
-            journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
-            min_year_input = gr.Number(label="Min Year", value=2000, minimum=1800, maximum=2100)
-            max_year_input = gr.Number(label="Max Year", value=2025, minimum=1800, maximum=2100)
-            search_button = gr.Button("🔍 Search PubMed")
-            status_output = gr.Markdown(value="")
-        with gr.Column():
-            markdown_output = gr.Markdown(value="Results will appear here.")
-            table_output = gr.DataFrame(label="Results Table", visible=True, interactive=False)
-            export_df = gr.DataFrame(visible=False)  # hidden for internal export

     with gr.Row():
-        export_csv_button = gr.Button("⬇️ Export CSV")
-        export_docx_button = gr.Button("⬇️ Export DOCX")
-        export_csv_output = gr.File(label="Download CSV")
-        export_docx_output = gr.File(label="Download DOCX")
-
-    # Logic
-    def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-        status, md, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
-        return status, md, df, df
-
-    search_button.click(
-        fn=run_search,
-        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
-        outputs=[status_output, markdown_output, table_output, export_df]
-    )
-
-    export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[export_df], outputs=[export_csv_output])
-    export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[export_df], outputs=[export_docx_output])
-
-app.launch()
 import gradio as gr
 import requests
+import pandas as pd
+import io
 from docx import Document
+import tempfile
+
+API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"

+global_df = None  # Global variable to store search results for export
+
+def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
+    """
+    Fetches PubMed articles and applies sorting and filtering.
+    """
     try:
+        url = f"{API_BASE_URL}?query={query}&max_results={max_results}&page={page}"
+        response = requests.get(url)
+
         if response.status_code != 200:
+            return f"⚠️ API Error: {response.status_code} - {response.text}", None
+
+        articles = response.json()
+
+        if not articles:
+            return "No articles found for this query.", None
+
+        for article in articles:
             try:
+                article["Year"] = int(article["Year"])
+            except:
+                article["Year"] = 0
+
+        # Apply journal filtering
+        if filter_journal and filter_journal != "All":
+            articles = [a for a in articles if filter_journal.lower() in a['Journal'].lower()]
+
+        # Apply year filtering
+        if min_year:
+            articles = [a for a in articles if a["Year"] >= int(min_year)]
+        if max_year:
+            articles = [a for a in articles if a["Year"] <= int(max_year)]
+
+        # Apply sorting
         if sort_by == "Year":
+            articles.sort(key=lambda x: x["Year"], reverse=True)
         elif sort_by == "Title":
+            articles.sort(key=lambda x: x["Title"])
         elif sort_by == "Journal":
+            articles.sort(key=lambda x: x["Journal"])
+
+        # Format results
+        formatted_results = []
+        for article in articles:
+            formatted_results.append(
+                f"## 📰 {article['Title']}\n"
+                f"📖 **<span style='color:blue'>{article['Journal']}</span>** ({article['Year']})\n"
+                f"👨‍🔬 **<span style='color:gray'>{article['Authors']}</span>**\n"
+                f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
+                f"<details><summary>📄 **Show Abstract**</summary>\n{article['Abstract']}\n</details>"
+                f"\n---\n"
+            )
+
+        df = pd.DataFrame(articles)
+        return "\n\n".join(formatted_results), df
+
     except Exception as e:
+        return f"⚠️ Error fetching data: {str(e)}", None
+

+
+def export_results(df, format_type):
+    """
+    Exports search results as a CSV or DOCX file.
+    - Returns the file path instead of BytesIO to avoid TypeError in Gradio.
+    """
     if df is None or df.empty:
         return None
+
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{format_type.lower()}")
+    temp_file_path = temp_file.name  # Store the temporary file path
+
+    if format_type == "CSV":
+        df.to_csv(temp_file_path, index=False)
+    elif format_type == "DOCX":
         doc = Document()
+        doc.add_heading("PubMed Search Results", level=1)
         for _, row in df.iterrows():
+            doc.add_heading(row["Title"], level=2)
+            doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
+            doc.add_paragraph(f"👨‍🔬 Authors: {row['Authors']}")
+            doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
+            doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
+            doc.add_paragraph("---")
+        doc.save(temp_file_path)
+
+    temp_file.close()  # Close the file before returning the path
+    return temp_file_path  # Return file path instead of BytesIO
+
+
+with gr.Blocks() as app:
     gr.Markdown("""
+# 🔍 **PubMed Search Tool with Advanced Features**
+
+## 📖 **How to Use This App**
+1️⃣ **Enter a Search Query** *(e.g., "Deep Learning in Psychiatry")*
+2️⃣ **Set the Number of Results & Page Number** *(Default: 10 results per page)*
+3️⃣ **Choose Sorting Option** *(Year, Title, or Journal - Default: Year)*
+4️⃣ **(Optional) Filter by Journal Name** *(e.g., "Nature", "JAMA")*
+5️⃣ **(Optional) Filter by Year Range** *(Set min & max year, e.g., 2015 - 2023)*
+6️⃣ **Click "🔍 Search" to fetch results**
+7️⃣ **Click "📂 Export as CSV" or "📄 Export as Word DOCX" to save articles**
+8️⃣ **Click "📄 Show Abstract" under each result to expand full abstract**
+
+## ⚠️ **Important Notes**
+- **Sorting & Filtering can be combined** *(e.g., show only "Nature" articles from 2020-2024, sorted by Title)*
+
     """)

     with gr.Row():
+        query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')", lines=1)
+
+    with gr.Row():
+        max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
+        page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")
+
+    with gr.Row():
+        sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
+        journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")

     with gr.Row():
+        min_year_input = gr.Number(label="📅 Min Year", value=None)
+        max_year_input = gr.Number(label="📅 Max Year", value=None)
+
+    with gr.Row():
+        search_button = gr.Button("🔍 Search")
+        export_csv_button = gr.Button("📂 Export as CSV")
+        export_docx_button = gr.Button("📄 Export as Word DOCX")
+
+    results_output = gr.HTML()
+    export_csv_output = gr.File(label="Download CSV")
+    export_docx_output = gr.File(label="Download Word DOCX")
+
+    def search_and_display(query, max_results, page, sort_by, journal_filter, min_year, max_year):
+        global global_df
+        result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
+        global_df = df
+        return result_text
+
+    def export_csv():
+        if global_df is not None:
+            return export_results(global_df, "CSV")
+
+    def export_docx():
+        if global_df is not None:
+            return export_results(global_df, "DOCX")
+
+    search_button.click(search_and_display,
+                        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
+                        outputs=results_output)
+
+    export_csv_button.click(export_csv, outputs=export_csv_output)
+    export_docx_button.click(export_docx, outputs=export_docx_output)
+
+if __name__ == "__main__":
+    app.launch(inbrowser=True)
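
For anyone who wants to exercise the new fetch path without launching the Gradio UI, here is a minimal sketch (not part of the commit). It assumes the Render-hosted wrapper at `API_BASE_URL` above is reachable and returns a JSON list of records with the keys `fetch_pubmed_articles` reads (`Title`, `Journal`, `Year`, `Authors`, `PubMed_URL`, `Abstract`); the helper name `quick_fetch` is illustrative only.

```python
# Sketch: query the wrapper API directly from a plain Python shell.
# Assumes the endpoint below (taken from app.py) is up and returns a JSON list.
import requests

API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"

def quick_fetch(query, max_results=5, page=1):
    # Same parameters fetch_pubmed_articles builds into its URL; passing them
    # via `params=` lets requests URL-encode the query string for us.
    response = requests.get(
        API_BASE_URL,
        params={"query": query, "max_results": max_results, "page": page},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    for article in quick_fetch("autism risk factors"):
        print(article.get("Year"), "-", article.get("Title"))
```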
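
And a small offline check of the export path, also a sketch: it assumes this snippet sits next to the updated `app.py` so the module is importable, and relies on the fact that importing it no longer starts the server because `app.launch()` is now behind the `if __name__ == "__main__"` guard. The record below is placeholder data, not real PubMed output.

```python
# Sketch: exercise export_results with a hand-built DataFrame, no network needed.
import pandas as pd
from app import export_results  # assumes the new app.py is in the same directory

# One fake record with the columns the exporter reads.
df = pd.DataFrame([{
    "Title": "Example article",
    "Journal": "Example Journal",
    "Year": 2024,
    "Authors": "Doe J, Roe R",
    "PubMed_URL": "https://pubmed.ncbi.nlm.nih.gov/0000000/",
    "Abstract": "Placeholder abstract.",
}])

print(export_results(df, "CSV"))   # prints the path of a temporary .csv file
print(export_results(df, "DOCX"))  # prints the path of a temporary .docx file
```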