drvikasgaur committed on
Commit 21eada5 · verified · 1 Parent(s): 9460907

Update app.py

Files changed (1)
  1. app.py +144 -137
app.py CHANGED
@@ -1,160 +1,167 @@
 import gradio as gr
-import pandas as pd
 import requests
 from docx import Document
-from datetime import datetime
-from xml.etree import ElementTree as ET

-# Fetch articles from PubMed
-def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-    if not query or query.strip() == "":
-        return "❌ Please enter a search query.", "", pd.DataFrame()
     try:
-        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
-        params = {
-            "db": "pubmed",
-            "term": query,
-            "retmax": int(max_results),
-            "retstart": int(max_results) * (int(page) - 1),
-            "retmode": "json"
-        }
-        response = requests.get(base_url, params=params, timeout=15)
         if response.status_code != 200:
-            return f"❌ Error: {response.status_code}", "", pd.DataFrame()
-        id_list = response.json().get("esearchresult", {}).get("idlist", [])
-        if not id_list:
-            return "❌ No results found.", "", pd.DataFrame()
-        ids = ",".join(id_list)
-        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
-        fetch_params = {
-            "db": "pubmed",
-            "id": ids,
-            "retmode": "xml",
-        }
-        fetch_response = requests.get(fetch_url, params=fetch_params, timeout=15)
-        if fetch_response.status_code != 200:
-            return f"❌ Error fetching details.", "", pd.DataFrame()
-        root = ET.fromstring(fetch_response.content)
-        articles = []
-        for article in root.findall(".//PubmedArticle"):
             try:
-                title = article.findtext(".//ArticleTitle", default="No Title")
-                abstract = " ".join([abst.text for abst in article.findall(".//AbstractText") if abst.text])
-                journal = article.findtext(".//Journal/Title", default="No Journal")
-                year = article.findtext(".//PubDate/Year")
-                pmid = article.findtext(".//PMID", default="")
-                year = int(year) if year and year.isdigit() else None
-                articles.append({
-                    "Title": title,
-                    "Abstract": abstract,
-                    "Journal": journal,
-                    "Year": year,
-                    "PMID": pmid
-                })
-            except Exception as e:
-                print(f"Error processing article: {e}")  # Important: Log the error. Don't just 'pass'
-                continue
-        df = pd.DataFrame(articles)
-        if df.empty:
-            return "❌ No results could be parsed.", "", pd.DataFrame()
-        # Filter
-        if journal_filter:
-            df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
-        if min_year and str(min_year).isdigit():
-            df = df[df["Year"] >= int(min_year)]
-        if max_year and str(max_year).isdigit():
-            df = df[df["Year"] <= int(max_year)]
-        if df.empty:
-            return "❌ No results matched your filters.", "", pd.DataFrame()
-        # Sort
         if sort_by == "Year":
-            df = df.sort_values(by="Year", ascending=False, na_position='last')
         elif sort_by == "Title":
-            df = df.sort_values(by="Title", ascending=True, na_position='last')
         elif sort_by == "Journal":
-            df = df.sort_values(by="Journal", ascending=True, na_position='last')
-        # Markdown rendering
-        result_text = f"**Showing {len(df)} results:**\n\n"
-        for _, row in df.iterrows():
-            pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
-            result_text += f"#### [{row['Title']}]({pmid_url})\n"
-            result_text += f"**Journal:** {row['Journal']} \n"
-            result_text += f"**Year:** {row['Year']} \n"
-            result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
-        return "✅ Search complete!", result_text, df
     except Exception as e:
-        return f"❌ An unexpected error occurred: {e}", "", pd.DataFrame()

-# Export results
-def export_results(df, file_type):
     if df is None or df.empty:
         return None
-    now = datetime.now().strftime("%Y%m%d_%H%M%S")
-    if file_type == "CSV":
-        path = f"pubmed_results_{now}.csv"
-        df.to_csv(path, index=False, encoding="utf-8")  # Specify encoding
-        return path
-    elif file_type == "DOCX":
-        path = f"pubmed_results_{now}.docx"
         doc = Document()
-        doc.add_heading("PubMed Search Results", 0)
         for _, row in df.iterrows():
-            doc.add_heading(str(row["Title"]), level=1)
-            doc.add_paragraph(f"Journal: {row['Journal']}")
-            doc.add_paragraph(f"Year: {row['Year']}")
-            doc.add_paragraph(f"PMID: {row['PMID']}")
-            doc.add_paragraph(f"Abstract: {row['Abstract']}")
-            doc.add_paragraph("--------")
-        doc.save(path)
-        return path
-    return None
-
-# Gradio Interface
-with gr.Blocks(theme="soft") as app:
     gr.Markdown("""
-# 🔬 PubMed Article Search Tool
-**Instructions:**
-- Enter a biomedical search term (e.g., `autism risk factors`)
-- Adjust optional filters below
-- Click **Search PubMed** to retrieve articles
-- Use export buttons to download results
     """)

     with gr.Row():
-        with gr.Column():
-            query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
-            max_results_input = gr.Number(label="Max Results (1–100)", value=20, minimum=1, maximum=100)
-            page_input = gr.Number(label="Page Number", value=1, minimum=1)
-            sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
-            journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
-            min_year_input = gr.Number(label="Min Year", value=2000, minimum=1800, maximum=2100)
-            max_year_input = gr.Number(label="Max Year", value=2025, minimum=1800, maximum=2100)
-            search_button = gr.Button("🔍 Search PubMed")
-            status_output = gr.Markdown(value="")
-        with gr.Column():
-            markdown_output = gr.Markdown(value="Results will appear here.")
-            table_output = gr.DataFrame(label="Results Table", visible=True, interactive=False)
-            export_df = gr.DataFrame(visible=False)  # hidden for internal export

     with gr.Row():
-        export_csv_button = gr.Button("⬇️ Export CSV")
-        export_docx_button = gr.Button("⬇️ Export DOCX")
-        export_csv_output = gr.File(label="Download CSV")
-        export_docx_output = gr.File(label="Download DOCX")
-
-    # Logic
-    def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-        status, md, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
-        return status, md, df, df
-
-    search_button.click(
-        fn=run_search,
-        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
-        outputs=[status_output, markdown_output, table_output, export_df]
-    )
-
-    export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[export_df], outputs=[export_csv_output])
-    export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[export_df], outputs=[export_docx_output])
-
-app.launch()
 import gradio as gr
 import requests
+import pandas as pd
+import io
 from docx import Document
+import tempfile
+
+API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"

+global_df = None  # Global variable to store search results for export
+
+def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
+    """
+    Fetches PubMed articles and applies sorting and filtering.
+    """
     try:
+        url = f"{API_BASE_URL}?query={query}&max_results={max_results}&page={page}"
+        response = requests.get(url)
+
         if response.status_code != 200:
+            return f"⚠️ API Error: {response.status_code} - {response.text}", None
+
+        articles = response.json()
+
+        if not articles:
+            return "No articles found for this query.", None
+
+        for article in articles:
             try:
+                article["Year"] = int(article["Year"])
+            except:
+                article["Year"] = 0
+
+        # Apply journal filtering
+        if filter_journal and filter_journal != "All":
+            articles = [a for a in articles if filter_journal.lower() in a['Journal'].lower()]
+
+        # Apply year filtering
+        if min_year:
+            articles = [a for a in articles if a["Year"] >= int(min_year)]
+        if max_year:
+            articles = [a for a in articles if a["Year"] <= int(max_year)]
+
+        # Apply sorting
         if sort_by == "Year":
+            articles.sort(key=lambda x: x["Year"], reverse=True)
         elif sort_by == "Title":
+            articles.sort(key=lambda x: x["Title"])
         elif sort_by == "Journal":
+            articles.sort(key=lambda x: x["Journal"])
+
+        # Format results
+        formatted_results = []
+        for article in articles:
+            formatted_results.append(
+                f"## 📰 {article['Title']}\n"
+                f"📖 **<span style='color:blue'>{article['Journal']}</span>** ({article['Year']})\n"
+                f"👨‍🔬 **<span style='color:gray'>{article['Authors']}</span>**\n"
+                f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
+                f"<details><summary>📄 **Show Abstract**</summary>\n{article['Abstract']}\n</details>"
+                f"\n---\n"
+            )
+
+        df = pd.DataFrame(articles)
+        return "\n\n".join(formatted_results), df
+
     except Exception as e:
+        return f"⚠️ Error fetching data: {str(e)}", None
+

+
+def export_results(df, format_type):
+    """
+    Exports search results as a CSV or DOCX file.
+    - Returns the file path instead of BytesIO to avoid TypeError in Gradio.
+    """
     if df is None or df.empty:
         return None
+
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{format_type.lower()}")
+    temp_file_path = temp_file.name  # Store the temporary file path
+
+    if format_type == "CSV":
+        df.to_csv(temp_file_path, index=False)
+    elif format_type == "DOCX":
         doc = Document()
+        doc.add_heading("PubMed Search Results", level=1)
         for _, row in df.iterrows():
+            doc.add_heading(row["Title"], level=2)
+            doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
+            doc.add_paragraph(f"👨‍🔬 Authors: {row['Authors']}")
+            doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
+            doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
+            doc.add_paragraph("---")
+        doc.save(temp_file_path)
+
+    temp_file.close()  # Close the file before returning the path
+    return temp_file_path  # Return file path instead of BytesIO
+
+
+with gr.Blocks() as app:
     gr.Markdown("""
+# 🔍 **PubMed Search Tool with Advanced Features**
+
+## 📖 **How to Use This App**
+1️⃣ **Enter a Search Query** *(e.g., "Deep Learning in Psychiatry")*
+2️⃣ **Set the Number of Results & Page Number** *(Default: 10 results per page)*
+3️⃣ **Choose Sorting Option** *(Year, Title, or Journal - Default: Year)*
+4️⃣ **(Optional) Filter by Journal Name** *(e.g., "Nature", "JAMA")*
+5️⃣ **(Optional) Filter by Year Range** *(Set min & max year, e.g., 2015 - 2023)*
+6️⃣ **Click "🔍 Search" to fetch results**
+7️⃣ **Click "📂 Export as CSV" or "📄 Export as Word DOCX" to save articles**
+8️⃣ **Click "📄 Show Abstract" under each result to expand full abstract**
+
+## ⚠️ **Important Notes**
+- **Sorting & Filtering can be combined** *(e.g., show only "Nature" articles from 2020-2024, sorted by Title)*
+
     """)

     with gr.Row():
+        query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')", lines=1)
+
+    with gr.Row():
+        max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
+        page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")
+
+    with gr.Row():
+        sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
+        journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")

     with gr.Row():
+        min_year_input = gr.Number(label="📅 Min Year", value=None)
+        max_year_input = gr.Number(label="📅 Max Year", value=None)
+
+    with gr.Row():
+        search_button = gr.Button("🔍 Search")
+        export_csv_button = gr.Button("📂 Export as CSV")
+        export_docx_button = gr.Button("📄 Export as Word DOCX")
+
+    results_output = gr.HTML()
+    export_csv_output = gr.File(label="Download CSV")
+    export_docx_output = gr.File(label="Download Word DOCX")
+
+    def search_and_display(query, max_results, page, sort_by, journal_filter, min_year, max_year):
+        global global_df
+        result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
+        global_df = df
+        return result_text
+
+    def export_csv():
+        if global_df is not None:
+            return export_results(global_df, "CSV")
+
+    def export_docx():
+        if global_df is not None:
+            return export_results(global_df, "DOCX")
+
+    search_button.click(search_and_display,
+                        inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
+                        outputs=results_output)
+
+    export_csv_button.click(export_csv, outputs=export_csv_output)
+    export_docx_button.click(export_docx, outputs=export_docx_output)
+
+if __name__ == "__main__":
+    app.launch(inbrowser=True)
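
For anyone who wants to exercise the new fetch path without launching the Gradio UI, here is a minimal sketch (not part of the commit). It assumes the Render-hosted wrapper at `API_BASE_URL` above is reachable and returns a JSON list of records with the keys `fetch_pubmed_articles` reads (`Title`, `Journal`, `Year`, `Authors`, `PubMed_URL`, `Abstract`); the helper name `quick_fetch` is illustrative only.

```python
# Sketch: query the wrapper API directly from a plain Python shell.
# Assumes the endpoint below (taken from app.py) is up and returns a JSON list.
import requests

API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"

def quick_fetch(query, max_results=5, page=1):
    # Same parameters fetch_pubmed_articles builds into its URL; passing them
    # via `params=` lets requests URL-encode the query string for us.
    response = requests.get(
        API_BASE_URL,
        params={"query": query, "max_results": max_results, "page": page},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    for article in quick_fetch("autism risk factors"):
        print(article.get("Year"), "-", article.get("Title"))
```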
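
And a small offline check of the export path, also a sketch: it assumes this snippet sits next to the updated `app.py` so the module is importable, and relies on the fact that importing it no longer starts the server because `app.launch()` is now behind the `if __name__ == "__main__"` guard. The record below is placeholder data, not real PubMed output.

```python
# Sketch: exercise export_results with a hand-built DataFrame, no network needed.
import pandas as pd
from app import export_results  # assumes the new app.py is in the same directory

# One fake record with the columns the exporter reads.
df = pd.DataFrame([{
    "Title": "Example article",
    "Journal": "Example Journal",
    "Year": 2024,
    "Authors": "Doe J, Roe R",
    "PubMed_URL": "https://pubmed.ncbi.nlm.nih.gov/0000000/",
    "Abstract": "Placeholder abstract.",
}])

print(export_results(df, "CSV"))   # prints the path of a temporary .csv file
print(export_results(df, "DOCX"))  # prints the path of a temporary .docx file
```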