drvikasgaur committed on
Commit
baf2d04
·
verified ·
1 Parent(s): 311b65d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -39
app.py CHANGED
@@ -5,10 +5,11 @@ from docx import Document
5
  from datetime import datetime
6
  from xml.etree import ElementTree as ET
7
 
8
- # Fetch articles from PubMed
9
  def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
10
  if not query or query.strip() == "":
11
  return "❌ Please enter a search query.", "", pd.DataFrame()
 
12
  try:
13
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
14
  params = {
@@ -21,9 +22,11 @@ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min
21
  response = requests.get(base_url, params=params, timeout=15)
22
  if response.status_code != 200:
23
  return f"❌ Error: {response.status_code}", "", pd.DataFrame()
 
24
  id_list = response.json().get("esearchresult", {}).get("idlist", [])
25
  if not id_list:
26
  return "❌ No results found.", "", pd.DataFrame()
 
27
  ids = ",".join(id_list)
28
  fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
29
  fetch_params = {
@@ -34,6 +37,7 @@ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min
34
  fetch_response = requests.get(fetch_url, params=fetch_params, timeout=15)
35
  if fetch_response.status_code != 200:
36
  return f"❌ Error fetching details.", "", pd.DataFrame()
 
37
  root = ET.fromstring(fetch_response.content)
38
  articles = []
39
  for article in root.findall(".//PubmedArticle"):
@@ -53,10 +57,11 @@ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min
53
  })
54
  except Exception:
55
  continue
 
56
  df = pd.DataFrame(articles)
57
  if df.empty:
58
  return "❌ No results could be parsed.", "", pd.DataFrame()
59
- # Filter
60
  if journal_filter:
61
  df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
62
  if min_year and str(min_year).isdigit():
@@ -65,14 +70,14 @@ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min
65
  df = df[df["Year"] <= int(max_year)]
66
  if df.empty:
67
  return "❌ No results matched your filters.", "", pd.DataFrame()
68
- # Sort
69
  if sort_by == "Year":
70
- df = df.sort_values(by="Year", ascending=False, na_position='last')
71
  elif sort_by == "Title":
72
- df = df.sort_values(by="Title", ascending=True, na_position='last')
73
  elif sort_by == "Journal":
74
- df = df.sort_values(by="Journal", ascending=True, na_position='last')
75
- # Markdown rendering
76
  result_text = f"**Showing {len(df)} results:**\n\n"
77
  for _, row in df.iterrows():
78
  pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
@@ -80,11 +85,15 @@ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min
80
  result_text += f"**Journal:** {row['Journal']} \n"
81
  result_text += f"**Year:** {row['Year']} \n"
82
  result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
83
- return "✅ Search complete!", result_text, df
84
 
85
- # Export results
 
 
 
 
 
86
  def export_results(df, file_type):
87
- if df is None or df.empty:
88
  return None
89
  now = datetime.now().strftime("%Y%m%d_%H%M%S")
90
  if file_type == "CSV":
@@ -106,54 +115,61 @@ def export_results(df, file_type):
106
  return path
107
  return None
108
 
109
- # Gradio Interface
110
- with gr.Blocks(theme="soft") as app:
 
 
 
 
111
  gr.Markdown("""
112
  # 🔬 PubMed Article Search Tool
 
 
113
  **Instructions:**
114
- - Enter a biomedical search term (e.g., `autism risk factors`)
115
- - Adjust optional filters below
116
- - Click **Search PubMed** to retrieve articles
117
- - Use export buttons to download results
118
  """)
119
 
120
  with gr.Row():
121
  with gr.Column():
122
  query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
123
- max_results_input = gr.Number(label="Max Results (1–100)", value=20, minimum=1, maximum=100)
124
  page_input = gr.Number(label="Page Number", value=1, minimum=1)
125
  sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
126
  journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
127
- min_year_input = gr.Number(label="Min Year", value=2000, minimum=1800, maximum=2100)
128
- max_year_input = gr.Number(label="Max Year", value=2025, minimum=1800, maximum=2100)
129
  search_button = gr.Button("🔍 Search PubMed")
130
- status_output = gr.Markdown(value="")
131
- with gr.Column():
132
- markdown_output = gr.Markdown(value="Results will appear here.")
133
- table_output = gr.DataFrame(label="Results Table", visible=True, interactive=False)
134
- export_df = gr.Dataframe(visible=False) # hidden for internal export
135
 
136
- with gr.Row():
137
- export_csv_button = gr.Button("⬇️ Export CSV")
138
- export_docx_button = gr.Button("⬇️ Export DOCX")
139
- export_csv_output = gr.File(label="Download CSV")
140
- export_docx_output = gr.File(label="Download DOCX")
 
 
141
 
142
- # Logic
143
  def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
144
- status, md, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
145
- return status, md, df, df
146
 
147
  search_button.click(
 
 
148
  fn=run_search,
149
- inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
150
- outputs=[status_output, markdown_output, table_output, export_df]
 
 
 
 
151
  )
152
 
153
- export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[export_df], outputs=[export_csv_output])
154
- export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[export_df], outputs=[export_docx_output])
155
 
156
  app.launch()
157
-
158
-
159
-
 
5
  from datetime import datetime
6
  from xml.etree import ElementTree as ET
7
 
8
+ # ------------------- PubMed Fetching Logic -------------------
9
  def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
10
  if not query or query.strip() == "":
11
  return "❌ Please enter a search query.", "", pd.DataFrame()
12
+
13
  try:
14
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
15
  params = {
 
22
  response = requests.get(base_url, params=params, timeout=15)
23
  if response.status_code != 200:
24
  return f"❌ Error: {response.status_code}", "", pd.DataFrame()
25
+
26
  id_list = response.json().get("esearchresult", {}).get("idlist", [])
27
  if not id_list:
28
  return "❌ No results found.", "", pd.DataFrame()
29
+
30
  ids = ",".join(id_list)
31
  fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
32
  fetch_params = {
 
37
  fetch_response = requests.get(fetch_url, params=fetch_params, timeout=15)
38
  if fetch_response.status_code != 200:
39
  return f"❌ Error fetching details.", "", pd.DataFrame()
40
+
41
  root = ET.fromstring(fetch_response.content)
42
  articles = []
43
  for article in root.findall(".//PubmedArticle"):
 
57
  })
58
  except Exception:
59
  continue
60
+
61
  df = pd.DataFrame(articles)
62
  if df.empty:
63
  return "❌ No results could be parsed.", "", pd.DataFrame()
64
+
65
  if journal_filter:
66
  df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
67
  if min_year and str(min_year).isdigit():
 
70
  df = df[df["Year"] <= int(max_year)]
71
  if df.empty:
72
  return "❌ No results matched your filters.", "", pd.DataFrame()
73
+
74
  if sort_by == "Year":
75
+ df = df.sort_values(by="Year", ascending=False)
76
  elif sort_by == "Title":
77
+ df = df.sort_values(by="Title", ascending=True)
78
  elif sort_by == "Journal":
79
+ df = df.sort_values(by="Journal", ascending=True)
80
+
81
  result_text = f"**Showing {len(df)} results:**\n\n"
82
  for _, row in df.iterrows():
83
  pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
 
85
  result_text += f"**Journal:** {row['Journal']} \n"
86
  result_text += f"**Year:** {row['Year']} \n"
87
  result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
 
88
 
89
+ return "✅ Search complete!", result_text, df.reset_index(drop=True)
90
+
91
+ except Exception as e:
92
+ return f"❌ Error: {str(e)}", "", pd.DataFrame()
93
+
94
+ # ------------------- Export Logic -------------------
95
  def export_results(df, file_type):
96
+ if df is None or len(df) == 0:
97
  return None
98
  now = datetime.now().strftime("%Y%m%d_%H%M%S")
99
  if file_type == "CSV":
 
115
  return path
116
  return None
117
 
118
+ # ------------------- Spinner Toggle -------------------
119
+ def toggle_spinner(show: bool):
120
+ return gr.update(visible=show)
121
+
122
+ # ------------------- Gradio App -------------------
123
+ with gr.Blocks(title="PubMed Search Tool") as app:
124
  gr.Markdown("""
125
  # 🔬 PubMed Article Search Tool
126
+ Search biomedical literature from PubMed with filtering and export options.
127
+
128
  **Instructions:**
129
+ 1. Enter a search term like `"brain inflammation"` or `"COVID vaccine"`
130
+ 2. Adjust optional filters (journal name, year, sorting)
131
+ 3. Click **🔍 Search PubMed** to get article summaries
132
+ 4. Export to **CSV** or **Word** using the buttons below
133
  """)
134
 
135
  with gr.Row():
136
  with gr.Column():
137
  query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
138
+ max_results_input = gr.Number(label="Max Results", value=20, minimum=1, maximum=100)
139
  page_input = gr.Number(label="Page Number", value=1, minimum=1)
140
  sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
141
  journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
142
+ min_year_input = gr.Number(label="Min Year (optional)", value=2000)
143
+ max_year_input = gr.Number(label="Max Year (optional)", value=2025)
144
  search_button = gr.Button("🔍 Search PubMed")
145
+ status_output = gr.Markdown()
146
+ spinner = gr.Markdown("⏳ Please wait...", visible=False)
 
 
 
147
 
148
+ with gr.Column():
149
+ markdown_output = gr.Markdown("Search results will appear here.")
150
+ table_output = gr.DataFrame(label="Results Table", interactive=False)
151
+ export_csv_button = gr.Button("⬇️ Export CSV")
152
+ export_docx_button = gr.Button("⬇️ Export DOCX")
153
+ export_csv_output = gr.File(label="Download CSV")
154
+ export_docx_output = gr.File(label="Download DOCX")
155
 
156
+ # ------------- Define search and export interaction -------------
157
  def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
158
+ return fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
 
159
 
160
  search_button.click(
161
+ lambda: toggle_spinner(True), inputs=[], outputs=[spinner], show_progress=False
162
+ ).then(
163
  fn=run_search,
164
+ inputs=[query_input, max_results_input, page_input, sort_input,
165
+ journal_filter_input, min_year_input, max_year_input],
166
+ outputs=[status_output, markdown_output, table_output],
167
+ show_progress=True
168
+ ).then(
169
+ lambda: toggle_spinner(False), inputs=[], outputs=[spinner], show_progress=False
170
  )
171
 
172
+ export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[table_output], outputs=[export_csv_output])
173
+ export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[table_output], outputs=[export_docx_output])
174
 
175
  app.launch()