drvikasgaur committed on
Commit
e8b239b
·
verified ·
1 Parent(s): 65338ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -118
app.py CHANGED
@@ -5,102 +5,90 @@ from docx import Document
5
  from datetime import datetime
6
  from xml.etree import ElementTree as ET
7
 
8
- # Function to fetch PubMed articles
9
  def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
10
- base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
11
- api_key = "" # Optional
12
- params = {
13
- "db": "pubmed",
14
- "term": query,
15
- "retmax": max_results,
16
- "retstart": max_results * (page - 1),
17
- "retmode": "json",
18
- "api_key": api_key
19
- }
20
-
21
- response = requests.get(base_url, params=params)
22
- if response.status_code != 200:
23
- return f"❌ Error: {response.status_code}", None, None
24
-
25
- id_list = response.json().get("esearchresult", {}).get("idlist", [])
26
- if not id_list:
27
- return "❌ No results found.", None, None
28
-
29
- ids = ",".join(id_list)
30
- fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
31
- fetch_params = {
32
- "db": "pubmed",
33
- "id": ids,
34
- "retmode": "xml",
35
- "api_key": api_key
36
- }
37
-
38
- fetch_response = requests.get(fetch_url, params=fetch_params)
39
- if fetch_response.status_code != 200:
40
- return f"❌ Error fetching details.", None, None
41
-
42
- root = ET.fromstring(fetch_response.content)
43
- articles = []
44
- for article in root.findall(".//PubmedArticle"):
45
- try:
46
- title = article.findtext(".//ArticleTitle", default="No Title")
47
- abstract = " ".join([abst.text for abst in article.findall(".//AbstractText") if abst.text])
48
- journal = article.findtext(".//Journal/Title", default="No Journal")
49
- year = article.findtext(".//PubDate/Year")
50
- pmid = article.findtext(".//PMID", default="")
51
-
52
- year = int(year) if year and year.isdigit() else None
53
-
54
- articles.append({
55
- "Title": title,
56
- "Abstract": abstract,
57
- "Journal": journal,
58
- "Year": year,
59
- "PMID": pmid
60
- })
61
- except Exception:
62
- continue
63
-
64
- df = pd.DataFrame(articles)
65
- if df.empty:
66
- return "❌ No results could be parsed.", None, None
67
-
68
- # Filtering
69
- if journal_filter:
70
- df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
71
- if min_year is not None:
72
- df = df[df["Year"] >= min_year]
73
- if max_year is not None:
74
- df = df[df["Year"] <= max_year]
75
- if df.empty:
76
- return "❌ No results matched filters.", None, None
77
-
78
- # Sorting
79
- if sort_by == "Year":
80
- df = df.sort_values(by="Year", ascending=False)
81
- elif sort_by == "Title":
82
- df = df.sort_values(by="Title", ascending=True)
83
- elif sort_by == "Journal":
84
- df = df.sort_values(by="Journal", ascending=True)
85
-
86
- # Markdown rendering
87
- result_text = ""
88
- for _, row in df.iterrows():
89
- pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
90
- result_text += f"### [{row['Title']}]({pmid_url})\n"
91
- result_text += f"**Journal:** {row['Journal']} \n"
92
- result_text += f"**Year:** {row['Year']} \n"
93
- result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
94
-
95
- return "βœ… Search complete!", result_text, df
96
-
97
-
98
- # Export results to file
99
  def export_results(df, file_type):
100
- now = datetime.now().strftime("%Y%m%d_%H%M%S")
101
- if df is None or df.empty:
102
  return None
103
-
104
  if file_type == "CSV":
105
  path = f"pubmed_results_{now}.csv"
106
  df.to_csv(path, index=False)
@@ -110,7 +98,7 @@ def export_results(df, file_type):
110
  doc = Document()
111
  doc.add_heading("PubMed Search Results", 0)
112
  for _, row in df.iterrows():
113
- doc.add_heading(row["Title"], level=1)
114
  doc.add_paragraph(f"Journal: {row['Journal']}")
115
  doc.add_paragraph(f"Year: {row['Year']}")
116
  doc.add_paragraph(f"PMID: {row['PMID']}")
@@ -120,46 +108,57 @@ def export_results(df, file_type):
120
  return path
121
  return None
122
 
 
 
 
 
123
 
124
- # App UI
125
- with gr.Blocks() as app:
126
- gr.Markdown("## πŸ”¬ PubMed Search Tool\nEnter your biomedical search query below.")
 
 
127
 
128
- with gr.Row():
129
- query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
130
- max_results_input = gr.Number(label="Max Results", value=20)
131
- page_input = gr.Number(label="Page Number", value=1)
132
 
133
  with gr.Row():
134
- sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
135
- journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
136
- min_year_input = gr.Number(label="Min Year (optional)", value=2000)
137
- max_year_input = gr.Number(label="Max Year (optional)", value=2025)
 
 
 
 
 
 
 
 
 
 
138
 
139
- status_output = gr.Markdown()
140
- markdown_output = gr.Markdown()
141
- table_output = gr.Dataframe()
142
- df_state = gr.State()
 
143
 
 
144
  def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
145
- status_output.update("πŸ”„ Searching PubMed...")
146
- status, md, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
 
147
  return status, md, df, df
148
 
149
- search_button = gr.Button("πŸ” Search PubMed")
150
  search_button.click(
151
  fn=run_search,
152
  inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
153
  outputs=[status_output, markdown_output, table_output, df_state]
154
  )
155
 
156
- with gr.Row():
157
- export_csv_button = gr.Button("⬇️ Export CSV")
158
- export_docx_button = gr.Button("⬇️ Export DOCX")
159
- export_csv_output = gr.File()
160
- export_docx_output = gr.File()
161
-
162
  export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[df_state], outputs=[export_csv_output])
163
  export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[df_state], outputs=[export_docx_output])
164
 
165
  app.launch()
 
 
5
  from datetime import datetime
6
  from xml.etree import ElementTree as ET
7
 
8
# 1. Function to fetch PubMed articles
def _node_text(node):
    """Return all text inside *node*, including text nested in inline markup.

    PubMed titles and abstracts may contain child elements such as <i> or
    <sub>; ``Element.text`` stops at the first child, so ``itertext`` is used.
    """
    if node is None:
        return ""
    return "".join(node.itertext()).strip()


def _int_or_none(value):
    """Return *value* as an int, or None when it is missing or not numeric.

    Accepts ints, floats such as ``2000.0`` and numeric strings; NaN,
    infinities, empty strings and None all map to None.
    """
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _parse_pubmed_xml(xml_payload):
    """Convert an EFetch XML payload into a list of article dicts."""
    records = []
    for article in ET.fromstring(xml_payload).findall(".//PubmedArticle"):
        year = article.findtext(".//PubDate/Year")
        abstract_parts = (_node_text(part) for part in article.findall(".//AbstractText"))
        records.append({
            "Title": _node_text(article.find(".//ArticleTitle")) or "No Title",
            "Abstract": " ".join(part for part in abstract_parts if part),
            "Journal": article.findtext(".//Journal/Title", default="No Journal"),
            # Some records carry no plain <Year>; those are stored as None.
            "Year": int(year) if year and year.isdigit() else None,
            "PMID": article.findtext(".//PMID", default=""),
        })
    return records


def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
    """Search PubMed and return the matching articles.

    Args:
        query: Search term sent to ESearch. Empty/blank input is rejected.
        max_results: Page size (``retmax``); coerced to int, at least 1.
        page: 1-based page number; values below 1 are treated as page 1.
        sort_by: "Year" (newest first), "Title" or "Journal" (A-Z). Any other
            value leaves the order returned by PubMed.
        journal_filter: Optional case-insensitive substring of the journal
            name. It is matched literally, not as a regular expression.
        min_year: Optional inclusive lower bound on the publication year.
        max_year: Optional inclusive upper bound on the publication year.
            Bounds may be ints, floats or numeric strings; anything that is
            not numeric disables that bound. Articles without a year are
            dropped whenever a bound is active.

    Returns:
        A ``(status, markdown, dataframe)`` tuple. On any failure the status
        starts with "❌", the Markdown is "" and the DataFrame is empty.
    """
    if not query or not query.strip():
        return "❌ Please enter a search query.", "", pd.DataFrame()
    try:
        per_page = max(int(max_results), 1)
        # Clamp so that page 0 or a negative page never yields a negative offset.
        offset = per_page * max(int(page) - 1, 0)

        response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
            params={
                "db": "pubmed",
                "term": query,
                "retmax": per_page,
                "retstart": offset,
                "retmode": "json",
            },
            timeout=15,
        )
        if response.status_code != 200:
            return f"❌ Error: {response.status_code}", "", pd.DataFrame()
        id_list = response.json().get("esearchresult", {}).get("idlist", [])
        if not id_list:
            return "❌ No results found.", "", pd.DataFrame()

        fetch_response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
            params={"db": "pubmed", "id": ",".join(id_list), "retmode": "xml"},
            timeout=15,
        )
        if fetch_response.status_code != 200:
            return "❌ Error fetching details.", "", pd.DataFrame()

        df = pd.DataFrame(_parse_pubmed_xml(fetch_response.content))
        if df.empty:
            return "❌ No results could be parsed.", "", pd.DataFrame()

        # Filter. One boolean mask is built so every condition is evaluated
        # against the same index.
        keep = pd.Series(True, index=df.index)
        if journal_filter:
            # regex=False: user input such as "(" must not be parsed as a pattern.
            keep &= df["Journal"].str.contains(journal_filter, case=False, na=False, regex=False)
        # Numeric view of the column: comparisons stay valid even when every
        # year is missing (an all-None object column would raise TypeError).
        years = pd.to_numeric(df["Year"], errors="coerce")
        lower = _int_or_none(min_year)
        upper = _int_or_none(max_year)
        if lower is not None:
            keep &= years >= lower
        if upper is not None:
            keep &= years <= upper
        df = df[keep]
        if df.empty:
            return "❌ No results matched your filters.", "", pd.DataFrame()

        # Sort
        if sort_by in ("Year", "Title", "Journal"):
            df = df.sort_values(by=sort_by, ascending=(sort_by != "Year"), na_position="last")

        # Markdown rendering. Two trailing spaces before "\n" force a Markdown
        # line break; a single space would merge the lines into one paragraph.
        sections = [f"**Showing {len(df)} results:**\n\n"]
        for _, row in df.iterrows():
            year = row["Year"]
            # The column is float when some years are missing; show 2020, not 2020.0.
            year_label = "N/A" if pd.isna(year) else int(year)
            sections.append(
                f"#### [{row['Title']}](https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/)\n"
                f"**Journal:** {row['Journal']}  \n"
                f"**Year:** {year_label}  \n"
                f"**Abstract:** {row['Abstract']}\n\n---\n"
            )
        return "✅ Search complete!", "".join(sections), df.reset_index(drop=True)
    except Exception as e:
        # UI boundary: network, JSON and XML failures are reported as a status
        # message instead of crashing the callback.
        return f"❌ Error: {str(e)}", "", pd.DataFrame()
86
+
87
+ # 2. Export results to file
 
 
 
 
 
 
 
 
 
 
 
88
  def export_results(df, file_type):
89
+ if df is None or len(df) == 0:
 
90
  return None
91
+ now = datetime.now().strftime("%Y%m%d_%H%M%S")
92
  if file_type == "CSV":
93
  path = f"pubmed_results_{now}.csv"
94
  df.to_csv(path, index=False)
 
98
  doc = Document()
99
  doc.add_heading("PubMed Search Results", 0)
100
  for _, row in df.iterrows():
101
+ doc.add_heading(str(row["Title"]), level=1)
102
  doc.add_paragraph(f"Journal: {row['Journal']}")
103
  doc.add_paragraph(f"Year: {row['Year']}")
104
  doc.add_paragraph(f"PMID: {row['PMID']}")
 
108
  return path
109
  return None
110
 
111
# 3. Gradio Interface
# Layout: a two-column row (search form on the left, results on the right)
# followed by a row with the export buttons and their download slots.
with gr.Blocks(theme="soft") as app:
    gr.Markdown("""
    # 🔬 PubMed Article Search Tool

    **Instructions:**
    1. Enter your biomedical search query (e.g., `brain inflammation`) in the Search Query box.
    2. Adjust other filters (journal, year, sort) as needed.
    3. Click **Search PubMed** to retrieve results.
    4. Export results to CSV or DOCX after the search.

    ---
    """)

    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(label="Search Query", placeholder="e.g., brain inflammation", lines=2)
            # precision=0 makes every numeric input reach the callback as an
            # int; without it gr.Number hands over floats such as 20.0.
            max_results_input = gr.Number(
                label="Max Results (1-100)", value=20, minimum=1, maximum=100, precision=0
            )
            page_input = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
            sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
            journal_filter_input = gr.Textbox(label="Journal Filter (optional)")
            min_year_input = gr.Number(
                label="Min Year (optional)", value=2000, minimum=1800, maximum=2100, precision=0
            )
            max_year_input = gr.Number(
                label="Max Year (optional)", value=2025, minimum=1800, maximum=2100, precision=0
            )
            search_button = gr.Button("🔍 Search PubMed", elem_id="search-btn")
            status_output = gr.Markdown(value="")
        with gr.Column():
            markdown_output = gr.Markdown(value="Results will appear here.")
            table_output = gr.DataFrame(
                value=pd.DataFrame(), label="Results Table", visible=True, interactive=False
            )

    # Holds the DataFrame of the last search so the export buttons can reuse it.
    df_state = gr.State(value=pd.DataFrame())

    with gr.Row():
        export_csv_button = gr.Button("⬇️ Export CSV")
        export_docx_button = gr.Button("⬇️ Export DOCX")
        export_csv_output = gr.File(label="Download CSV")
        export_docx_output = gr.File(label="Download DOCX")

    # 4. Logic to control buttons and flow
    def run_search(query, max_results, page, sort_by, journal_filter, min_year, max_year):
        """Run the PubMed search and fan the result out to the UI.

        Returns the status text, the Markdown listing, and the DataFrame
        twice: once for the visible table and once for ``df_state``.
        """
        status, md, df = fetch_pubmed_articles(
            query, max_results, page, sort_by, journal_filter, min_year, max_year
        )
        return status, md, df, df

    search_button.click(
        fn=run_search,
        inputs=[
            query_input,
            max_results_input,
            page_input,
            sort_input,
            journal_filter_input,
            min_year_input,
            max_year_input,
        ],
        outputs=[status_output, markdown_output, table_output, df_state],
    )

    export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[df_state], outputs=[export_csv_output])
    export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[df_state], outputs=[export_docx_output])

app.launch()
164
+