drvikasgaur committed on
Commit 53a1484 · verified · 1 Parent(s): 4fe5fbb

Update app.py

Files changed (1)
  1. app.py +144 -139
app.py CHANGED
@@ -1,164 +1,169 @@
  import gradio as gr
- import requests
  import pandas as pd
- import io
  from docx import Document
- import tempfile
- import os
- os.system("pip install python-docx")
-
- API_BASE_URL = "https://pubmed-api-jwfq.onrender.com/search_pubmed"
-
- global_df = None  # Global variable to store search results for export
-
-
- def fetch_pubmed_articles(query, max_results=10, page=1, sort_by="Year", filter_journal="All", min_year=None, max_year=None):
-     """
-     Fetches PubMed articles and applies sorting and filtering.
-     """
-     try:
-         url = f"{API_BASE_URL}?query={query}&max_results={max_results}&page={page}"
-         response = requests.get(url)
-
-         if response.status_code != 200:
-             return f"⚠️ API Error: {response.status_code} - {response.text}", None
-
-         articles = response.json()
-
-         if not articles:
-             return "No articles found for this query.", None
-
-         for article in articles:
-             try:
-                 article["Year"] = int(article["Year"])
-             except:
-                 article["Year"] = 0
-
-         if filter_journal and filter_journal != "All":
-             articles = [a for a in articles if filter_journal.lower() in a['Journal'].lower()]
-
-         if min_year:
-             articles = [a for a in articles if a["Year"] >= int(min_year)]
-         if max_year:
-             articles = [a for a in articles if a["Year"] <= int(max_year)]
-
-         if sort_by == "Year":
-             articles.sort(key=lambda x: x["Year"], reverse=True)
-         elif sort_by == "Title":
-             articles.sort(key=lambda x: x["Title"])
-         elif sort_by == "Journal":
-             articles.sort(key=lambda x: x["Journal"])
-
-         formatted_results = []
-         for article in articles:
-             formatted_results.append(
-                 f"## 📰 {article['Title']}\n"
-                 f"📖 **<span style='color:blue'>{article['Journal']}</span>** ({article['Year']})\n"
-                 f"👨‍🔬 **<span style='color:gray'>{article['Authors']}</span>**\n"
-                 f"🔗 [Read on PubMed]({article['PubMed_URL']})\n\n"
-                 f"<details><summary>📄 **Show Abstract**</summary>\n{article['Abstract']}\n</details>"
-                 f"\n---\n"
-             )
-
-         df = pd.DataFrame(articles)
-         return "\n\n".join(formatted_results), df
-
-     except Exception as e:
-         return f"⚠️ Error fetching data: {str(e)}", None
-
-
- def export_results(df, format_type):
      if df is None or df.empty:
          return None

-     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{format_type.lower()}")
-     temp_file_path = temp_file.name
-
-     if format_type == "CSV":
-         df.to_csv(temp_file_path, index=False)
-     elif format_type == "DOCX":
          doc = Document()
-         doc.add_heading("PubMed Search Results", level=1)
          for _, row in df.iterrows():
-             doc.add_heading(row["Title"], level=2)
-             doc.add_paragraph(f"📖 Journal: {row['Journal']} ({row['Year']})")
-             doc.add_paragraph(f"👨‍🔬 Authors: {row['Authors']}")
-             doc.add_paragraph(f"🔗 Link: {row['PubMed_URL']}")
-             doc.add_paragraph(f"📄 Abstract: {row['Abstract']}")
-             doc.add_paragraph("---")
-         doc.save(temp_file_path)

-     temp_file.close()
-     return temp_file_path


- with gr.Blocks() as app:
-     gr.Markdown("""
-     # 🔍 **PubMed Search Tool with Advanced Features**
-     ## 📖 **How to Use This App**
-     1️⃣ Enter a Search Query (e.g., "Deep Learning in Psychiatry")
-     2️⃣ Set Number of Results & Page
-     3️⃣ Choose Sort Option (Year, Title, Journal)
-     4️⃣ (Optional) Filter by Journal or Year
-     5️⃣ Click **Search** to fetch articles
-     6️⃣ Click **Export as CSV/DOCX** to download
-     """)
-
-     with gr.Row():
-         query_input = gr.Textbox(label="🔎 Search Query", placeholder="Enter topic (e.g., 'Neural Networks in Psychiatry')")

-     with gr.Row():
-         max_results_input = gr.Slider(1, 50, value=10, step=1, label="📄 Number of Results per Page")
-         page_input = gr.Slider(1, 200, value=1, step=1, label="📄 Page Number")
-
-     with gr.Row():
-         sort_input = gr.Dropdown(choices=["Year", "Title", "Journal"], value="Year", label="🔄 Sort By")
-         journal_filter_input = gr.Textbox(label="🎯 Filter by Journal (Optional)", placeholder="Enter journal name or leave blank")

      with gr.Row():
-         min_year_input = gr.Number(label="📅 Min Year", value=None)
-         max_year_input = gr.Number(label="📅 Max Year", value=None)

      with gr.Row():
-         search_button = gr.Button("🔍 Search")
-         export_csv_button = gr.Button("📂 Export as CSV")
-         export_docx_button = gr.Button("📄 Export as Word DOCX")

-     results_output = gr.HTML()
-     status_output = gr.Text(label="🔄 Status", interactive=False)
-     export_csv_output = gr.File(label="Download CSV")
-     export_docx_output = gr.File(label="Download Word DOCX")

-     def search_and_display_wrapper(query, max_results, page, sort_by, journal_filter, min_year, max_year):
-         global global_df
-         yield gr.update(value="🔄 Searching PubMed..."), gr.update(value="")
-
-         result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
-         global_df = df
-
-         if df is not None:
-             status = "✅ Done"
-         else:
-             status = "❌ No Results Found"
-
-         yield gr.update(value=status), gr.update(value=result_text)
-
-     def export_csv():
-         if global_df is not None:
-             return export_results(global_df, "CSV")
-
-     def export_docx():
-         if global_df is not None:
-             return export_results(global_df, "DOCX")

      search_button.click(
          fn=search_and_display_wrapper,
          inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
-         outputs=[status_output, results_output]
      )

-     export_csv_button.click(export_csv, outputs=export_csv_output)
-     export_docx_button.click(export_docx, outputs=export_docx_output)

- if __name__ == "__main__":
-     app.launch()

  import gradio as gr
  import pandas as pd
+ import requests
  from docx import Document
+ from datetime import datetime
+
+ # Function to call the API and return results
+ def fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year):
+     base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+     api_key = ""  # optional: insert your NCBI API key
+     params = {
+         "db": "pubmed",
+         "term": query,
+         "retmax": max_results,
+         "retstart": max_results * (page - 1),
+         "retmode": "json",
+         "api_key": api_key
+     }
+
+     response = requests.get(base_url, params=params)
+     if response.status_code != 200:
+         return f"❌ Error: {response.status_code} - {response.reason}", None
+
+     id_list = response.json().get("esearchresult", {}).get("idlist", [])
+     if not id_list:
+         return "❌ No results found for this query.", None
+
+     ids = ",".join(id_list)
+     fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+     fetch_params = {
+         "db": "pubmed",
+         "id": ids,
+         "retmode": "xml",
+         "api_key": api_key
+     }
+
+     fetch_response = requests.get(fetch_url, params=fetch_params)
+     if fetch_response.status_code != 200:
+         return f"❌ Error fetching details: {fetch_response.status_code} - {fetch_response.reason}", None
+
+     from xml.etree import ElementTree as ET
+     root = ET.fromstring(fetch_response.content)
+
+     articles = []
+     for article in root.findall(".//PubmedArticle"):
+         try:
+             title = article.findtext(".//ArticleTitle", default="No Title")
+             abstract = " ".join([abst.text for abst in article.findall(".//AbstractText") if abst.text])
+             journal = article.findtext(".//Journal/Title", default="No Journal")
+             year = article.findtext(".//PubDate/Year")
+             pmid = article.findtext(".//PMID", default="")
+
+             year = int(year) if year and year.isdigit() else None
+
+             articles.append({
+                 "Title": title,
+                 "Abstract": abstract,
+                 "Journal": journal,
+                 "Year": year,
+                 "PMID": pmid
+             })
+         except Exception:
+             continue
+
+     df = pd.DataFrame(articles)
+     if df.empty:
+         return "❌ No results could be parsed.", None
+
+     # Filter by journal and year
+     if journal_filter:
+         df = df[df["Journal"].str.contains(journal_filter, case=False, na=False)]
+     if min_year is not None:
+         df = df[df["Year"] >= min_year]
+     if max_year is not None:
+         df = df[df["Year"] <= max_year]
+
+     if df.empty:
+         return "❌ No results matched filters.", None
+
+     # Sort
+     if sort_by == "Year":
+         df = df.sort_values(by="Year", ascending=False)
+     elif sort_by == "Title":
+         df = df.sort_values(by="Title", ascending=True)
+     elif sort_by == "Journal":
+         df = df.sort_values(by="Journal", ascending=True)
+
+     # Markdown result text
+     result_text = ""
+     for _, row in df.iterrows():
+         pmid_url = f"https://pubmed.ncbi.nlm.nih.gov/{row['PMID']}/"
+         result_text += f"### [{row['Title']}]({pmid_url})\n"
+         result_text += f"**Journal:** {row['Journal']} \n"
+         result_text += f"**Year:** {row['Year']} \n"
+         result_text += f"**Abstract:** {row['Abstract']}\n\n---\n"
+
+     return result_text, df
+
+
+ # Export function
+ def export_results(df, file_type):
+     now = datetime.now().strftime("%Y%m%d_%H%M%S")
      if df is None or df.empty:
          return None

+     if file_type == "CSV":
+         path = f"pubmed_results_{now}.csv"
+         df.to_csv(path, index=False)
+         return path
+     elif file_type == "DOCX":
+         path = f"pubmed_results_{now}.docx"
          doc = Document()
+         doc.add_heading("PubMed Search Results", 0)
          for _, row in df.iterrows():
+             doc.add_heading(row["Title"], level=1)
+             doc.add_paragraph(f"Journal: {row['Journal']}")
+             doc.add_paragraph(f"Year: {row['Year']}")
+             doc.add_paragraph(f"PMID: {row['PMID']}")
+             doc.add_paragraph(f"Abstract: {row['Abstract']}")
+             doc.add_paragraph("--------")
+         doc.save(path)
+         return path
+     return None


+ # Gradio logic
+ def search_and_display_wrapper(query, max_results, page, sort_by, journal_filter, min_year, max_year):
+     yield "🔄 Searching PubMed, please wait...", None
+     result_text, df = fetch_pubmed_articles(query, max_results, page, sort_by, journal_filter, min_year, max_year)
+     yield result_text, df


+ with gr.Blocks() as app:
+     gr.Markdown("# 🔍 PubMed Search Tool\nEnter a biomedical research topic to fetch recent articles from PubMed.")

      with gr.Row():
+         query_input = gr.Textbox(label="Search Query", placeholder="e.g., breast cancer treatment", lines=2)
+         max_results_input = gr.Number(label="Max Results", value=20)
+         page_input = gr.Number(label="Page Number", value=1)

      with gr.Row():
+         sort_input = gr.Dropdown(["Year", "Title", "Journal"], value="Year", label="Sort By")
+         journal_filter_input = gr.Textbox(label="Journal Filter (optional)", placeholder="e.g., Nature")
+         min_year_input = gr.Number(label="Min Year (optional)", value=2000)
+         max_year_input = gr.Number(label="Max Year (optional)", value=2025)

+     df_state = gr.State()

+     search_button = gr.Button("🔍 Search PubMed")
+     status_output = gr.Markdown()
+     results_output = gr.Markdown()

      search_button.click(
          fn=search_and_display_wrapper,
          inputs=[query_input, max_results_input, page_input, sort_input, journal_filter_input, min_year_input, max_year_input],
+         outputs=[results_output, df_state]
      )

+     with gr.Row():
+         export_csv_button = gr.Button("⬇️ Export CSV")
+         export_docx_button = gr.Button("⬇️ Export DOCX")
+         export_csv_output = gr.File()
+         export_docx_output = gr.File()
+
+     export_csv_button.click(lambda df: export_results(df, "CSV"), inputs=[df_state], outputs=[export_csv_output])
+     export_docx_button.click(lambda df: export_results(df, "DOCX"), inputs=[df_state], outputs=[export_docx_output])
+
+ app.launch()
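
For reference, a minimal standalone sketch of the esearch → efetch flow this commit switches the app to. The query string, result count, and printed fields below are illustrative assumptions, not part of app.py.

# Standalone sketch: the same NCBI E-utilities calls used by fetch_pubmed_articles above.
import requests
from xml.etree import ElementTree as ET

query = "deep learning psychiatry"  # illustrative query

# Step 1: esearch returns the matching PMIDs as JSON.
search = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    params={"db": "pubmed", "term": query, "retmax": 5, "retmode": "json"},
)
id_list = search.json().get("esearchresult", {}).get("idlist", [])

# Step 2: efetch returns the full records for those PMIDs as XML.
fetch = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
    params={"db": "pubmed", "id": ",".join(id_list), "retmode": "xml"},
)
root = ET.fromstring(fetch.content)
for art in root.findall(".//PubmedArticle"):
    # Print the PMID and title, two of the fields the app extracts per article.
    print(art.findtext(".//PMID", default=""), art.findtext(".//ArticleTitle", default="No Title"))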