mgbam committed on
Commit
e852f8c
·
verified ·
1 Parent(s): f0207a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +457 -39
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import io
3
  import json
 
4
  import asyncio
5
  import xml.etree.ElementTree as ET
6
  from typing import Any, Dict, Optional, Tuple, Union, List
@@ -149,19 +150,114 @@ def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
149
  })
150
  return articles
151
 
152
def interpret_clinical_test_results(results: str) -> str:
    """Ask the chat model for a clinical interpretation of raw test results.

    The results are sent as a single user message to ``gpt-3.5-turbo``
    (500 max tokens, temperature 0.7) and the stripped text of the first
    choice is returned. Any exception is logged and replaced by the fixed
    string ``"Failed to interpret results."``.
    """
    try:
        request = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": f"Interpret these clinical test results:\n{results}",
                }
            ],
            "max_tokens": 500,
            "temperature": 0.7,
        }
        completion = client.chat.completions.create(**request)
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        logger.error(f"Interpretation error: {err}")
        return "Failed to interpret results."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  ###############################################################################
167
  # 6) CORE FUNCTIONS #
@@ -175,7 +271,7 @@ def summarize_text(text: str) -> str:
175
  response = client.chat.completions.create(
176
  model="gpt-3.5-turbo",
177
  messages=[{"role": "user", "content": f"Summarize this clinical data:\n{text}"}],
178
- max_tokens=300,
179
  temperature=0.7,
180
  )
181
  return response.choices[0].message.content.strip()
@@ -183,6 +279,21 @@ def summarize_text(text: str) -> str:
183
  logger.error(f"Summarization error: {e}")
184
  return "Summarization failed."
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
187
  """Generate a professional PDF report from the text."""
188
  try:
@@ -220,65 +331,372 @@ def visualize_predictions(predictions: Dict[str, float]) -> alt.Chart:
220
  )
221
  return chart
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  ###############################################################################
224
- # 7) BUILDING THE GRADIO APP #
225
  ###############################################################################
226
 
227
  with gr.Blocks() as demo:
228
- gr.Markdown("# 🏥 AI-Driven Clinical Assistant")
229
  gr.Markdown("""
230
  **Highlights**:
231
  - **Summarize** clinical text (OpenAI GPT-3.5)
232
- - **Interpret** clinical test results with expert-level insights
 
 
 
233
  - **Generate** professional PDF reports
 
 
234
  """)
235
 
236
- text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or test results...")
 
 
 
 
 
 
237
  action = gr.Radio(
238
  [
239
  "Summarize",
240
- "Interpret Clinical Test Results",
241
  "Generate Report",
 
 
 
 
 
 
 
242
  ],
243
  label="Select an Action",
244
  )
245
-
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  output_text = gr.Textbox(label="Output", lines=8)
 
 
 
247
  output_file = gr.File(label="Generated File")
248
-
249
  submit_btn = gr.Button("Submit")
250
 
 
 
 
 
 
251
  async def handle_action(
252
  action: str,
253
  txt: str,
254
- report_fn: str
255
- ) -> Tuple[Optional[str], Optional[str]]:
256
- """Handle clinical actions based on the user's selection."""
 
 
 
 
 
 
 
 
257
  try:
258
  combined_text = txt.strip()
259
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  if action == "Summarize":
261
- summary = summarize_text(combined_text)
262
- return summary, None
263
-
264
- elif action == "Interpret Clinical Test Results":
265
- interpretation = interpret_clinical_test_results(combined_text)
266
- return interpretation, None
 
 
 
 
 
 
 
 
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  elif action == "Generate Report":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  path = generate_report(combined_text, report_fn)
270
  msg = f"Report generated: {path}" if path else "Report generation failed."
271
- return msg, path
272
-
273
- return "Invalid action.", None
274
- except Exception as e:
275
- logger.error(f"Exception: {e}")
276
- return f"Error: {str(e)}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  submit_btn.click(
279
  fn=handle_action,
280
- inputs=[action, text_input, report_filename_input],
281
- outputs=[output_text, output_file],
282
  )
283
 
284
  # Launch the Gradio interface
 
1
  import os
2
  import io
3
  import json
4
+ import csv
5
  import asyncio
6
  import xml.etree.ElementTree as ET
7
  from typing import Any, Dict, Optional, Tuple, Union, List
 
150
  })
151
  return articles
152
 
153
+ ###############################################################################
154
+ # 5) ASYNC FETCH FUNCTIONS #
155
+ ###############################################################################
156
+
157
async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
    """Search the ``EUROPE_PMC_BASE_URL`` endpoint using an NCT ID as the query.

    Returns the decoded JSON response. If the request, the HTTP status
    check, or the JSON decoding raises, the error is logged and
    ``{"error": <message>}`` is returned instead.
    """
    request_args = {"query": nct_id, "format": "json"}
    async with httpx.AsyncClient() as http:
        try:
            response = await http.get(EUROPE_PMC_BASE_URL, params=request_args)
            response.raise_for_status()
            payload = response.json()
        except Exception as err:
            logger.error(f"Error fetching articles for {nct_id}: {err}")
            payload = {"error": str(err)}
        return payload
167
+
168
async def fetch_articles_by_query(query_params: str) -> Dict[str, Any]:
    """Run a fielded search against ``EUROPE_PMC_BASE_URL`` from a JSON string.

    ``query_params`` is decoded with ``safe_json_parse``. Each key/value
    pair becomes a ``key:value`` clause and the clauses are joined with
    ``" AND "`` to form the query.

    Returns the decoded JSON response, ``{"error": "Invalid JSON."}`` when
    the input does not decode to a non-empty dict, or
    ``{"error": <message>}`` when the request fails.
    """
    fields = safe_json_parse(query_params)
    if not (fields and isinstance(fields, dict)):
        return {"error": "Invalid JSON."}

    clauses = [f"{name}:{value}" for name, value in fields.items()]
    request_args = {"query": " AND ".join(clauses), "format": "json"}

    async with httpx.AsyncClient() as http:
        try:
            response = await http.get(EUROPE_PMC_BASE_URL, params=request_args)
            response.raise_for_status()
            payload = response.json()
        except Exception as err:
            logger.error(f"Error fetching articles: {err}")
            payload = {"error": str(err)}
        return payload
183
+
184
async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
    """Search PubMed and fetch the matching records as XML.

    ``query_params`` is a JSON object string; its ``term`` (default ``""``)
    and ``retmax`` (default ``"10"``) entries are forwarded to the search
    request at ``PUBMED_SEARCH_URL``. The returned ID list is then fetched
    from ``PUBMED_FETCH_URL`` in XML mode.

    Returns ``{"result": <xml text>}``, ``{"result": ""}`` when the search
    yields no IDs, ``{"error": "Invalid JSON for PubMed."}`` for input that
    is not a non-empty JSON object, or ``{"error": <message>}`` when either
    request fails.
    """
    options = safe_json_parse(query_params)
    if not (options and isinstance(options, dict)):
        return {"error": "Invalid JSON for PubMed."}

    search_args = {
        "db": "pubmed",
        "retmode": "json",
        "email": ENTREZ_EMAIL,
        "retmax": options.get("retmax", "10"),
        "term": options.get("term", ""),
    }

    async with httpx.AsyncClient() as http:
        try:
            # Step 1: resolve the search term to a list of PubMed IDs.
            search_reply = await http.get(PUBMED_SEARCH_URL, params=search_args)
            search_reply.raise_for_status()
            search_json = search_reply.json()
            pmids = search_json.get("esearchresult", {}).get("idlist", [])
            if not pmids:
                return {"result": ""}

            # Step 2: download the records for those IDs as XML.
            fetch_args = {
                "db": "pubmed",
                "id": ",".join(pmids),
                "retmode": "xml",
                "email": ENTREZ_EMAIL,
            }
            fetch_reply = await http.get(PUBMED_FETCH_URL, params=fetch_args)
            fetch_reply.raise_for_status()
            return {"result": fetch_reply.text}
        except Exception as err:
            logger.error(f"Error fetching PubMed articles: {err}")
            return {"error": str(err)}
219
+
220
async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
    """Send a JSON-described query to ``CROSSREF_API_URL``.

    ``query_params`` is decoded with ``safe_json_parse`` and the resulting
    dict is passed through unchanged as the request's query parameters.

    Returns the decoded JSON response,
    ``{"error": "Invalid JSON for Crossref."}`` when the input is not a
    non-empty JSON object, or ``{"error": <message>}`` on request failure.
    """
    request_args = safe_json_parse(query_params)
    if not (request_args and isinstance(request_args, dict)):
        return {"error": "Invalid JSON for Crossref."}

    async with httpx.AsyncClient() as http:
        try:
            response = await http.get(CROSSREF_API_URL, params=request_args)
            response.raise_for_status()
            payload = response.json()
        except Exception as err:
            logger.error(f"Error fetching Crossref data: {err}")
            payload = {"error": str(err)}
        return payload
232
+
233
async def fetch_bioportal_by_query(query_params: str) -> Dict[str, Any]:
    """
    Search BioPortal for medical ontology/terminology entries.

    Expects a JSON object string such as: {"q": "cancer"}. Only the "q"
    entry is forwarded, to ``{BIOPORTAL_API_BASE}/search``, with the API
    key sent in the ``Authorization`` header.
    See: https://data.bioontology.org/documentation

    Returns the decoded JSON response, or ``{"error": <message>}`` when the
    API key is unset, the input is not a non-empty JSON object, "q" is
    missing/empty, or the request fails.
    """
    if not BIOPORTAL_API_KEY:
        return {"error": "No BioPortal API Key set."}

    options = safe_json_parse(query_params)
    if not (options and isinstance(options, dict)):
        return {"error": "Invalid JSON for BioPortal."}

    term = options.get("q", "")
    if not term:
        return {"error": "No 'q' found in JSON. Provide a search term."}

    endpoint = f"{BIOPORTAL_API_BASE}/search"
    auth_headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}

    async with httpx.AsyncClient() as http:
        try:
            response = await http.get(
                endpoint, params={"q": term}, headers=auth_headers
            )
            response.raise_for_status()
            payload = response.json()
        except Exception as err:
            logger.error(f"Error fetching BioPortal data: {err}")
            payload = {"error": str(err)}
        return payload
261
 
262
  ###############################################################################
263
  # 6) CORE FUNCTIONS #
 
271
  response = client.chat.completions.create(
272
  model="gpt-3.5-turbo",
273
  messages=[{"role": "user", "content": f"Summarize this clinical data:\n{text}"}],
274
+ max_tokens=200,
275
  temperature=0.7,
276
  )
277
  return response.choices[0].message.content.strip()
 
279
  logger.error(f"Summarization error: {e}")
280
  return "Summarization failed."
281
 
282
def predict_outcome(text: str) -> Union[Dict[str, float], str]:
    """Classify ``text`` with the module-level tokenizer/model pair.

    Returns a mapping ``"Label 1"``, ``"Label 2"``, ... to the softmax
    probability of each output class for the first (only) sequence, or a
    plain message string when the text is blank or inference fails.
    """
    if text.strip() == "":
        return "No text provided for prediction."

    try:
        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
        with torch.no_grad():
            result = model(**on_device)
        class_probs = torch.nn.functional.softmax(result.logits, dim=-1)[0]

        scores = {}
        for position, prob in enumerate(class_probs, start=1):
            scores[f"Label {position}"] = float(prob.item())
        return scores
    except Exception as err:
        logger.error(f"Prediction error: {err}")
        return "Prediction failed."
296
+
297
  def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
298
  """Generate a professional PDF report from the text."""
299
  try:
 
331
  )
332
  return chart
333
 
334
def translate_text(text: str, translation_option: str) -> str:
    """Translate text between English and French via MarianMT.

    Args:
        text: Text to translate. ``None`` or whitespace-only input is
            rejected with a message instead of being sent to the model.
        translation_option: Must be a key of ``LANGUAGE_MAP``.

    Returns:
        The decoded translation of the first generated sequence, or one of
        the fixed messages "No text provided for translation.",
        "Unsupported translation option." or "Translation failed.".
    """
    # Guard against None as well as blank strings so the check itself
    # cannot raise outside the try block.
    if not text or not text.strip():
        return "No text provided for translation."
    try:
        if translation_option not in LANGUAGE_MAP:
            return "Unsupported translation option."
        # NOTE(review): translation_option is only validated here; the same
        # tokenizer/model pair is used whatever direction was selected.
        # Confirm that is intended.
        #
        # truncation=True keeps the encoded input within the tokenizer's
        # maximum length, matching predict_outcome; without it, over-long
        # text is passed to the model unbounded.
        inputs = translation_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        ).to(device)
        translated_tokens = translation_model.generate(**inputs)
        return translation_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        logger.error(f"Translation error: {e}")
        return "Translation failed."
347
+
348
def perform_named_entity_recognition(text: str) -> str:
    """Run the module-level spaCy pipeline and list the entities it finds.

    Returns one ``"<entity text> -> <label>"`` line per entity, or a fixed
    message when the text is blank, no entities are found, or the pipeline
    raises.
    """
    if text.strip() == "":
        return "No text provided for NER."

    try:
        lines = []
        for entity in nlp(text).ents:
            lines.append(f"{entity.text} -> {entity.label_}")
        if len(lines) == 0:
            return "No named entities found."
        return "\n".join(lines)
    except Exception as err:
        logger.error(f"NER error: {err}")
        return "NER failed."
361
+
362
+ ###############################################################################
363
+ # 7) FILE PARSING (TXT, PDF, CSV, XLS) #
364
+ ###############################################################################
365
+
366
def parse_pdf_file_as_str(file_up: gr.File) -> str:
    """Extract the text of every page of an uploaded PDF with PyPDF2.

    The upload is read from ``file_up.name`` when that path exists on disk;
    otherwise the bytes are taken from ``file_up.file``. Pages without
    extractable text contribute an empty string; pages are joined with
    newlines.

    Raises:
        ValueError: the path does not exist and ``file_up`` has no ``file``
            attribute to fall back on.
    """

    def _join_pages(reader) -> str:
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    pdf_path = file_up.name
    if not os.path.isfile(pdf_path):
        if not hasattr(file_up, "file"):
            raise ValueError("No .file attribute found for PDF.")
        in_memory = io.BytesIO(file_up.file.read())
        return _join_pages(PyPDF2.PdfReader(in_memory))

    # Text must be extracted while the handle is still open.
    with open(pdf_path, "rb") as handle:
        return _join_pages(PyPDF2.PdfReader(handle))
379
+
380
def parse_text_file_as_str(file_up: gr.File) -> str:
    """Return the contents of an uploaded .txt file as a string.

    Bytes come from the path in ``file_up.name`` when it exists on disk,
    otherwise from ``file_up.file``. They are decoded as UTF-8 with
    undecodable bytes replaced rather than raising.

    Raises:
        ValueError: the path does not exist and ``file_up`` has no ``file``
            attribute to fall back on.
    """
    source_path = file_up.name
    if os.path.isfile(source_path):
        with open(source_path, "rb") as handle:
            raw = handle.read()
    elif hasattr(file_up, "file"):
        raw = file_up.file.read()
    else:
        raise ValueError("No .file attribute for TXT.")
    return raw.decode("utf-8", errors="replace")
390
+
391
def parse_csv_file_to_df(file_up: gr.File) -> pd.DataFrame:
    """
    Attempt multiple encodings for CSV: utf-8, utf-8-sig, latin1, ISO-8859-1.

    The file is read from ``file_up.name`` when that path exists on disk,
    otherwise from the bytes in ``file_up.file``. Encodings are tried in the
    order above and the first one that both decodes and parses wins.

    Raises:
        ValueError: no encoding produced a DataFrame, or the path does not
            exist and ``file_up`` has no ``file`` attribute.
    """
    encodings = ("utf-8", "utf-8-sig", "latin1", "ISO-8859-1")

    path = file_up.name
    if os.path.isfile(path):
        for enc in encodings:
            try:
                return pd.read_csv(path, encoding=enc)
            except UnicodeDecodeError:
                logger.warning(f"CSV parse failed (enc={enc}). Trying next...")
            except Exception as e:
                logger.warning(f"CSV parse error (enc={enc}): {e}")
        raise ValueError("Could not parse local CSV with known encodings.")

    if not hasattr(file_up, "file"):
        raise ValueError("No .file attribute for CSV.")
    raw_bytes = file_up.file.read()
    for enc in encodings:
        try:
            # Decode strictly: with errors="replace" the first encoding can
            # never fail, so the fallback encodings would be unreachable and
            # non-UTF-8 bytes would silently turn into U+FFFD.
            text_decoded = raw_bytes.decode(enc)
            return pd.read_csv(io.StringIO(text_decoded))
        except UnicodeDecodeError:
            logger.warning(f"CSV in-memory parse failed (enc={enc}). Next...")
        except Exception as e:
            logger.warning(f"In-memory CSV error (enc={enc}): {e}")
    raise ValueError("Could not parse in-memory CSV with known encodings.")
419
+
420
def parse_excel_file_to_df(file_up: gr.File) -> pd.DataFrame:
    """Load an uploaded Excel workbook into a DataFrame using openpyxl.

    The workbook is read from ``file_up.name`` when that path exists on
    disk, otherwise from the bytes in ``file_up.file``.

    Raises:
        ValueError: the path does not exist and ``file_up`` has no ``file``
            attribute to fall back on.
    """
    workbook_path = file_up.name
    if not os.path.isfile(workbook_path):
        if not hasattr(file_up, "file"):
            raise ValueError("No .file attribute for Excel.")
        workbook_source = io.BytesIO(file_up.file.read())
    else:
        workbook_source = workbook_path
    return pd.read_excel(workbook_source, engine="openpyxl")
430
+
431
  ###############################################################################
432
+ # 8) BUILDING THE GRADIO APP #
433
  ###############################################################################
434
 
435
  with gr.Blocks() as demo:
436
+ gr.Markdown("# 🏥 AI-Driven Clinical Assistant (No EDA)")
437
  gr.Markdown("""
438
  **Highlights**:
439
  - **Summarize** clinical text (OpenAI GPT-3.5)
440
+ - **Predict** with a specialized BERT-based model
441
+ - **Translate** (English ↔ French)
442
+ - **Named Entity Recognition** (spaCy)
443
+ - **Fetch** from PubMed, Crossref, Europe PMC, and **BioPortal**
444
  - **Generate** professional PDF reports
445
+
446
+ *Disclaimer*: This is a research demo, **not** a medical device.
447
  """)
448
 
449
+ with gr.Row():
450
+ text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or notes...")
451
+ file_input = gr.File(
452
+ label="Upload File (txt/csv/xls/xlsx/pdf)",
453
+ file_types=[".txt", ".csv", ".xls", ".xlsx", ".pdf"]
454
+ )
455
+
456
  action = gr.Radio(
457
  [
458
  "Summarize",
459
+ "Predict Outcome",
460
  "Generate Report",
461
+ "Translate",
462
+ "Perform Named Entity Recognition",
463
+ "Fetch Clinical Studies",
464
+ "Fetch PubMed Articles (Legacy)",
465
+ "Fetch PubMed by Query",
466
+ "Fetch Crossref by Query",
467
+ "Fetch BioPortal by Query",
468
  ],
469
  label="Select an Action",
470
  )
471
+ translation_option = gr.Dropdown(
472
+ choices=list(LANGUAGE_MAP.keys()),
473
+ label="Translation Option",
474
+ value="English to French"
475
+ )
476
+ query_params_input = gr.Textbox(
477
+ label="Query Params (JSON)",
478
+ placeholder='{"term": "cancer"} or {"q": "cancer"} for BioPortal'
479
+ )
480
+ nct_id_input = gr.Textbox(label="NCT ID")
481
+ report_filename_input = gr.Textbox(label="Report Filename", value="clinical_report.pdf")
482
+ export_format = gr.Dropdown(choices=["None", "CSV", "JSON"], label="Export Format")
483
+
484
+ # Outputs
485
  output_text = gr.Textbox(label="Output", lines=8)
486
+ with gr.Row():
487
+ output_chart = gr.Plot(label="Chart 1")
488
+ output_chart2 = gr.Plot(label="Chart 2")
489
  output_file = gr.File(label="Generated File")
490
+
491
  submit_btn = gr.Button("Submit")
492
 
493
+ ################################################################
494
+ # 9) MAIN ACTION HANDLER (ASYNC) #
495
+ ################################################################
496
+ import traceback
497
+
498
  async def handle_action(
499
  action: str,
500
  txt: str,
501
+ file_up: gr.File,
502
+ translation_opt: str,
503
+ query_str: str,
504
+ nct_id: str,
505
+ report_fn: str,
506
+ exp_fmt: str
507
+ ) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
508
+ """
509
+ Master function to handle user actions.
510
+ Returns a 4-tuple mapped to (output_text, output_chart, output_chart2, output_file).
511
+ """
512
  try:
513
  combined_text = txt.strip()
514
+
515
+ # 1) If user uploaded a file, parse minimal text from .txt/.pdf here
516
+ if file_up is not None:
517
+ ext = os.path.splitext(file_up.name)[1].lower()
518
+ if ext == ".txt":
519
+ try:
520
+ txt_data = parse_text_file_as_str(file_up)
521
+ combined_text += "\n" + txt_data
522
+ except Exception as e:
523
+ return f"TXT parse error: {e}", None, None, None
524
+ elif ext == ".pdf":
525
+ try:
526
+ pdf_data = parse_pdf_file_as_str(file_up)
527
+ combined_text += "\n" + pdf_data
528
+ except Exception as e:
529
+ return f"PDF parse error: {e}", None, None, None
530
+ # CSV and Excel are parsed *within* certain actions (e.g. Summarize)
531
+
532
+ # 2) Branch by action
533
  if action == "Summarize":
534
+ if file_up:
535
+ fx = file_up.name.lower()
536
+ if fx.endswith(".csv"):
537
+ try:
538
+ df_csv = parse_csv_file_to_df(file_up)
539
+ combined_text += "\n" + df_csv.to_csv(index=False)
540
+ except Exception as e:
541
+ return f"CSV parse error (Summarize): {e}", None, None, None
542
+ elif fx.endswith((".xls", ".xlsx")):
543
+ try:
544
+ df_xl = parse_excel_file_to_df(file_up)
545
+ combined_text += "\n" + df_xl.to_csv(index=False)
546
+ except Exception as e:
547
+ return f"Excel parse error (Summarize): {e}", None, None, None
548
 
549
+ summary = summarize_text(combined_text)
550
+ return summary, None, None, None
551
+
552
+ elif action == "Predict Outcome":
553
+ if file_up:
554
+ fx = file_up.name.lower()
555
+ if fx.endswith(".csv"):
556
+ try:
557
+ df_csv = parse_csv_file_to_df(file_up)
558
+ combined_text += "\n" + df_csv.to_csv(index=False)
559
+ except Exception as e:
560
+ return f"CSV parse error (Predict): {e}", None, None, None
561
+ elif fx.endswith((".xls", ".xlsx")):
562
+ try:
563
+ df_xl = parse_excel_file_to_df(file_up)
564
+ combined_text += "\n" + df_xl.to_csv(index=False)
565
+ except Exception as e:
566
+ return f"Excel parse error (Predict): {e}", None, None, None
567
+
568
+ preds = predict_outcome(combined_text)
569
+ if isinstance(preds, dict):
570
+ chart = visualize_predictions(preds)
571
+ return json.dumps(preds, indent=2), chart, None, None
572
+ return preds, None, None, None
573
+
574
  elif action == "Generate Report":
575
+ if file_up:
576
+ fx = file_up.name.lower()
577
+ if fx.endswith(".csv"):
578
+ try:
579
+ df_csv = parse_csv_file_to_df(file_up)
580
+ combined_text += "\n" + df_csv.to_csv(index=False)
581
+ except Exception as e:
582
+ return f"CSV parse error (Report): {e}", None, None, None
583
+ elif fx.endswith((".xls", ".xlsx")):
584
+ try:
585
+ df_xl = parse_excel_file_to_df(file_up)
586
+ combined_text += "\n" + df_xl.to_csv(index=False)
587
+ except Exception as e:
588
+ return f"Excel parse error (Report): {e}", None, None, None
589
+
590
  path = generate_report(combined_text, report_fn)
591
  msg = f"Report generated: {path}" if path else "Report generation failed."
592
+ return msg, None, None, path
593
+
594
+ elif action == "Translate":
595
+ if file_up:
596
+ fx = file_up.name.lower()
597
+ if fx.endswith(".csv"):
598
+ try:
599
+ df_csv = parse_csv_file_to_df(file_up)
600
+ combined_text += "\n" + df_csv.to_csv(index=False)
601
+ except Exception as e:
602
+ return f"CSV parse error (Translate): {e}", None, None, None
603
+ elif fx.endswith((".xls", ".xlsx")):
604
+ try:
605
+ df_xl = parse_excel_file_to_df(file_up)
606
+ combined_text += "\n" + df_xl.to_csv(index=False)
607
+ except Exception as e:
608
+ return f"Excel parse error (Translate): {e}", None, None, None
609
+
610
+ translated = translate_text(combined_text, translation_opt)
611
+ return translated, None, None, None
612
+
613
+ elif action == "Perform Named Entity Recognition":
614
+ if file_up:
615
+ fx = file_up.name.lower()
616
+ if fx.endswith(".csv"):
617
+ try:
618
+ df_csv = parse_csv_file_to_df(file_up)
619
+ combined_text += "\n" + df_csv.to_csv(index=False)
620
+ except Exception as e:
621
+ return f"CSV parse error (NER): {e}", None, None, None
622
+ elif fx.endswith((".xls", ".xlsx")):
623
+ try:
624
+ df_xl = parse_excel_file_to_df(file_up)
625
+ combined_text += "\n" + df_xl.to_csv(index=False)
626
+ except Exception as e:
627
+ return f"Excel parse error (NER): {e}", None, None, None
628
+
629
+ ner_result = perform_named_entity_recognition(combined_text)
630
+ return ner_result, None, None, None
631
+
632
+ elif action == "Fetch Clinical Studies":
633
+ if nct_id:
634
+ result = await fetch_articles_by_nct_id(nct_id)
635
+ elif query_str:
636
+ result = await fetch_articles_by_query(query_str)
637
+ else:
638
+ return "Provide either an NCT ID or valid query parameters.", None, None, None
639
+
640
+ articles = result.get("resultList", {}).get("result", [])
641
+ if not articles:
642
+ return "No articles found.", None, None, None
643
+
644
+ formatted = "\n\n".join(
645
+ f"Title: {a.get('title')}\nJournal: {a.get('journalTitle')} ({a.get('pubYear')})"
646
+ for a in articles
647
+ )
648
+ return formatted, None, None, None
649
+
650
+ elif action in ["Fetch PubMed Articles (Legacy)", "Fetch PubMed by Query"]:
651
+ pubmed_result = await fetch_pubmed_by_query(query_str)
652
+ xml_data = pubmed_result.get("result")
653
+ if xml_data:
654
+ articles = parse_pubmed_xml(xml_data)
655
+ if not articles:
656
+ return "No articles found.", None, None, None
657
+ formatted = "\n\n".join(
658
+ f"{a['Title']} - {a['Journal']} ({a['PublicationDate']})"
659
+ for a in articles if a['Title']
660
+ )
661
+ return formatted if formatted else "No articles found.", None, None, None
662
+ return "No articles found or error in fetching PubMed data.", None, None, None
663
+
664
+ elif action == "Fetch Crossref by Query":
665
+ crossref_result = await fetch_crossref_by_query(query_str)
666
+ items = crossref_result.get("message", {}).get("items", [])
667
+ if not items:
668
+ return "No results found.", None, None, None
669
+ crossref_formatted = "\n\n".join(
670
+ f"Title: {it.get('title', ['No title'])[0]}, DOI: {it.get('DOI')}"
671
+ for it in items
672
+ )
673
+ return crossref_formatted, None, None, None
674
+
675
+ elif action == "Fetch BioPortal by Query":
676
+ bp_result = await fetch_bioportal_by_query(query_str)
677
+ collection = bp_result.get("collection", [])
678
+ if not collection:
679
+ return "No BioPortal results found.", None, None, None
680
+ # Format listing
681
+ formatted = "\n\n".join(
682
+ f"Label: {col.get('prefLabel')}, ID: {col.get('@id')}"
683
+ for col in collection
684
+ )
685
+ return formatted, None, None, None
686
+
687
+ # Fallback
688
+ return "Invalid action.", None, None, None
689
+
690
+ except Exception as ex:
691
+ # Catch all exceptions, log, and return traceback to 'output_text'
692
+ tb_str = traceback.format_exc()
693
+ logger.error(f"Exception in handle_action:\n{tb_str}")
694
+ return f"Traceback:\n{tb_str}", None, None, None
695
 
696
  submit_btn.click(
697
  fn=handle_action,
698
+ inputs=[action, text_input, file_input, translation_option, query_params_input, nct_id_input, report_filename_input, export_format],
699
+ outputs=[output_text, output_chart, output_chart2, output_file],
700
  )
701
 
702
  # Launch the Gradio interface