mgbam committed (verified)
Commit: 305d993
Parent(s): 520f2f0

Update app.py

Files changed (1):
  1. app.py +301 -312
app.py CHANGED
@@ -26,9 +26,11 @@ import spacy
 import spacy.cli
 import PyPDF2
 
-# =========================
-# 1) SpaCy Model Download
-# =========================
+###############################################################################
+#                         1) ENVIRONMENT & LOGGING                            #
+###############################################################################
+
+# Ensure spaCy model is downloaded (English Core Web)
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
@@ -36,46 +38,38 @@ except OSError:
     spacy.cli.download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
 
-# =========================
-# 2) Logging Setup
-# =========================
+# Logging
 logger.add("error_logs.log", rotation="1 MB", level="ERROR")
 
-# =========================
-# 3) Environment Vars
-# =========================
+# Load environment variables
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")  # <--- NEW for BioPortal
+BIOPORTAL_API_KEY = os.getenv("BIOPORTAL_API_KEY")  # For BioPortal integration
 ENTREZ_EMAIL = os.getenv("ENTREZ_EMAIL")
 
 if not HUGGINGFACE_TOKEN or not OPENAI_API_KEY:
     logger.error("Missing Hugging Face or OpenAI credentials.")
     raise ValueError("Missing credentials for Hugging Face or OpenAI.")
 
+# Warn if BioPortal key is missing
 if not BIOPORTAL_API_KEY:
-    logger.warning("No BioPortal API Key found. BioPortal queries may fail.")
+    logger.warning("BIOPORTAL_API_KEY is not set. BioPortal fetch calls will fail.")
 
-# =========================
-# 4) Hugging Face Login
-# =========================
+# Hugging Face login
 login(HUGGINGFACE_TOKEN)
 
-# =========================
-# 5) OpenAI Client
-# =========================
+# OpenAI
 client = OpenAI(api_key=OPENAI_API_KEY)
 
-# =========================
-# 6) Device (CPU/GPU)
-# =========================
+# Device: CPU or GPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using device: {device}")
 
-# =========================
-# 7) Models Setup
-# =========================
+###############################################################################
+#                 2) HUGGING FACE & TRANSLATION MODEL SETUP                   #
+###############################################################################
+
 MODEL_NAME = "mgbam/bert-base-finetuned-mgbam"
 try:
     model = AutoModelForSequenceClassification.from_pretrained(
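For local runs, the load_dotenv() call above implies a .env file alongside app.py carrying the expected keys. A sketch with placeholder values (the variable names are taken from the os.getenv calls in this hunk; the values here are invented):

HF_TOKEN=hf_xxxxxxxxxxxxxxxx
OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxx
BIOPORTAL_API_KEY=xxxxxxxxxxxxxxxx
ENTREZ_EMAIL=you@example.com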
@@ -100,26 +94,28 @@ except Exception as e:
     logger.error(f"Translation model load error: {e}")
     raise
 
+# Language map for translation
 LANGUAGE_MAP: Dict[str, Tuple[str, str]] = {
     "English to French": ("en", "fr"),
     "French to English": ("fr", "en"),
 }
 
-# =========================
-# 8) API Endpoints
-# =========================
+###############################################################################
+#                       3) API ENDPOINTS & CONSTANTS                          #
+###############################################################################
+
 PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
 PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
 EUROPE_PMC_BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
 BIOPORTAL_API_BASE = "https://data.bioontology.org"
 CROSSREF_API_URL = "https://api.crossref.org/works"
 
-##########################################################
-#                    HELPER FUNCTIONS                    #
-##########################################################
+###############################################################################
+#                          4) HELPER FUNCTIONS                                #
+###############################################################################
 
 def safe_json_parse(text: str) -> Union[Dict[str, Any], None]:
-    """Parse JSON string into Python dictionary safely."""
+    """Safely parse JSON."""
     try:
         return json.loads(text)
     except json.JSONDecodeError as e:
@@ -127,7 +123,7 @@ def safe_json_parse(text: str) -> Union[Dict[str, Any], None]:
         return None
 
 def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
-    """Parse PubMed XML into structured articles."""
+    """Parse PubMed XML data into a structured list of articles."""
     root = ET.fromstring(xml_data)
     articles = []
     for article in root.findall(".//PubmedArticle"):
@@ -154,40 +150,38 @@ def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
         })
     return articles
 
-##########################################################
-#                 ASYNC FETCH FUNCTIONS                  #
-##########################################################
+###############################################################################
+#                        5) ASYNC FETCH FUNCTIONS                             #
+###############################################################################
 
 async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
-    """Europe PMC by NCT ID."""
     params = {"query": nct_id, "format": "json"}
     async with httpx.AsyncClient() as client_http:
         try:
-            response = await client_http.get(EUROPE_PMC_BASE_URL, params=params)
-            response.raise_for_status()
-            return response.json()
+            resp = await client_http.get(EUROPE_PMC_BASE_URL, params=params)
+            resp.raise_for_status()
+            return resp.json()
         except Exception as e:
            logger.error(f"Error fetching articles for {nct_id}: {e}")
            return {"error": str(e)}
 
 async def fetch_articles_by_query(query_params: str) -> Dict[str, Any]:
-    """Europe PMC by JSON query."""
+    """Europe PMC query via JSON input."""
     parsed_params = safe_json_parse(query_params)
     if not parsed_params or not isinstance(parsed_params, dict):
         return {"error": "Invalid JSON."}
     query_string = " AND ".join(f"{k}:{v}" for k, v in parsed_params.items())
-    params = {"query": query_string, "format": "json"}
+    req_params = {"query": query_string, "format": "json"}
     async with httpx.AsyncClient() as client_http:
         try:
-            response = await client_http.get(EUROPE_PMC_BASE_URL, params=params)
-            response.raise_for_status()
-            return response.json()
+            resp = await client_http.get(EUROPE_PMC_BASE_URL, params=req_params)
+            resp.raise_for_status()
+            return resp.json()
         except Exception as e:
             logger.error(f"Error fetching articles: {e}")
             return {"error": str(e)}
 
 async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
-    """PubMed by JSON query."""
     parsed_params = safe_json_parse(query_params)
     if not parsed_params or not isinstance(parsed_params, dict):
         return {"error": "Invalid JSON for PubMed."}
@@ -199,18 +193,17 @@ async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
         "retmax": parsed_params.get("retmax", "10"),
         "term": parsed_params.get("term", ""),
     }
-
     async with httpx.AsyncClient() as client_http:
         try:
-            # 1) search
+            # Search PubMed
             search_resp = await client_http.get(PUBMED_SEARCH_URL, params=search_params)
             search_resp.raise_for_status()
-            search_data = search_resp.json()
-            id_list = search_data.get("esearchresult", {}).get("idlist", [])
+            data = search_resp.json()
+            id_list = data.get("esearchresult", {}).get("idlist", [])
             if not id_list:
                 return {"result": ""}
 
-            # 2) fetch
+            # Fetch PubMed
             fetch_params = {
                 "db": "pubmed",
                 "id": ",".join(id_list),
@@ -225,33 +218,26 @@ async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
             return {"error": str(e)}
 
 async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
-    """Crossref by JSON query."""
     parsed_params = safe_json_parse(query_params)
     if not parsed_params or not isinstance(parsed_params, dict):
         return {"error": "Invalid JSON for Crossref."}
-
     async with httpx.AsyncClient() as client_http:
         try:
-            response = await client_http.get(CROSSREF_API_URL, params=parsed_params)
-            response.raise_for_status()
-            return response.json()
+            resp = await client_http.get(CROSSREF_API_URL, params=parsed_params)
+            resp.raise_for_status()
+            return resp.json()
         except Exception as e:
             logger.error(f"Error fetching Crossref data: {e}")
             return {"error": str(e)}
 
-##########################################################
-#                 BIOPORTAL INTEGRATION                  #
-##########################################################
-
 async def fetch_bioportal_by_query(query_params: str) -> Dict[str, Any]:
     """
-    Fetch from BioPortal using JSON query parameters.
-    Expects something like: {"q": "cancer"}
+    BioPortal fetch for medical ontologies/terminologies.
+    Expects JSON like: {"q": "cancer"}
     See: https://data.bioontology.org/documentation
     """
     if not BIOPORTAL_API_KEY:
-        return {"error": "No BioPortal API Key set. Cannot fetch BioPortal data."}
-
+        return {"error": "No BioPortal API Key set."}
     parsed_params = safe_json_parse(query_params)
     if not parsed_params or not isinstance(parsed_params, dict):
         return {"error": "Invalid JSON for BioPortal."}
@@ -273,26 +259,28 @@ async def fetch_bioportal_by_query(query_params: str) -> Dict[str, Any]:
         logger.error(f"Error fetching BioPortal data: {e}")
         return {"error": str(e)}
 
-##########################################################
-#                      CORE LOGIC                        #
-##########################################################
+###############################################################################
+#                           6) CORE FUNCTIONS                                 #
+###############################################################################
 
 def summarize_text(text: str) -> str:
+    """OpenAI GPT-3.5 summarization."""
     if not text.strip():
         return "No text provided for summarization."
     try:
         response = client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": f"Summarize the following clinical data:\n{text}"}],
+            messages=[{"role": "user", "content": f"Summarize this clinical data:\n{text}"}],
             max_tokens=200,
             temperature=0.7,
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
-        logger.error(f"Summarization Error: {e}")
+        logger.error(f"Summarization error: {e}")
         return "Summarization failed."
 
 def predict_outcome(text: str) -> Union[Dict[str, float], str]:
+    """Predict outcomes (classification) using a fine-tuned BERT model."""
     if not text.strip():
         return "No text provided for prediction."
     try:
@@ -303,10 +291,11 @@ def predict_outcome(text: str) -> Union[Dict[str, float], str]:
         probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
         return {f"Label {i+1}": float(prob.item()) for i, prob in enumerate(probabilities)}
     except Exception as e:
-        logger.error(f"Prediction Error: {e}")
+        logger.error(f"Prediction error: {e}")
         return "Prediction failed."
 
 def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
+    """Generate a professional PDF report from the text."""
     try:
         if not text.strip():
             logger.warning("No text provided for the report.")
@@ -324,28 +313,26 @@ def generate_report(text: str, filename: str = "clinical_report.pdf") -> Optional[str]:
         logger.info(f"Report generated: {filename}")
         return filename
     except Exception as e:
-        logger.error(f"Report Generation Error: {e}")
+        logger.error(f"Report generation error: {e}")
         return None
 
-def visualize_predictions(predictions: Dict[str, float]) -> Optional[alt.Chart]:
-    try:
-        data = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])
-        chart = (
-            alt.Chart(data)
-            .mark_bar()
-            .encode(
-                x=alt.X("Label:N", sort=None),
-                y="Probability:Q",
-                tooltip=["Label", "Probability"],
-            )
-            .properties(title="Prediction Probabilities", width=500, height=300)
+def visualize_predictions(predictions: Dict[str, float]) -> alt.Chart:
+    """Simple Altair bar chart to visualize classification probabilities."""
+    data = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])
+    chart = (
+        alt.Chart(data)
+        .mark_bar()
+        .encode(
+            x=alt.X("Label:N", sort=None),
+            y="Probability:Q",
+            tooltip=["Label", "Probability"],
         )
-        return chart
-    except Exception as e:
-        logger.error(f"Visualization Error: {e}")
-        return None
+        .properties(title="Prediction Probabilities", width=500, height=300)
+    )
+    return chart
 
 def translate_text(text: str, translation_option: str) -> str:
+    """Translate text between English and French via MarianMT."""
     if not text.strip():
         return "No text provided for translation."
     try:
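The rewritten visualize_predictions drops the old try/except wrapper and returns the Altair chart directly, leaving error handling to the caller. A quick usage sketch with made-up probabilities (chart.save writing a self-contained HTML file is standard Altair):

import altair as alt
import pandas as pd

def visualize_predictions(predictions: dict) -> alt.Chart:
    # Same shape as the function in the diff above.
    data = pd.DataFrame(list(predictions.items()), columns=["Label", "Probability"])
    return (
        alt.Chart(data)
        .mark_bar()
        .encode(
            x=alt.X("Label:N", sort=None),
            y="Probability:Q",
            tooltip=["Label", "Probability"],
        )
        .properties(title="Prediction Probabilities", width=500, height=300)
    )

if __name__ == "__main__":
    chart = visualize_predictions({"Label 1": 0.72, "Label 2": 0.28})
    chart.save("prediction_probabilities.html")  # open in any browser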
@@ -355,10 +342,11 @@ def translate_text(text: str, translation_option: str) -> str:
         translated_tokens = translation_model.generate(**inputs)
         return translation_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
     except Exception as e:
-        logger.error(f"Translation Error: {e}")
+        logger.error(f"Translation error: {e}")
         return "Translation failed."
 
 def perform_named_entity_recognition(text: str) -> str:
+    """NER using spaCy (en_core_web_sm)."""
     if not text.strip():
         return "No text provided for NER."
     try:
@@ -366,115 +354,100 @@ def perform_named_entity_recognition(text: str) -> str:
         entities = [(ent.text, ent.label_) for ent in doc.ents]
         if not entities:
             return "No named entities found."
-        return "\n".join(f"{ent_text} -> {ent_label}" for ent_text, ent_label in entities)
+        return "\n".join(f"{t} -> {lbl}" for t, lbl in entities)
     except Exception as e:
-        logger.error(f"NER Error: {e}")
-        return "Named Entity Recognition failed."
+        logger.error(f"NER error: {e}")
+        return "NER failed."
 
-##########################################################
-#            FILE PARSING (TXT, PDF, CSV, EXCEL)         #
-##########################################################
+###############################################################################
+#                    7) FILE PARSING (TXT, PDF, CSV, XLS)                     #
+###############################################################################
 
 def parse_pdf_file_as_str(file_up: gr.File) -> str:
-    """Read PDF pages with PyPDF2 (local path or in-memory)."""
+    """Read PDF via PyPDF2. Attempt local path, else read from memory."""
     pdf_path = file_up.name
     if os.path.isfile(pdf_path):
         with open(pdf_path, "rb") as f:
             reader = PyPDF2.PdfReader(f)
-            text_content = []
-            for page in reader.pages:
-                text_content.append(page.extract_text() or "")
-            return "\n".join(text_content)
+            return "\n".join(page.extract_text() or "" for page in reader.pages)
     else:
         if not hasattr(file_up, "file"):
-            raise ValueError("Gradio file object has no .file attribute (PDF).")
-        try:
-            pdf_bytes = file_up.file.read()
-            reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
-            text_content = []
-            for page in reader.pages:
-                text_content.append(page.extract_text() or "")
-            return "\n".join(text_content)
-        except Exception as e:
-            raise ValueError(f"PDF parse error: {e}")
+            raise ValueError("No .file attribute found for PDF.")
+        pdf_bytes = file_up.file.read()
+        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
+        return "\n".join(page.extract_text() or "" for page in reader.pages)
 
 def parse_text_file_as_str(file_up: gr.File) -> str:
-    """Read .txt as UTF-8 from path or in-memory."""
+    """Read .txt from path or fallback to memory."""
     path = file_up.name
     if os.path.isfile(path):
         with open(path, "rb") as f:
             return f.read().decode("utf-8", errors="replace")
     else:
         if not hasattr(file_up, "file"):
-            raise ValueError("Gradio file object has no .file attribute (TXT).")
-        raw_bytes = file_up.file.read()
-        return raw_bytes.decode("utf-8", errors="replace")
+            raise ValueError("No .file attribute for TXT.")
+        return file_up.file.read().decode("utf-8", errors="replace")
 
 def parse_csv_file_to_df(file_up: gr.File) -> pd.DataFrame:
     """
-    Safely parse CSV with multiple encodings.
-    1) Local file path or fallback .file
-    2) Encodings: ["utf-8", "utf-8-sig", "latin1", "ISO-8859-1"]
+    Attempt multiple encodings for CSV: utf-8, utf-8-sig, latin1, ISO-8859-1.
     """
     path = file_up.name
-    # local path
     if os.path.isfile(path):
         for enc in ["utf-8", "utf-8-sig", "latin1", "ISO-8859-1"]:
             try:
                 return pd.read_csv(path, encoding=enc)
             except UnicodeDecodeError:
-                logger.warning(f"CSV parse failed with {enc}, trying next...")
+                logger.warning(f"CSV parse failed (enc={enc}). Trying next...")
             except Exception as e:
-                logger.warning(f"Other CSV parse error with {enc}: {e}")
-        raise ValueError("Could not parse CSV from local path with known encodings.")
+                logger.warning(f"CSV parse error (enc={enc}): {e}")
+        raise ValueError("Could not parse local CSV with known encodings.")
     else:
         if not hasattr(file_up, "file"):
-            raise ValueError("Gradio file object has no .file attribute (CSV).")
+            raise ValueError("No .file attribute for CSV.")
         raw_bytes = file_up.file.read()
         for enc in ["utf-8", "utf-8-sig", "latin1", "ISO-8859-1"]:
             try:
-                txt_decoded = raw_bytes.decode(enc, errors="replace")
+                text_decoded = raw_bytes.decode(enc, errors="replace")
                 from io import StringIO
-                return pd.read_csv(StringIO(txt_decoded))
+                return pd.read_csv(StringIO(text_decoded))
             except UnicodeDecodeError:
-                logger.warning(f"In-memory CSV parse failed with {enc}, trying next...")
+                logger.warning(f"CSV in-memory parse failed (enc={enc}). Next...")
             except Exception as e:
-                logger.warning(f"In-memory CSV parse error with {enc}: {e}")
-        raise ValueError("Could not parse CSV from memory with known encodings.")
+                logger.warning(f"In-memory CSV error (enc={enc}): {e}")
+        raise ValueError("Could not parse in-memory CSV with known encodings.")
 
 def parse_excel_file_to_df(file_up: gr.File) -> pd.DataFrame:
-    """Read Excel (.xls/.xlsx) from path or in-memory."""
-    excel_path = file_up.name
-    if os.path.isfile(excel_path):
-        return pd.read_excel(excel_path, engine="openpyxl")
+    """Read Excel from local path or memory (openpyxl)."""
+    path = file_up.name
+    if os.path.isfile(path):
+        return pd.read_excel(path, engine="openpyxl")
     else:
         if not hasattr(file_up, "file"):
-            raise ValueError("Gradio file object has no .file attribute (Excel).")
-        try:
-            excel_bytes = file_up.file.read()
-            return pd.read_excel(io.BytesIO(excel_bytes), engine="openpyxl")
-        except Exception as e:
-            raise ValueError(f"Excel parse error: {e}")
+            raise ValueError("No .file attribute for Excel.")
+        excel_bytes = file_up.file.read()
+        return pd.read_excel(io.BytesIO(excel_bytes), engine="openpyxl")
 
-##########################################################
-#                  GRADIO APP SETUP                      #
-##########################################################
+###############################################################################
+#                        8) BUILDING THE GRADIO APP                           #
+###############################################################################
 
 with gr.Blocks() as demo:
-    gr.Markdown("# 🩺 Clinical Research Assistant (No EDA) + BioPortal")
+    gr.Markdown("# 🏥 AI-Driven Clinical Assistant (No EDA)")
     gr.Markdown("""
-    - **Summarize** text (GPT-3.5)
-    - **Predict** outcomes (fine-tuned model)
+    **Highlights**:
+    - **Summarize** clinical text (OpenAI GPT-3.5)
+    - **Predict** with a specialized BERT-based model
     - **Translate** (English ↔ French)
    - **Named Entity Recognition** (spaCy)
-    - **Fetch** from PubMed, Crossref, Europe PMC
-    - **Fetch** from BioPortal (NEW)
-    - **Generate** PDF reports
-    - (EDA Removed)
+    - **Fetch** from PubMed, Crossref, Europe PMC, and **BioPortal**
+    - **Generate** professional PDF reports
+
+    *Disclaimer*: This is a research demo, **not** a medical device.
     """)
 
     with gr.Row():
-        text_input = gr.Textbox(label="Input Text", lines=5)
+        text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or notes...")
         file_input = gr.File(
             label="Upload File (txt/csv/xls/xlsx/pdf)",
             file_types=[".txt", ".csv", ".xls", ".xlsx", ".pdf"]
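One subtlety in the CSV fallback above: bytes.decode(enc, errors="replace") never raises UnicodeDecodeError, so in the in-memory branch the first encoding always "wins" and mis-decoded bytes silently become U+FFFD; only pd.read_csv's own failures advance the loop. A small standalone check (the file contents are invented for illustration):

from io import StringIO

import pandas as pd

raw_bytes = "name,café\n1,2\n".encode("latin1")  # not valid UTF-8

for enc in ["utf-8", "latin1"]:
    # Decoding strictly (no errors="replace") lets the encoding
    # fallback actually do its job.
    try:
        df = pd.read_csv(StringIO(raw_bytes.decode(enc)))
        print("parsed with", enc, "->", list(df.columns))
        break
    except UnicodeDecodeError:
        print(enc, "failed, trying next encoding...")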
@@ -491,20 +464,24 @@ with gr.Blocks() as demo:
             "Fetch PubMed Articles (Legacy)",
             "Fetch PubMed by Query",
             "Fetch Crossref by Query",
-            "Fetch BioPortal by Query",  # <-- NEW ACTION
+            "Fetch BioPortal by Query",
         ],
         label="Select an Action",
     )
     translation_option = gr.Dropdown(
-        choices=list(LANGUAGE_MAP.keys()),
-        label="Translation Option",
+        choices=list(LANGUAGE_MAP.keys()),
+        label="Translation Option",
         value="English to French"
     )
-    query_params_input = gr.Textbox(label="Query Params (JSON)", placeholder='{"term": "cancer"}')
+    query_params_input = gr.Textbox(
+        label="Query Params (JSON)",
+        placeholder='{"term": "cancer"} or {"q": "cancer"} for BioPortal'
+    )
     nct_id_input = gr.Textbox(label="NCT ID")
     report_filename_input = gr.Textbox(label="Report Filename", value="clinical_report.pdf")
     export_format = gr.Dropdown(choices=["None", "CSV", "JSON"], label="Export Format")
 
+    # Outputs
     output_text = gr.Textbox(label="Output", lines=8)
     with gr.Row():
         output_chart = gr.Plot(label="Chart 1")
@@ -514,8 +491,10 @@ with gr.Blocks() as demo:
     submit_btn = gr.Button("Submit")
 
     ################################################################
-    #                   MAIN ACTION HANDLER                        #
+    #                9) MAIN ACTION HANDLER (ASYNC)                #
     ################################################################
+    import traceback
+
     async def handle_action(
         action: str,
         txt: str,
@@ -526,189 +505,199 @@ with gr.Blocks() as demo:
         report_fn: str,
         exp_fmt: str
     ) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
-
-        combined_text = txt.strip()
-
-        # 1) If user uploaded a file, parse basic text from .txt or .pdf
-        if file_up is not None:
-            file_ext = os.path.splitext(file_up.name)[1].lower()
-            try:
-                if file_ext == ".txt":
-                    text_content = parse_text_file_as_str(file_up)
-                    combined_text += "\n" + text_content
-                elif file_ext == ".pdf":
-                    pdf_text = parse_pdf_file_as_str(file_up)
-                    combined_text += "\n" + pdf_text
-                # CSV/Excel might be parsed in the actions below if needed
-            except Exception as e:
-                return f"File parse error: {e}", None, None, None
-
-        # 2) Action dispatch
-        if action == "Summarize":
-            # If CSV or Excel is uploaded, parse DataFrame -> text
-            if file_up:
-                fx = file_up.name.lower()
-                if fx.endswith(".csv"):
-                    try:
-                        df_csv = parse_csv_file_to_df(file_up)
-                        combined_text += "\n" + df_csv.to_csv(index=False)
-                    except Exception as e:
-                        return f"CSV parse error (Summarize): {e}", None, None, None
-                elif fx.endswith((".xls", ".xlsx")):
-                    try:
-                        df_xl = parse_excel_file_to_df(file_up)
-                        combined_text += "\n" + df_xl.to_csv(index=False)
-                    except Exception as e:
-                        return f"Excel parse error (Summarize): {e}", None, None, None
-
-            summary = summarize_text(combined_text)
-            return summary, None, None, None
-
-        elif action == "Predict Outcome":
-            if file_up:
-                fx = file_up.name.lower()
-                if fx.endswith(".csv"):
-                    try:
-                        df_csv = parse_csv_file_to_df(file_up)
-                        combined_text += "\n" + df_csv.to_csv(index=False)
-                    except Exception as e:
-                        return f"CSV parse error (Predict): {e}", None, None, None
-                elif fx.endswith((".xls", ".xlsx")):
-                    try:
-                        df_xl = parse_excel_file_to_df(file_up)
-                        combined_text += "\n" + df_xl.to_csv(index=False)
-                    except Exception as e:
-                        return f"Excel parse error (Predict): {e}", None, None, None
-
-            predictions = predict_outcome(combined_text)
-            if isinstance(predictions, dict):
-                chart = visualize_predictions(predictions)
-                return json.dumps(predictions, indent=2), chart, None, None
-            return predictions, None, None, None
-
-        elif action == "Generate Report":
-            # Merge CSV/Excel if user wants them in the PDF
-            if file_up:
-                fx = file_up.name.lower()
-                if fx.endswith(".csv"):
-                    try:
-                        df_csv = parse_csv_file_to_df(file_up)
-                        combined_text += "\n" + df_csv.to_csv(index=False)
-                    except Exception as e:
-                        return f"CSV parse error (Report): {e}", None, None, None
-                elif fx.endswith((".xls", ".xlsx")):
-                    try:
-                        df_xl = parse_excel_file_to_df(file_up)
-                        combined_text += "\n" + df_xl.to_csv(index=False)
-                    except Exception as e:
-                        return f"Excel parse error (Report): {e}", None, None, None
-
-            fp = generate_report(combined_text, report_fn)
-            msg = f"Report generated: {fp}" if fp else "Report generation failed."
-            return msg, None, None, fp
-
-        elif action == "Translate":
-            if file_up:
-                fx = file_up.name.lower()
-                if fx.endswith(".csv"):
-                    try:
-                        df_csv = parse_csv_file_to_df(file_up)
-                        combined_text += "\n" + df_csv.to_csv(index=False)
-                    except Exception as e:
-                        return f"CSV parse error (Translate): {e}", None, None, None
-                elif fx.endswith((".xls", ".xlsx")):
-                    try:
-                        df_xl = parse_excel_file_to_df(file_up)
-                        combined_text += "\n" + df_xl.to_csv(index=False)
-                    except Exception as e:
-                        return f"Excel parse error (Translate): {e}", None, None, None
-
-            translated = translate_text(combined_text, translation_opt)
-            return translated, None, None, None
-
-        elif action == "Perform Named Entity Recognition":
-            if file_up:
-                fx = file_up.name.lower()
-                if fx.endswith(".csv"):
-                    try:
-                        df_csv = parse_csv_file_to_df(file_up)
-                        combined_text += "\n" + df_csv.to_csv(index=False)
-                    except Exception as e:
-                        return f"CSV parse error (NER): {e}", None, None, None
-                elif fx.endswith((".xls", ".xlsx")):
-                    try:
-                        df_xl = parse_excel_file_to_df(file_up)
-                        combined_text += "\n" + df_xl.to_csv(index=False)
-                    except Exception as e:
-                        return f"Excel parse error (NER): {e}", None, None, None
-
-            ner_result = perform_named_entity_recognition(combined_text)
-            return ner_result, None, None, None
-
-        elif action == "Fetch Clinical Studies":
-            if nct_id:
-                result = await fetch_articles_by_nct_id(nct_id)
-            elif query_str:
-                result = await fetch_articles_by_query(query_str)
-            else:
-                return "Provide either an NCT ID or valid query parameters.", None, None, None
-
-            articles = result.get("resultList", {}).get("result", [])
-            if not articles:
-                return "No articles found.", None, None, None
-
-            formatted_results = "\n\n".join(
-                f"Title: {a.get('title')}\nJournal: {a.get('journalTitle')} ({a.get('pubYear')})"
-                for a in articles
-            )
-            return formatted_results, None, None, None
-
-        elif action in ["Fetch PubMed Articles (Legacy)", "Fetch PubMed by Query"]:
-            pubmed_result = await fetch_pubmed_by_query(query_str)
-            xml_data = pubmed_result.get("result")
-            if xml_data:
-                articles = parse_pubmed_xml(xml_data)
-                if not articles:
-                    return "No articles found.", None, None, None
-                formatted = "\n\n".join(
-                    f"{a['Title']} - {a['Journal']} ({a['PublicationDate']})"
-                    for a in articles if a['Title']
-                )
-                return formatted if formatted else "No articles found.", None, None, None
-            return "No articles found or error fetching data.", None, None, None
-
-        elif action == "Fetch Crossref by Query":
-            crossref_result = await fetch_crossref_by_query(query_str)
-            items = crossref_result.get("message", {}).get("items", [])
-            if not items:
-                return "No results found.", None, None, None
-            formatted = "\n\n".join(
-                f"Title: {item.get('title', ['No title'])[0]}, DOI: {item.get('DOI')}"
-                for item in items
-            )
-            return formatted, None, None, None
-
-        elif action == "Fetch BioPortal by Query":
-            bioportal_result = await fetch_bioportal_by_query(query_str)
-            # Typically, the results are in "collection"
-            # See: https://data.bioontology.org/documentation
-            items = bioportal_result.get("collection", [])
-            if not items:
-                return "No BioPortal results found.", None, None, None
-
-            # Format a quick listing
-            formatted = "\n\n".join(
-                f"Label: {item.get('prefLabel')}, ID: {item.get('@id')}"
-                for item in items
-            )
-            return formatted, None, None, None
-
-        return "Invalid action.", None, None, None
-
+        """
+        Master function to handle user actions.
+        Returns a 4-tuple mapped to (output_text, output_chart, output_chart2, output_file).
+        """
+        try:
+            combined_text = txt.strip()
+
+            # 1) If user uploaded a file, parse minimal text from .txt/.pdf here
+            if file_up is not None:
+                ext = os.path.splitext(file_up.name)[1].lower()
+                if ext == ".txt":
+                    try:
+                        txt_data = parse_text_file_as_str(file_up)
+                        combined_text += "\n" + txt_data
+                    except Exception as e:
+                        return f"TXT parse error: {e}", None, None, None
+                elif ext == ".pdf":
+                    try:
+                        pdf_data = parse_pdf_file_as_str(file_up)
+                        combined_text += "\n" + pdf_data
+                    except Exception as e:
+                        return f"PDF parse error: {e}", None, None, None
+            # CSV and Excel are parsed *within* certain actions (e.g. Summarize)
+
+            # 2) Branch by action
+            if action == "Summarize":
+                if file_up:
+                    fx = file_up.name.lower()
+                    if fx.endswith(".csv"):
+                        try:
+                            df_csv = parse_csv_file_to_df(file_up)
+                            combined_text += "\n" + df_csv.to_csv(index=False)
+                        except Exception as e:
+                            return f"CSV parse error (Summarize): {e}", None, None, None
+                    elif fx.endswith((".xls", ".xlsx")):
+                        try:
+                            df_xl = parse_excel_file_to_df(file_up)
+                            combined_text += "\n" + df_xl.to_csv(index=False)
+                        except Exception as e:
+                            return f"Excel parse error (Summarize): {e}", None, None, None
+
+                summary = summarize_text(combined_text)
+                return summary, None, None, None
+
+            elif action == "Predict Outcome":
+                if file_up:
+                    fx = file_up.name.lower()
+                    if fx.endswith(".csv"):
+                        try:
+                            df_csv = parse_csv_file_to_df(file_up)
+                            combined_text += "\n" + df_csv.to_csv(index=False)
+                        except Exception as e:
+                            return f"CSV parse error (Predict): {e}", None, None, None
+                    elif fx.endswith((".xls", ".xlsx")):
+                        try:
+                            df_xl = parse_excel_file_to_df(file_up)
+                            combined_text += "\n" + df_xl.to_csv(index=False)
+                        except Exception as e:
+                            return f"Excel parse error (Predict): {e}", None, None, None
+
+                preds = predict_outcome(combined_text)
+                if isinstance(preds, dict):
+                    chart = visualize_predictions(preds)
+                    return json.dumps(preds, indent=2), chart, None, None
+                return preds, None, None, None
+
+            elif action == "Generate Report":
+                if file_up:
+                    fx = file_up.name.lower()
+                    if fx.endswith(".csv"):
+                        try:
+                            df_csv = parse_csv_file_to_df(file_up)
+                            combined_text += "\n" + df_csv.to_csv(index=False)
+                        except Exception as e:
+                            return f"CSV parse error (Report): {e}", None, None, None
+                    elif fx.endswith((".xls", ".xlsx")):
+                        try:
+                            df_xl = parse_excel_file_to_df(file_up)
+                            combined_text += "\n" + df_xl.to_csv(index=False)
+                        except Exception as e:
+                            return f"Excel parse error (Report): {e}", None, None, None
+
+                path = generate_report(combined_text, report_fn)
+                msg = f"Report generated: {path}" if path else "Report generation failed."
+                return msg, None, None, path
+
+            elif action == "Translate":
+                if file_up:
+                    fx = file_up.name.lower()
+                    if fx.endswith(".csv"):
+                        try:
+                            df_csv = parse_csv_file_to_df(file_up)
+                            combined_text += "\n" + df_csv.to_csv(index=False)
+                        except Exception as e:
+                            return f"CSV parse error (Translate): {e}", None, None, None
+                    elif fx.endswith((".xls", ".xlsx")):
+                        try:
+                            df_xl = parse_excel_file_to_df(file_up)
+                            combined_text += "\n" + df_xl.to_csv(index=False)
+                        except Exception as e:
+                            return f"Excel parse error (Translate): {e}", None, None, None
+
+                translated = translate_text(combined_text, translation_opt)
+                return translated, None, None, None
+
+            elif action == "Perform Named Entity Recognition":
+                if file_up:
+                    fx = file_up.name.lower()
+                    if fx.endswith(".csv"):
+                        try:
+                            df_csv = parse_csv_file_to_df(file_up)
+                            combined_text += "\n" + df_csv.to_csv(index=False)
+                        except Exception as e:
+                            return f"CSV parse error (NER): {e}", None, None, None
+                    elif fx.endswith((".xls", ".xlsx")):
+                        try:
+                            df_xl = parse_excel_file_to_df(file_up)
+                            combined_text += "\n" + df_xl.to_csv(index=False)
+                        except Exception as e:
+                            return f"Excel parse error (NER): {e}", None, None, None
+
+                ner_result = perform_named_entity_recognition(combined_text)
+                return ner_result, None, None, None
+
+            elif action == "Fetch Clinical Studies":
+                if nct_id:
+                    result = await fetch_articles_by_nct_id(nct_id)
+                elif query_str:
+                    result = await fetch_articles_by_query(query_str)
+                else:
+                    return "Provide either an NCT ID or valid query parameters.", None, None, None
+
+                articles = result.get("resultList", {}).get("result", [])
+                if not articles:
+                    return "No articles found.", None, None, None
+
+                formatted = "\n\n".join(
+                    f"Title: {a.get('title')}\nJournal: {a.get('journalTitle')} ({a.get('pubYear')})"
+                    for a in articles
+                )
+                return formatted, None, None, None
+
+            elif action in ["Fetch PubMed Articles (Legacy)", "Fetch PubMed by Query"]:
+                pubmed_result = await fetch_pubmed_by_query(query_str)
+                xml_data = pubmed_result.get("result")
+                if xml_data:
+                    articles = parse_pubmed_xml(xml_data)
+                    if not articles:
+                        return "No articles found.", None, None, None
+                    formatted = "\n\n".join(
+                        f"{a['Title']} - {a['Journal']} ({a['PublicationDate']})"
+                        for a in articles if a['Title']
+                    )
+                    return formatted if formatted else "No articles found.", None, None, None
+                return "No articles found or error in fetching PubMed data.", None, None, None
+
+            elif action == "Fetch Crossref by Query":
+                crossref_result = await fetch_crossref_by_query(query_str)
+                items = crossref_result.get("message", {}).get("items", [])
+                if not items:
+                    return "No results found.", None, None, None
+                crossref_formatted = "\n\n".join(
+                    f"Title: {it.get('title', ['No title'])[0]}, DOI: {it.get('DOI')}"
+                    for it in items
+                )
+                return crossref_formatted, None, None, None
+
+            elif action == "Fetch BioPortal by Query":
+                bp_result = await fetch_bioportal_by_query(query_str)
+                collection = bp_result.get("collection", [])
+                if not collection:
+                    return "No BioPortal results found.", None, None, None
+                # Format listing
+                formatted = "\n\n".join(
+                    f"Label: {col.get('prefLabel')}, ID: {col.get('@id')}"
+                    for col in collection
+                )
+                return formatted, None, None, None
+
+            # Fallback
+            return "Invalid action.", None, None, None
+
+        except Exception as ex:
+            # Catch all exceptions, log, and return traceback to 'output_text'
+            tb_str = traceback.format_exc()
+            logger.error(f"Exception in handle_action:\n{tb_str}")
+            return f"Traceback:\n{tb_str}", None, None, None
+
     submit_btn.click(
         fn=handle_action,
         inputs=[action, text_input, file_input, translation_option, query_params_input, nct_id_input, report_filename_input, export_format],
         outputs=[output_text, output_chart, output_chart2, output_file],
     )
 
+# Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
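For orientation, handle_action always returns a 4-tuple that submit_btn.click maps positionally onto [output_text, output_chart, output_chart2, output_file], with None leaving a slot untouched. A stripped-down sketch of that wiring (toy handler, no external services):

import gradio as gr

def handle(txt: str):
    # Same contract as handle_action: one value per output slot,
    # None for the slots this action does not use.
    return f"You said: {txt}", None, None, None

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input Text")
    out_text = gr.Textbox(label="Output")
    out_chart = gr.Plot(label="Chart 1")
    out_chart2 = gr.Plot(label="Chart 2")
    out_file = gr.File(label="Report")
    gr.Button("Submit").click(
        fn=handle,
        inputs=[box],
        outputs=[out_text, out_chart, out_chart2, out_file],
    )

if __name__ == "__main__":
    demo.launch()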
 