mgbam committed on
Commit
31be05a
·
verified ·
1 Parent(s): 7e82038

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +193 -127
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import json
3
  import csv
4
  import asyncio
@@ -24,7 +25,6 @@ import altair as alt
24
  import spacy
25
  import spacy.cli
26
  import PyPDF2
27
- import io # For handling in-memory files (Excel, etc.)
28
 
29
  # Ensure spaCy model is downloaded
30
  try:
@@ -53,7 +53,7 @@ PUBMED_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
53
  PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
54
  EUROPE_PMC_BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
55
 
56
- # Hugging Face login
57
  login(HUGGINGFACE_TOKEN)
58
 
59
  # Initialize OpenAI
@@ -94,7 +94,10 @@ LANGUAGE_MAP: Dict[str, Tuple[str, str]] = {
94
  "French to English": ("fr", "en"),
95
  }
96
 
97
- ### Utility Functions ###
 
 
 
98
  def safe_json_parse(text: str) -> Union[Dict, None]:
99
  """Safely parse JSON string into a Python dictionary."""
100
  try:
@@ -131,7 +134,10 @@ def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
131
  })
132
  return articles
133
 
134
- ### Async Functions for Europe PMC ###
 
 
 
135
  async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
136
  params = {"query": nct_id, "format": "json"}
137
  async with httpx.AsyncClient() as client_http:
@@ -158,7 +164,6 @@ async def fetch_articles_by_query(query_params: str) -> Dict[str, Any]:
158
  logger.error(f"Error fetching articles: {e}")
159
  return {"error": str(e)}
160
 
161
- ### PubMed Integration ###
162
  async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
163
  parsed_params = safe_json_parse(query_params)
164
  if not parsed_params or not isinstance(parsed_params, dict):
@@ -194,7 +199,6 @@ async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
194
  logger.error(f"Error fetching PubMed articles: {e}")
195
  return {"error": str(e)}
196
 
197
- ### Crossref Integration ###
198
  async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
199
  parsed_params = safe_json_parse(query_params)
200
  if not parsed_params or not isinstance(parsed_params, dict):
@@ -209,7 +213,10 @@ async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
209
  logger.error(f"Error fetching Crossref data: {e}")
210
  return {"error": str(e)}
211
 
212
- ### Core Functions ###
 
 
 
213
  def summarize_text(text: str) -> str:
214
  """Summarize text using OpenAI."""
215
  if not text.strip():
@@ -310,21 +317,19 @@ def perform_named_entity_recognition(text: str) -> str:
310
  logger.error(f"NER Error: {e}")
311
  return "Named Entity Recognition failed."
312
 
313
- ### Enhanced EDA ###
 
 
 
314
  def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Optional[alt.Chart]]:
315
  """
316
- Perform a more advanced EDA given a DataFrame:
317
- - Show dataset info (columns, shape, numeric summary).
318
- - Generate a correlation heatmap (for numeric columns).
319
- - Generate distribution plots (histograms) for numeric columns.
320
  Returns (text_summary, correlation_chart, distribution_chart).
321
  """
322
  try:
323
- # Basic info
324
  columns_info = f"Columns: {list(df.columns)}"
325
  shape_info = f"Shape: {df.shape[0]} rows x {df.shape[1]} columns"
326
 
327
- # Describe with include="all" to show all columns
328
  with pd.option_context("display.max_colwidth", 200, "display.max_rows", None):
329
  describe_info = df.describe(include="all").to_string()
330
 
@@ -334,7 +339,6 @@ def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Op
334
  f"Summary Statistics:\n{describe_info}\n"
335
  )
336
 
337
- # Correlation heatmap (if at least 2 numeric columns)
338
  numeric_cols = df.select_dtypes(include="number")
339
  corr_chart = None
340
  if numeric_cols.shape[1] >= 2:
@@ -353,7 +357,6 @@ def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Op
353
  .properties(width=400, height=400, title="Correlation Heatmap")
354
  )
355
 
356
- # Distribution plots (histograms) for numeric columns
357
  distribution_chart = None
358
  if numeric_cols.shape[1] >= 1:
359
  df_long = numeric_cols.melt(var_name='Column', value_name='Value')
@@ -380,86 +383,66 @@ def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Op
380
  logger.error(f"Enhanced EDA Error: {e}")
381
  return f"Enhanced EDA failed: {e}", None, None
382
 
383
- ### File Handling ###
384
- def read_uploaded_file(uploaded_file: Optional[gr.File]) -> str:
385
- """
386
- Reads the content of an uploaded file (txt, csv, xls, xlsx, pdf).
387
- Returns the extracted text or CSV-like content for non-Excel files.
388
- For Excel, we return a placeholder string; we'll handle it later.
389
- """
390
- if uploaded_file is None:
391
- return ""
392
 
393
- file_name = uploaded_file.name
394
- file_ext = os.path.splitext(file_name)[1].lower()
 
395
 
 
 
 
 
 
 
 
 
396
  try:
397
- # TXT
398
- if file_ext == ".txt":
399
- return uploaded_file.read().decode("utf-8")
400
-
401
- # CSV
402
- elif file_ext == ".csv":
403
- return uploaded_file.read().decode("utf-8")
404
-
405
- # Excel
406
- elif file_ext in [".xls", ".xlsx"]:
407
- # Return a placeholder so we know an Excel file was uploaded
408
- return "EXCEL_FILE_PLACEHOLDER"
409
-
410
- # PDF
411
- elif file_ext == ".pdf":
412
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
413
- text_content = []
414
- for page in pdf_reader.pages:
415
- text_content.append(page.extract_text())
416
- return "\n".join(text_content)
417
-
418
- else:
419
- return f"Unsupported file format: {file_ext}"
420
  except Exception as e:
421
- logger.error(f"File read error: {e}")
422
- return f"Error reading file: {e}"
423
 
424
  def parse_excel_file(uploaded_file: gr.File) -> pd.DataFrame:
425
  """
426
  Parse an Excel file into a pandas DataFrame.
427
- 1) Try using the local file path, if it exists.
428
- 2) Otherwise, read from the in-memory object using uploaded_file.file.read().
429
  """
430
  import pandas as pd
 
431
 
432
- # If we have a valid local file path (common in some Gradio versions)
433
- if os.path.exists(uploaded_file.name):
434
- # Directly read from the file path
435
- return pd.read_excel(uploaded_file.name, engine="openpyxl")
436
 
437
- # Otherwise, we read the file from memory
438
  try:
439
  excel_bytes = uploaded_file.file.read()
440
  return pd.read_excel(io.BytesIO(excel_bytes), engine="openpyxl")
441
  except Exception as e:
442
- logger.error(f"Excel parsing error: {e}")
443
- raise ValueError(f"Excel parsing error: {e}")
444
 
445
- def parse_csv_content(csv_content: str) -> pd.DataFrame:
446
- """
447
- Attempt to parse CSV content with both utf-8 and utf-8-sig
448
- to handle BOM issues or encoding complexities.
449
- """
450
- from io import StringIO
451
- errors = []
452
- for encoding_try in ["utf-8", "utf-8-sig"]:
453
- try:
454
- df = pd.read_csv(StringIO(csv_content), encoding=encoding_try)
455
- return df
456
- except Exception as e:
457
- errors.append(f"Encoding {encoding_try} failed: {e}")
458
- error_msg = "Could not parse CSV content.\n" + "\n".join(errors)
459
- logger.error(error_msg)
460
- raise ValueError(error_msg)
461
 
462
- ### Gradio Interface ###
463
  with gr.Blocks() as demo:
464
  gr.Markdown("# ✨ Advanced Clinical Research Assistant with Enhanced EDA ✨")
465
  gr.Markdown("""
@@ -476,6 +459,7 @@ Welcome to the **Enhanced** AI-Powered Clinical Assistant!
476
  # Inputs
477
  with gr.Row():
478
  text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or query...")
 
479
  file_input = gr.File(
480
  label="Upload File (txt/csv/xls/xlsx/pdf)",
481
  file_types=[".txt", ".csv", ".xls", ".xlsx", ".pdf"]
@@ -515,16 +499,17 @@ Welcome to the **Enhanced** AI-Powered Clinical Assistant!
515
 
516
  # Outputs
517
  output_text = gr.Textbox(label="Output", lines=10)
518
-
519
  with gr.Row():
520
  output_chart = gr.Plot(label="Visualization 1")
521
  output_chart2 = gr.Plot(label="Visualization 2")
522
-
523
  output_file = gr.File(label="Generated File")
524
 
525
  submit_button = gr.Button("Submit")
526
 
527
- # Async function for handling actions
 
 
 
528
  async def handle_action(
529
  action: str,
530
  text: str,
@@ -536,68 +521,85 @@ Welcome to the **Enhanced** AI-Powered Clinical Assistant!
536
  export_format: str
537
  ) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
538
 
539
- # 1) Read the uploaded file (if any) -> returns a string or placeholder
540
- file_content = read_uploaded_file(file_up)
541
 
542
- # 2) Combine user text with file text if needed
543
- combined_text = (text + "\n" + file_content).strip() if file_content else text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
- ### Branch by action ###
546
  if action == "Summarize":
 
 
 
 
 
 
 
 
 
 
 
 
547
  return summarize_text(combined_text), None, None, None
548
 
549
  elif action == "Predict Outcome":
550
- predictions = predict_outcome(combined_text)
551
- if isinstance(predictions, dict):
552
- chart = visualize_predictions(predictions)
553
- return json.dumps(predictions, indent=2), chart, None, None
554
- return predictions, None, None, None
555
 
556
  elif action == "Generate Report":
 
 
 
 
 
 
 
557
  file_path = generate_report(combined_text, filename=report_filename)
558
  msg = f"Report generated: {file_path}" if file_path else "Report generation failed."
559
  return msg, None, None, file_path
560
 
561
  elif action == "Translate":
562
- return translate_text(combined_text, translation_opt), None, None, None
 
 
 
 
 
 
 
 
563
 
564
  elif action == "Perform Named Entity Recognition":
 
 
 
 
 
 
 
565
  ner_result = perform_named_entity_recognition(combined_text)
566
  return ner_result, None, None, None
567
 
568
  elif action == "Perform Enhanced EDA":
569
- # Ensure some data is provided
570
- if not file_up and not combined_text:
571
- return "No data provided for EDA.", None, None, None
572
-
573
- # If the user uploaded an Excel file
574
- if file_up and file_up.name.lower().endswith((".xls", ".xlsx")):
575
- try:
576
- df_excel = parse_excel_file(file_up)
577
- eda_summary, corr_chart, dist_chart = perform_enhanced_eda(df_excel)
578
- return eda_summary, corr_chart, dist_chart, None
579
- except Exception as e:
580
- return f"Excel EDA failed: {e}", None, None, None
581
-
582
- # If the user uploaded a CSV
583
- if file_up and file_up.name.lower().endswith(".csv"):
584
- try:
585
- df_csv = parse_csv_content(file_content)
586
- eda_summary, corr_chart, dist_chart = perform_enhanced_eda(df_csv)
587
- return eda_summary, corr_chart, dist_chart, None
588
- except Exception as e:
589
- return f"CSV EDA failed: {e}", None, None, None
590
-
591
- # If no file but possibly CSV text in the text box
592
- if not file_up and "," in combined_text:
593
- try:
594
- df_csv = parse_csv_content(combined_text)
595
- eda_summary, corr_chart, dist_chart = perform_enhanced_eda(df_csv)
596
- return eda_summary, corr_chart, dist_chart, None
597
- except Exception as e:
598
- return f"CSV EDA failed: {e}", None, None, None
599
-
600
- return "No valid CSV/Excel data found for EDA.", None, None, None
601
 
602
  elif action == "Fetch Clinical Studies":
603
  if nct_id:
@@ -642,9 +644,69 @@ Welcome to the **Enhanced** AI-Powered Clinical Assistant!
642
  )
643
  return formatted, None, None, None
644
 
645
- # Default fallback
646
  return "Invalid action.", None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
648
  submit_button.click(
649
  handle_action,
650
  inputs=[
@@ -657,8 +719,12 @@ Welcome to the **Enhanced** AI-Powered Clinical Assistant!
657
  report_filename_input,
658
  export_format,
659
  ],
660
- outputs=[output_text, output_chart, output_chart2, output_file],
 
 
 
 
 
661
  )
662
 
663
- # Launch the Gradio app
664
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
1
  import os
2
+ import io
3
  import json
4
  import csv
5
  import asyncio
 
25
  import spacy
26
  import spacy.cli
27
  import PyPDF2
 
28
 
29
  # Ensure spaCy model is downloaded
30
  try:
 
53
  PUBMED_FETCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
54
  EUROPE_PMC_BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
55
 
56
+ # Log in to Hugging Face
57
  login(HUGGINGFACE_TOKEN)
58
 
59
  # Initialize OpenAI
 
94
  "French to English": ("fr", "en"),
95
  }
96
 
97
+ ###################################################
98
+ # UTILS #
99
+ ###################################################
100
+
101
  def safe_json_parse(text: str) -> Union[Dict, None]:
102
  """Safely parse JSON string into a Python dictionary."""
103
  try:
 
134
  })
135
  return articles
136
 
137
+ ###################################################
138
+ # ASYNC FETCHES #
139
+ ###################################################
140
+
141
  async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
142
  params = {"query": nct_id, "format": "json"}
143
  async with httpx.AsyncClient() as client_http:
 
164
  logger.error(f"Error fetching articles: {e}")
165
  return {"error": str(e)}
166
 
 
167
  async def fetch_pubmed_by_query(query_params: str) -> Dict[str, Any]:
168
  parsed_params = safe_json_parse(query_params)
169
  if not parsed_params or not isinstance(parsed_params, dict):
 
199
  logger.error(f"Error fetching PubMed articles: {e}")
200
  return {"error": str(e)}
201
 
 
202
  async def fetch_crossref_by_query(query_params: str) -> Dict[str, Any]:
203
  parsed_params = safe_json_parse(query_params)
204
  if not parsed_params or not isinstance(parsed_params, dict):
 
213
  logger.error(f"Error fetching Crossref data: {e}")
214
  return {"error": str(e)}
215
 
216
+ ###################################################
217
+ # CORE LOGIC #
218
+ ###################################################
219
+
220
  def summarize_text(text: str) -> str:
221
  """Summarize text using OpenAI."""
222
  if not text.strip():
 
317
  logger.error(f"NER Error: {e}")
318
  return "Named Entity Recognition failed."
319
 
320
+ ###################################################
321
+ # ENHANCED EDA #
322
+ ###################################################
323
+
324
  def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Optional[alt.Chart]]:
325
  """
326
+ Show columns, shape, numeric summary, correlation heatmap, and distribution histograms.
 
 
 
327
  Returns (text_summary, correlation_chart, distribution_chart).
328
  """
329
  try:
 
330
  columns_info = f"Columns: {list(df.columns)}"
331
  shape_info = f"Shape: {df.shape[0]} rows x {df.shape[1]} columns"
332
 
 
333
  with pd.option_context("display.max_colwidth", 200, "display.max_rows", None):
334
  describe_info = df.describe(include="all").to_string()
335
 
 
339
  f"Summary Statistics:\n{describe_info}\n"
340
  )
341
 
 
342
  numeric_cols = df.select_dtypes(include="number")
343
  corr_chart = None
344
  if numeric_cols.shape[1] >= 2:
 
357
  .properties(width=400, height=400, title="Correlation Heatmap")
358
  )
359
 
 
360
  distribution_chart = None
361
  if numeric_cols.shape[1] >= 1:
362
  df_long = numeric_cols.melt(var_name='Column', value_name='Value')
 
383
  logger.error(f"Enhanced EDA Error: {e}")
384
  return f"Enhanced EDA failed: {e}", None, None
385
 
386
+ ###################################################
387
+ # FILE PARSING #
388
+ ###################################################
 
 
 
 
 
 
389
 
390
def parse_text_file(uploaded_file: Any) -> str:
    """
    Read an uploaded .txt file and return its contents as text.

    Args:
        uploaded_file: The upload value handed to the handler; any object
            exposing ``read()``. (Presumably the temp-file wrapper Gradio
            passes for a File input rather than the ``gr.File`` component
            itself - confirm against the installed Gradio version.)

    Returns:
        The file contents as ``str``. A leading byte-order mark is removed
        so it does not leak into downstream prompts or reports.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    raw = uploaded_file.read()

    # A handle opened in text mode already yields str; calling .decode()
    # on it would raise AttributeError.
    if isinstance(raw, str):
        return raw[1:] if raw.startswith("\ufeff") else raw

    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM.
    return raw.decode("utf-8-sig")
393
 
394
def parse_csv_file(uploaded_file: Any) -> pd.DataFrame:
    """
    Parse an uploaded CSV file into a pandas DataFrame.

    The content is decoded as UTF-8 with any leading byte-order mark
    removed ("utf-8-sig"), so BOM handling is explicit instead of being
    left to the CSV parser. Bytes that are not valid UTF-8 are replaced
    with U+FFFD rather than aborting the parse.

    Args:
        uploaded_file: The upload value handed to the handler; any object
            exposing ``read()`` that yields bytes (or str for a text-mode
            handle).

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If pandas cannot parse the content as CSV (this
            includes an empty file). The original error is chained as
            ``__cause__``.
    """
    from io import StringIO

    raw = uploaded_file.read()
    if isinstance(raw, str):
        # Text-mode handle: nothing to decode, just drop a leading BOM.
        content = raw[1:] if raw.startswith("\ufeff") else raw
    else:
        content = raw.decode("utf-8-sig", errors="replace")

    try:
        return pd.read_csv(StringIO(content))
    except Exception as e:
        # Normalise every parser failure to ValueError for callers, but
        # keep the underlying exception attached for debugging.
        raise ValueError(f"CSV parse error: {e}") from e
 
408
 
def parse_excel_file(uploaded_file: Any) -> pd.DataFrame:
    """
    Parse an uploaded Excel workbook into a pandas DataFrame.

    Resolution order:
      1) If ``uploaded_file.name`` is an existing file, read from that path.
      2) Otherwise read the raw bytes from ``uploaded_file.file``.

    ``.xlsx`` workbooks are read with openpyxl, as before. For any other
    extension the engine is left to pandas, because openpyxl cannot open
    legacy ``.xls`` workbooks and forcing it made every ``.xls`` upload fail.

    Args:
        uploaded_file: The upload value handed to the handler; must expose
            ``name`` (a path) and, for the in-memory fallback, ``file``.

    Returns:
        The first worksheet as a DataFrame (pandas' ``read_excel`` default).

    Raises:
        ValueError: On any failure in either branch; the original error is
            chained as ``__cause__``.
    """
    import io
    import os

    try:
        excel_path = uploaded_file.name
        # None lets pandas choose the reader from the extension / content.
        engine = "openpyxl" if str(excel_path).lower().endswith(".xlsx") else None

        # Try the local path first.
        if os.path.isfile(excel_path):
            return pd.read_excel(excel_path, engine=engine)

        # Fall back to reading raw bytes from uploaded_file.file.
        excel_bytes = uploaded_file.file.read()
        return pd.read_excel(io.BytesIO(excel_bytes), engine=engine)
    except Exception as e:
        # Both branches now fail the same way, so callers see one error type.
        raise ValueError(f"Excel parse error: {e}") from e
 
429
 
430
def parse_pdf_file(uploaded_file: gr.File) -> str:
    """
    Extract the text of every page of an uploaded PDF using PyPDF2.

    Returns:
        The per-page text joined with newlines. On any failure the error is
        logged and an "Error reading PDF file: ..." message is returned
        instead of raising.
    """
    # NOTE(review): the failure message comes back through the same channel
    # as real document text, so callers cannot tell the two apart.
    try:
        reader = PyPDF2.PdfReader(uploaded_file)
        page_texts = [pdf_page.extract_text() for pdf_page in reader.pages]
        return "\n".join(page_texts)
    except Exception as e:
        logger.error(f"PDF parse error: {e}")
        return f"Error reading PDF file: {e}"
441
+
442
+ ###################################################
443
+ # GRADIO INTERFACE #
444
+ ###################################################
 
445
 
 
446
  with gr.Blocks() as demo:
447
  gr.Markdown("# ✨ Advanced Clinical Research Assistant with Enhanced EDA ✨")
448
  gr.Markdown("""
 
459
  # Inputs
460
  with gr.Row():
461
  text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter clinical text or query...")
462
+ # We'll rely on .name and .file for the path and file handle
463
  file_input = gr.File(
464
  label="Upload File (txt/csv/xls/xlsx/pdf)",
465
  file_types=[".txt", ".csv", ".xls", ".xlsx", ".pdf"]
 
499
 
500
  # Outputs
501
  output_text = gr.Textbox(label="Output", lines=10)
 
502
  with gr.Row():
503
  output_chart = gr.Plot(label="Visualization 1")
504
  output_chart2 = gr.Plot(label="Visualization 2")
 
505
  output_file = gr.File(label="Generated File")
506
 
507
  submit_button = gr.Button("Submit")
508
 
509
+ ################################################################
510
+ # MAIN HANDLER FUNCTION #
511
+ ################################################################
512
+
513
  async def handle_action(
514
  action: str,
515
  text: str,
 
521
  export_format: str
522
  ) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
523
 
524
+ # 1) Start with user-provided text
525
+ combined_text = text.strip()
526
 
527
+ # 2) If user uploaded a file, parse it based on extension
528
+ if file_up is not None:
529
+ file_ext = os.path.splitext(file_up.name)[1].lower()
530
+
531
+ if file_ext == ".txt":
532
+ file_text = parse_text_file(file_up)
533
+ combined_text = (combined_text + "\n" + file_text).strip()
534
+
535
+ elif file_ext == ".csv":
536
+ # If user chose EDA, we'll parse into DataFrame below
537
+ # If we just want to combine text for Summarize, etc., do so:
538
+ pass
539
+
540
+ elif file_ext in [".xls", ".xlsx"]:
541
+ # We'll handle Excel parsing in the EDA step if needed
542
+ pass
543
+
544
+ elif file_ext == ".pdf":
545
+ file_text = parse_pdf_file(file_up)
546
+ combined_text = (combined_text + "\n" + file_text).strip()
547
 
548
+ ### ACTIONS ###
549
  if action == "Summarize":
550
+ if file_up and file_up.name.endswith(".csv"):
551
+ # Merge CSV text into combined_text
552
+ # in case user wants summarization of the CSV's raw text
553
+ try:
554
+ df_csv = parse_csv_file(file_up)
555
+ # Turn CSV into text
556
+ csv_as_text = df_csv.to_csv(index=False)
557
+ combined_text = (combined_text + "\n" + csv_as_text).strip()
558
+ except Exception as e:
559
+ return f"CSV parse error for Summarize: {e}", None, None, None
560
+
561
+ # Summarize the combined text
562
  return summarize_text(combined_text), None, None, None
563
 
564
  elif action == "Predict Outcome":
565
+ return _action_predict_outcome(combined_text, file_up)
 
 
 
 
566
 
567
  elif action == "Generate Report":
568
+ # Add CSV content if needed
569
+ if file_up and file_up.name.endswith(".csv"):
570
+ try:
571
+ df_csv = parse_csv_file(file_up)
572
+ combined_text += "\n" + df_csv.to_csv(index=False)
573
+ except Exception as e:
574
+ logger.error(f"Error reading CSV for report: {e}")
575
  file_path = generate_report(combined_text, filename=report_filename)
576
  msg = f"Report generated: {file_path}" if file_path else "Report generation failed."
577
  return msg, None, None, file_path
578
 
579
  elif action == "Translate":
580
+ # Optionally read CSV or PDF text?
581
+ if file_up and file_up.name.endswith(".csv"):
582
+ try:
583
+ df_csv = parse_csv_file(file_up)
584
+ combined_text += "\n" + df_csv.to_csv(index=False)
585
+ except Exception as e:
586
+ return f"CSV parse error for Translate: {e}", None, None, None
587
+ translated = translate_text(combined_text, translation_opt)
588
+ return translated, None, None, None
589
 
590
  elif action == "Perform Named Entity Recognition":
591
+ # Merge CSV as text if user wants NER on CSV
592
+ if file_up and file_up.name.endswith(".csv"):
593
+ try:
594
+ df_csv = parse_csv_file(file_up)
595
+ combined_text += "\n" + df_csv.to_csv(index=False)
596
+ except Exception as e:
597
+ return f"CSV parse error for NER: {e}", None, None, None
598
  ner_result = perform_named_entity_recognition(combined_text)
599
  return ner_result, None, None, None
600
 
601
  elif action == "Perform Enhanced EDA":
602
+ return await _action_eda(combined_text, file_up, text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
 
604
  elif action == "Fetch Clinical Studies":
605
  if nct_id:
 
644
  )
645
  return formatted, None, None, None
646
 
 
647
  return "Invalid action.", None, None, None
648
+
649
def _action_predict_outcome(combined_text: str, file_up: Optional[gr.File]) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
    """
    Run outcome prediction on the combined text, optionally extended with
    the contents of an uploaded CSV.

    Args:
        combined_text: Text assembled by the caller from the text box and
            any .txt/.pdf upload.
        file_up: The uploaded file value, or None when nothing was uploaded.

    Returns:
        A 4-tuple ``(text, chart, None, None)``. When ``predict_outcome``
        returns a dict, ``text`` is its pretty-printed JSON and ``chart`` is
        the result of ``visualize_predictions``; otherwise the prediction is
        returned as-is with no chart. On a CSV parse failure ``text`` is the
        error message.
    """
    text_for_model = combined_text

    # Compare the lower-cased extension so "DATA.CSV" is treated like
    # "data.csv", matching how the extension is derived elsewhere in this file.
    if file_up and os.path.splitext(file_up.name)[1].lower() == ".csv":
        try:
            df_csv = parse_csv_file(file_up)
            # Append the CSV rows as text so they are classified with the input.
            text_for_model = combined_text + "\n" + df_csv.to_csv(index=False)
        except Exception as e:
            return f"CSV parse error for Predict Outcome: {e}", None, None, None

    predictions = predict_outcome(text_for_model)
    if isinstance(predictions, dict):
        chart = visualize_predictions(predictions)
        return json.dumps(predictions, indent=2), chart, None, None
    return predictions, None, None, None
666
 
667
async def _action_eda(combined_text: str, file_up: Optional[gr.File], raw_text: str) -> Tuple[Optional[str], Optional[Any], Optional[Any], Optional[str]]:
    """
    Run Enhanced EDA on tabular data from an upload or from pasted text.

    With an upload, .csv and .xls/.xlsx files are parsed and analysed; any
    other extension is rejected. Without an upload, ``raw_text`` is parsed
    as CSV when it contains a comma. ``combined_text`` is accepted for
    signature parity with the caller but is not read here.

    Returns:
        ``(summary, correlation_chart, distribution_chart, None)`` on
        success, or ``(message, None, None, None)`` when there is no usable
        data or parsing fails.
    """
    if not file_up:
        # No upload: the text box is the only possible data source.
        if not raw_text.strip():
            return "No data provided for EDA.", None, None, None
        if "," not in raw_text:
            return "No valid CSV/Excel data found for EDA.", None, None, None
        try:
            from io import StringIO
            pasted_frame = pd.read_csv(StringIO(raw_text))
            summary, heatmap, histograms = perform_enhanced_eda(pasted_frame)
            return summary, heatmap, histograms, None
        except Exception as e:
            return f"EDA parse error for pasted CSV: {e}", None, None, None

    extension = os.path.splitext(file_up.name)[1].lower()

    if extension == ".csv":
        try:
            csv_frame = parse_csv_file(file_up)
            summary, heatmap, histograms = perform_enhanced_eda(csv_frame)
            return summary, heatmap, histograms, None
        except Exception as e:
            return f"CSV EDA failed: {e}", None, None, None

    if extension in [".xls", ".xlsx"]:
        try:
            excel_frame = parse_excel_file(file_up)
            summary, heatmap, histograms = perform_enhanced_eda(excel_frame)
            return summary, heatmap, histograms, None
        except Exception as e:
            return f"Excel EDA failed: {e}", None, None, None

    # Any other upload type (e.g. .pdf, .txt) is not tabular.
    return "No valid CSV/Excel data found for EDA.", None, None, None
709
+
710
  submit_button.click(
711
  handle_action,
712
  inputs=[
 
719
  report_filename_input,
720
  export_format,
721
  ],
722
+ outputs=[
723
+ output_text,
724
+ output_chart,
725
+ output_chart2,
726
+ output_file,
727
+ ],
728
  )
729
 
 
730
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)