mgbam committed on
Commit
7e82038
·
verified ·
1 Parent(s): 19c2c87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -24,7 +24,7 @@ import altair as alt
24
  import spacy
25
  import spacy.cli
26
  import PyPDF2
27
- import io # For handling in-memory files for Excel
28
 
29
  # Ensure spaCy model is downloaded
30
  try:
@@ -131,7 +131,7 @@ def parse_pubmed_xml(xml_data: str) -> List[Dict[str, Any]]:
131
  })
132
  return articles
133
 
134
- ### Asynchronous Functions for Europe PMC ###
135
  async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
136
  params = {"query": nct_id, "format": "json"}
137
  async with httpx.AsyncClient() as client_http:
@@ -381,7 +381,6 @@ def perform_enhanced_eda(df: pd.DataFrame) -> Tuple[str, Optional[alt.Chart], Op
381
  return f"Enhanced EDA failed: {e}", None, None
382
 
383
  ### File Handling ###
384
-
385
  def read_uploaded_file(uploaded_file: Optional[gr.File]) -> str:
386
  """
387
  Reads the content of an uploaded file (txt, csv, xls, xlsx, pdf).
@@ -405,7 +404,6 @@ def read_uploaded_file(uploaded_file: Optional[gr.File]) -> str:
405
 
406
  # Excel
407
  elif file_ext in [".xls", ".xlsx"]:
408
- # We won't parse here; we'll parse in parse_excel_file(...)
409
  # Return a placeholder so we know an Excel file was uploaded
410
  return "EXCEL_FILE_PLACEHOLDER"
411
 
@@ -425,13 +423,21 @@ def read_uploaded_file(uploaded_file: Optional[gr.File]) -> str:
425
 
426
  def parse_excel_file(uploaded_file: gr.File) -> pd.DataFrame:
427
  """
428
- Parse an Excel file into a pandas DataFrame using raw bytes.
429
- This avoids the NamedString error from calling .read() on a Gradio file.
 
430
  """
 
 
 
 
 
 
 
 
431
  try:
432
- excel_bytes = uploaded_file.data # raw file content in bytes
433
- df = pd.read_excel(io.BytesIO(excel_bytes), engine="openpyxl")
434
- return df
435
  except Exception as e:
436
  logger.error(f"Excel parsing error: {e}")
437
  raise ValueError(f"Excel parsing error: {e}")
 
24
  import spacy
25
  import spacy.cli
26
  import PyPDF2
27
+ import io # For handling in-memory files (Excel, etc.)
28
 
29
  # Ensure spaCy model is downloaded
30
  try:
 
131
  })
132
  return articles
133
 
134
+ ### Async Functions for Europe PMC ###
135
  async def fetch_articles_by_nct_id(nct_id: str) -> Dict[str, Any]:
136
  params = {"query": nct_id, "format": "json"}
137
  async with httpx.AsyncClient() as client_http:
 
381
  return f"Enhanced EDA failed: {e}", None, None
382
 
383
  ### File Handling ###
 
384
  def read_uploaded_file(uploaded_file: Optional[gr.File]) -> str:
385
  """
386
  Reads the content of an uploaded file (txt, csv, xls, xlsx, pdf).
 
404
 
405
  # Excel
406
  elif file_ext in [".xls", ".xlsx"]:
 
407
  # Return a placeholder so we know an Excel file was uploaded
408
  return "EXCEL_FILE_PLACEHOLDER"
409
 
 
423
 
424
  def parse_excel_file(uploaded_file: gr.File) -> pd.DataFrame:
425
  """
426
+ Parse an Excel file into a pandas DataFrame.
427
+ 1) Try using the local file path, if it exists.
428
+ 2) Otherwise, read from the in-memory object using uploaded_file.file.read().
429
  """
430
+ import pandas as pd
431
+
432
+ # If we have a valid local file path (common in some Gradio versions)
433
+ if os.path.exists(uploaded_file.name):
434
+ # Directly read from the file path
435
+ return pd.read_excel(uploaded_file.name, engine="openpyxl")
436
+
437
+ # Otherwise, we read the file from memory
438
  try:
439
+ excel_bytes = uploaded_file.file.read()
440
+ return pd.read_excel(io.BytesIO(excel_bytes), engine="openpyxl")
 
441
  except Exception as e:
442
  logger.error(f"Excel parsing error: {e}")
443
  raise ValueError(f"Excel parsing error: {e}")