danial0203 committed on
Commit
e003f08
·
verified ·
1 Parent(s): 41f37dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -84,25 +84,26 @@ def is_image_file(filename):
84
 
85
  def process_file_and_generate_csv(input_file):
86
  output_csv_path = "output.csv" # Output CSV file name
87
- file_content = BytesIO(input_file.read()) # Read file content into memory for processing
88
- file_content.seek(0) # Go to the start of the file-like object
89
-
 
 
 
 
90
  object_name = os.path.basename(input_file.name)
91
 
92
- # Check if the uploaded file is an image or needs conversion
93
  images = []
94
  if is_image_file(object_name):
95
  images.append(Image.open(file_content))
96
  file_content.seek(0) # Reset for potential re-use
97
  else:
98
- # Convert PDF/TIFF to images
99
  images.extend(convert_from_path(file_content))
100
 
101
  csv_output = BytesIO()
102
  writer = csv.writer(csv_output)
103
 
104
  for i, image in enumerate(images):
105
- # Process each image and upload to S3 for Textract processing
106
  image_byte_array = BytesIO()
107
  image.save(image_byte_array, format='JPEG')
108
  image_byte_array.seek(0)
@@ -114,7 +115,7 @@ def process_file_and_generate_csv(input_file):
114
  tables = [block for block in blocks if block['BlockType'] == "TABLE"]
115
  generate_table_csv(tables, blocks_map, writer)
116
 
117
- csv_output.seek(0) # Go to the start of the CSV in-memory file
118
  return csv_output, output_csv_path
119
 
120
  # Gradio Interface
 
84
 
85
  def process_file_and_generate_csv(input_file):
86
  output_csv_path = "output.csv" # Output CSV file name
87
+
88
+ # Handling different types of input file objects
89
+ if hasattr(input_file, 'read'): # If input_file has a read method, it's a file-like object
90
+ file_content = BytesIO(input_file.read()) # Read file content into memory for processing
91
+ else: # Assuming input_file might be a path (string) to the uploaded file
92
+ file_content = open(input_file.name, 'rb') # Open the file for reading in binary mode
93
+
94
  object_name = os.path.basename(input_file.name)
95
 
 
96
  images = []
97
  if is_image_file(object_name):
98
  images.append(Image.open(file_content))
99
  file_content.seek(0) # Reset for potential re-use
100
  else:
 
101
  images.extend(convert_from_path(file_content))
102
 
103
  csv_output = BytesIO()
104
  writer = csv.writer(csv_output)
105
 
106
  for i, image in enumerate(images):
 
107
  image_byte_array = BytesIO()
108
  image.save(image_byte_array, format='JPEG')
109
  image_byte_array.seek(0)
 
115
  tables = [block for block in blocks if block['BlockType'] == "TABLE"]
116
  generate_table_csv(tables, blocks_map, writer)
117
 
118
+ csv_output.seek(0)
119
  return csv_output, output_csv_path
120
 
121
  # Gradio Interface