danial0203 committed on
Commit
4e4f693
·
verified ·
1 Parent(s): 8cd0fcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -83,19 +83,26 @@ def is_image_file(filename):
83
  return any(filename.lower().endswith(ext) for ext in image_file_extensions)
84
 
85
  def process_file_and_generate_csv(input_file):
86
- # Check if the uploaded file is an image or needs conversion to images
 
 
 
 
 
 
87
  images = []
88
- if is_image_file(input_file.name):
89
- input_file.seek(0) # Go to the start of the file
90
- images.append(Image.open(input_file))
91
  else:
92
- input_file.seek(0) # Ensure we're at the start of the file
93
- images.extend(convert_from_bytes(input_file.read()))
94
 
95
  csv_output = BytesIO()
96
  writer = csv.writer(csv_output)
97
 
98
  for i, image in enumerate(images):
 
99
  image_byte_array = BytesIO()
100
  image.save(image_byte_array, format='JPEG')
101
  image_byte_array.seek(0)
@@ -107,7 +114,7 @@ def process_file_and_generate_csv(input_file):
107
  tables = [block for block in blocks if block['BlockType'] == "TABLE"]
108
  generate_table_csv(tables, blocks_map, writer)
109
 
110
- csv_output.seek(0)
111
  return csv_output, output_csv_path
112
 
113
  # Gradio Interface
 
83
  return any(filename.lower().endswith(ext) for ext in image_file_extensions)
84
 
85
  def process_file_and_generate_csv(input_file):
86
+ output_csv_path = "output.csv" # Output CSV file name
87
+ file_content = BytesIO(input_file.read()) # Read file content into memory for processing
88
+ file_content.seek(0) # Go to the start of the file-like object
89
+
90
+ object_name = os.path.basename(input_file.name)
91
+
92
+ # Check if the uploaded file is an image or needs conversion
93
  images = []
94
+ if is_image_file(object_name):
95
+ images.append(Image.open(file_content))
96
+ file_content.seek(0) # Reset for potential re-use
97
  else:
98
+ # Convert PDF/TIFF to images
99
+ images.extend(convert_from_path(file_content))
100
 
101
  csv_output = BytesIO()
102
  writer = csv.writer(csv_output)
103
 
104
  for i, image in enumerate(images):
105
+ # Process each image and upload to S3 for Textract processing
106
  image_byte_array = BytesIO()
107
  image.save(image_byte_array, format='JPEG')
108
  image_byte_array.seek(0)
 
114
  tables = [block for block in blocks if block['BlockType'] == "TABLE"]
115
  generate_table_csv(tables, blocks_map, writer)
116
 
117
+ csv_output.seek(0) # Go to the start of the CSV in-memory file
118
  return csv_output, output_csv_path
119
 
120
  # Gradio Interface