Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -83,19 +83,26 @@ def is_image_file(filename):
|
|
83 |
return any(filename.lower().endswith(ext) for ext in image_file_extensions)
|
84 |
|
85 |
def process_file_and_generate_csv(input_file):
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
images = []
|
88 |
-
if is_image_file(
|
89 |
-
|
90 |
-
|
91 |
else:
|
92 |
-
|
93 |
-
images.extend(
|
94 |
|
95 |
csv_output = BytesIO()
|
96 |
writer = csv.writer(csv_output)
|
97 |
|
98 |
for i, image in enumerate(images):
|
|
|
99 |
image_byte_array = BytesIO()
|
100 |
image.save(image_byte_array, format='JPEG')
|
101 |
image_byte_array.seek(0)
|
@@ -107,7 +114,7 @@ def process_file_and_generate_csv(input_file):
|
|
107 |
tables = [block for block in blocks if block['BlockType'] == "TABLE"]
|
108 |
generate_table_csv(tables, blocks_map, writer)
|
109 |
|
110 |
-
csv_output.seek(0)
|
111 |
return csv_output, output_csv_path
|
112 |
|
113 |
# Gradio Interface
|
|
|
83 |
return any(filename.lower().endswith(ext) for ext in image_file_extensions)
|
84 |
|
85 |
def process_file_and_generate_csv(input_file):
|
86 |
+
output_csv_path = "output.csv" # Output CSV file name
|
87 |
+
file_content = BytesIO(input_file.read()) # Read file content into memory for processing
|
88 |
+
file_content.seek(0) # Go to the start of the file-like object
|
89 |
+
|
90 |
+
object_name = os.path.basename(input_file.name)
|
91 |
+
|
92 |
+
# Check if the uploaded file is an image or needs conversion
|
93 |
images = []
|
94 |
+
if is_image_file(object_name):
|
95 |
+
images.append(Image.open(file_content))
|
96 |
+
file_content.seek(0) # Reset for potential re-use
|
97 |
else:
|
98 |
+
# Convert PDF/TIFF to images
|
99 |
+
images.extend(convert_from_path(file_content))
|
100 |
|
101 |
csv_output = BytesIO()
|
102 |
writer = csv.writer(csv_output)
|
103 |
|
104 |
for i, image in enumerate(images):
|
105 |
+
# Process each image and upload to S3 for Textract processing
|
106 |
image_byte_array = BytesIO()
|
107 |
image.save(image_byte_array, format='JPEG')
|
108 |
image_byte_array.seek(0)
|
|
|
114 |
tables = [block for block in blocks if block['BlockType'] == "TABLE"]
|
115 |
generate_table_csv(tables, blocks_map, writer)
|
116 |
|
117 |
+
csv_output.seek(0) # Go to the start of the CSV in-memory file
|
118 |
return csv_output, output_csv_path
|
119 |
|
120 |
# Gradio Interface
|