Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -118,31 +118,6 @@ Return the response in the following JSoN response format:
|
|
118 |
error_pages.append(page_identifier)
|
119 |
return []
|
120 |
|
121 |
-
error_pages = [] # Initialize the list to track error pages or files
|
122 |
-
file_path = "/content/testing 2.pdf" # Replace with your actual file path
|
123 |
-
images = get_images(file_path)
|
124 |
-
print("Total images found:", len(images))
|
125 |
-
|
126 |
-
structured_data = []
|
127 |
-
for i, image in enumerate(images, start=1):
|
128 |
-
print(f"Processing image {i}...")
|
129 |
-
with TemporaryDirectory() as temp_dir:
|
130 |
-
image_path = os.path.join(temp_dir, "image.jpg")
|
131 |
-
image.save(image_path)
|
132 |
-
data = process_files_fixed(image_path, i, error_pages) # Pass the page number or identifier
|
133 |
-
print(f"{len(data)} records found...")
|
134 |
-
structured_data.extend(data)
|
135 |
-
|
136 |
-
if structured_data:
|
137 |
-
df = pd.DataFrame(structured_data)
|
138 |
-
csv_filename = 'results.csv'
|
139 |
-
df.to_csv(csv_filename, index=False)
|
140 |
-
print(f"Results saved to {csv_filename}")
|
141 |
-
else:
|
142 |
-
print("No data to save.")
|
143 |
-
|
144 |
-
if error_pages:
|
145 |
-
print(f"Errors or no records found in {len(error_pages)} pages/files: {error_pages}")
|
146 |
def process_pdf_and_generate_csv(file_path):
|
147 |
error_pages = [] # Initialize the list to track error pages or files
|
148 |
images = get_images(file_path)
|
|
|
118 |
error_pages.append(page_identifier)
|
119 |
return []
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
def process_pdf_and_generate_csv(file_path):
|
122 |
error_pages = [] # Initialize the list to track error pages or files
|
123 |
images = get_images(file_path)
|