Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,13 +22,6 @@ s3 = s3fs.S3FileSystem(
|
|
| 22 |
# textract_client = boto3.client('textract', region_name=region_name)
|
| 23 |
textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
|
| 24 |
|
| 25 |
-
# textract_client = boto3.client(
|
| 26 |
-
# 'textract',
|
| 27 |
-
# aws_access_key_id=('AWS_ACCESS_KEY'),
|
| 28 |
-
# aws_secret_access_key=('AWS_SECRET_KEY'),
|
| 29 |
-
# region_name=('AWS_REGION')
|
| 30 |
-
# )
|
| 31 |
-
|
| 32 |
def upload_file_to_s3(file_path, bucket, object_name=None):
|
| 33 |
if object_name is None:
|
| 34 |
object_name = os.path.basename(file_path)
|
|
@@ -53,7 +46,8 @@ def process_image(file_path, s3_bucket, textract_client):
|
|
| 53 |
return response
|
| 54 |
|
| 55 |
def generate_table_csv(tables, blocks_map, csv_output_path):
|
| 56 |
-
|
|
|
|
| 57 |
writer = csv.writer(csvfile)
|
| 58 |
for table in tables:
|
| 59 |
rows = get_rows_columns_map(table, blocks_map)
|
|
@@ -62,7 +56,7 @@ def generate_table_csv(tables, blocks_map, csv_output_path):
|
|
| 62 |
for col_index in range(1, max(cols.keys()) + 1):
|
| 63 |
row.append(cols.get(col_index, ""))
|
| 64 |
writer.writerow(row)
|
| 65 |
-
|
| 66 |
def get_rows_columns_map(table_result, blocks_map):
|
| 67 |
rows = {}
|
| 68 |
for relationship in table_result['Relationships']:
|
|
@@ -94,7 +88,8 @@ def get_text(result, blocks_map):
|
|
| 94 |
def process_file_and_generate_csv(file_path):
|
| 95 |
# The file_path is directly usable; no need to check for attributes or methods
|
| 96 |
|
| 97 |
-
|
|
|
|
| 98 |
|
| 99 |
if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 100 |
images = [Image.open(file_path)]
|
|
|
|
| 22 |
# textract_client = boto3.client('textract', region_name=region_name)
|
| 23 |
textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def upload_file_to_s3(file_path, bucket, object_name=None):
|
| 26 |
if object_name is None:
|
| 27 |
object_name = os.path.basename(file_path)
|
|
|
|
| 46 |
return response
|
| 47 |
|
| 48 |
def generate_table_csv(tables, blocks_map, csv_output_path):
|
| 49 |
+
# Open the CSV file in append mode to add content without overwriting existing data
|
| 50 |
+
with open(csv_output_path, 'a', newline='') as csvfile:
|
| 51 |
writer = csv.writer(csvfile)
|
| 52 |
for table in tables:
|
| 53 |
rows = get_rows_columns_map(table, blocks_map)
|
|
|
|
| 56 |
for col_index in range(1, max(cols.keys()) + 1):
|
| 57 |
row.append(cols.get(col_index, ""))
|
| 58 |
writer.writerow(row)
|
| 59 |
+
|
| 60 |
def get_rows_columns_map(table_result, blocks_map):
|
| 61 |
rows = {}
|
| 62 |
for relationship in table_result['Relationships']:
|
|
|
|
| 88 |
def process_file_and_generate_csv(file_path):
|
| 89 |
# The file_path is directly usable; no need to check for attributes or methods
|
| 90 |
|
| 91 |
+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 92 |
+
csv_output_path = f"/tmp/output_{timestamp}.csv"
|
| 93 |
|
| 94 |
if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
|
| 95 |
images = [Image.open(file_path)]
|