Spaces:
Runtime error
Runtime error
danial0203
committed on
Commit
•
5caebda
1
Parent(s):
1ef063e
Update app.py
Browse files
app.py
CHANGED
@@ -22,13 +22,6 @@ s3 = s3fs.S3FileSystem(
|
|
22 |
# textract_client = boto3.client('textract', region_name=region_name)
|
23 |
textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
|
24 |
|
25 |
-
# textract_client = boto3.client(
|
26 |
-
# 'textract',
|
27 |
-
# aws_access_key_id=('AWS_ACCESS_KEY'),
|
28 |
-
# aws_secret_access_key=('AWS_SECRET_KEY'),
|
29 |
-
# region_name=('AWS_REGION')
|
30 |
-
# )
|
31 |
-
|
32 |
def upload_file_to_s3(file_path, bucket, object_name=None):
|
33 |
if object_name is None:
|
34 |
object_name = os.path.basename(file_path)
|
@@ -53,7 +46,8 @@ def process_image(file_path, s3_bucket, textract_client):
|
|
53 |
return response
|
54 |
|
55 |
def generate_table_csv(tables, blocks_map, csv_output_path):
|
56 |
-
|
|
|
57 |
writer = csv.writer(csvfile)
|
58 |
for table in tables:
|
59 |
rows = get_rows_columns_map(table, blocks_map)
|
@@ -62,7 +56,7 @@ def generate_table_csv(tables, blocks_map, csv_output_path):
|
|
62 |
for col_index in range(1, max(cols.keys()) + 1):
|
63 |
row.append(cols.get(col_index, ""))
|
64 |
writer.writerow(row)
|
65 |
-
|
66 |
def get_rows_columns_map(table_result, blocks_map):
|
67 |
rows = {}
|
68 |
for relationship in table_result['Relationships']:
|
@@ -94,7 +88,8 @@ def get_text(result, blocks_map):
|
|
94 |
def process_file_and_generate_csv(file_path):
|
95 |
# The file_path is directly usable; no need to check for attributes or methods
|
96 |
|
97 |
-
|
|
|
98 |
|
99 |
if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
|
100 |
images = [Image.open(file_path)]
|
|
|
22 |
# textract_client = boto3.client('textract', region_name=region_name)
|
23 |
textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def upload_file_to_s3(file_path, bucket, object_name=None):
|
26 |
if object_name is None:
|
27 |
object_name = os.path.basename(file_path)
|
|
|
46 |
return response
|
47 |
|
48 |
def generate_table_csv(tables, blocks_map, csv_output_path):
|
49 |
+
# Open the CSV file in append mode to add content without overwriting existing data
|
50 |
+
with open(csv_output_path, 'a', newline='') as csvfile:
|
51 |
writer = csv.writer(csvfile)
|
52 |
for table in tables:
|
53 |
rows = get_rows_columns_map(table, blocks_map)
|
|
|
56 |
for col_index in range(1, max(cols.keys()) + 1):
|
57 |
row.append(cols.get(col_index, ""))
|
58 |
writer.writerow(row)
|
59 |
+
|
60 |
def get_rows_columns_map(table_result, blocks_map):
|
61 |
rows = {}
|
62 |
for relationship in table_result['Relationships']:
|
|
|
88 |
def process_file_and_generate_csv(file_path):
|
89 |
# The file_path is directly usable; no need to check for attributes or methods
|
90 |
|
91 |
+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
92 |
+
csv_output_path = f"/tmp/output_{timestamp}.csv"
|
93 |
|
94 |
if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
|
95 |
images = [Image.open(file_path)]
|