danial0203 committed on
Commit
5caebda
1 Parent(s): 1ef063e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -22,13 +22,6 @@ s3 = s3fs.S3FileSystem(
22
  # textract_client = boto3.client('textract', region_name=region_name)
23
  textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
24
 
25
- # textract_client = boto3.client(
26
- # 'textract',
27
- # aws_access_key_id=('AWS_ACCESS_KEY'),
28
- # aws_secret_access_key=('AWS_SECRET_KEY'),
29
- # region_name=('AWS_REGION')
30
- # )
31
-
32
  def upload_file_to_s3(file_path, bucket, object_name=None):
33
  if object_name is None:
34
  object_name = os.path.basename(file_path)
@@ -53,7 +46,8 @@ def process_image(file_path, s3_bucket, textract_client):
53
  return response
54
 
55
  def generate_table_csv(tables, blocks_map, csv_output_path):
56
- with open(csv_output_path, 'w', newline='') as csvfile:
 
57
  writer = csv.writer(csvfile)
58
  for table in tables:
59
  rows = get_rows_columns_map(table, blocks_map)
@@ -62,7 +56,7 @@ def generate_table_csv(tables, blocks_map, csv_output_path):
62
  for col_index in range(1, max(cols.keys()) + 1):
63
  row.append(cols.get(col_index, ""))
64
  writer.writerow(row)
65
-
66
  def get_rows_columns_map(table_result, blocks_map):
67
  rows = {}
68
  for relationship in table_result['Relationships']:
@@ -94,7 +88,8 @@ def get_text(result, blocks_map):
94
  def process_file_and_generate_csv(file_path):
95
  # The file_path is directly usable; no need to check for attributes or methods
96
 
97
- csv_output_path = "/tmp/output.csv"
 
98
 
99
  if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
100
  images = [Image.open(file_path)]
 
22
  # textract_client = boto3.client('textract', region_name=region_name)
23
  textract_client = boto3.client('textract', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=region_name)
24
 
 
 
 
 
 
 
 
25
  def upload_file_to_s3(file_path, bucket, object_name=None):
26
  if object_name is None:
27
  object_name = os.path.basename(file_path)
 
46
  return response
47
 
48
  def generate_table_csv(tables, blocks_map, csv_output_path):
49
+ # Open the CSV file in append mode to add content without overwriting existing data
50
+ with open(csv_output_path, 'a', newline='') as csvfile:
51
  writer = csv.writer(csvfile)
52
  for table in tables:
53
  rows = get_rows_columns_map(table, blocks_map)
 
56
  for col_index in range(1, max(cols.keys()) + 1):
57
  row.append(cols.get(col_index, ""))
58
  writer.writerow(row)
59
+
60
  def get_rows_columns_map(table_result, blocks_map):
61
  rows = {}
62
  for relationship in table_result['Relationships']:
 
88
  def process_file_and_generate_csv(file_path):
89
  # The file_path is directly usable; no need to check for attributes or methods
90
 
91
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
92
+ csv_output_path = f"/tmp/output_{timestamp}.csv"
93
 
94
  if file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
95
  images = [Image.open(file_path)]