from typing import Type, List import pandas as pd import boto3 import tempfile import os from tools.config import AWS_REGION, RUN_AWS_FUNCTIONS, DOCUMENT_REDACTION_BUCKET PandasDataFrame = Type[pd.DataFrame] def get_assumed_role_info(): sts_endpoint = 'https://sts.' + AWS_REGION + '.amazonaws.com' sts = boto3.client('sts', region_name=AWS_REGION, endpoint_url=sts_endpoint) response = sts.get_caller_identity() # Extract ARN of the assumed role assumed_role_arn = response['Arn'] # Extract the name of the assumed role from the ARN assumed_role_name = assumed_role_arn.split('/')[-1] return assumed_role_arn, assumed_role_name if RUN_AWS_FUNCTIONS == "1": try: session = boto3.Session(region_name=AWS_REGION) except Exception as e: print("Could not start boto3 session:", e) try: assumed_role_arn, assumed_role_name = get_assumed_role_info() print("Successfully assumed ARN role") #print("Assumed Role ARN:", assumed_role_arn) #print("Assumed Role Name:", assumed_role_name) except Exception as e: print("Could not get assumed role from STS:", e) # Download direct from S3 - requires login credentials def download_file_from_s3(bucket_name:str, key:str, local_file_path_and_name:str, RUN_AWS_FUNCTIONS:str = RUN_AWS_FUNCTIONS): if RUN_AWS_FUNCTIONS == "1": try: print("bucket_name:", bucket_name) print("key:", key) print("local_file_path_and_name:", local_file_path_and_name) # Ensure the local directory exists os.makedirs(os.path.dirname(local_file_path_and_name), exist_ok=True) s3 = boto3.client('s3', region_name=AWS_REGION) s3.download_file(bucket_name, key, local_file_path_and_name) print(f"File downloaded from s3://{bucket_name}/{key} to {local_file_path_and_name}") except Exception as e: print("Could not download file:", key, "from s3 due to", e) def download_folder_from_s3(bucket_name:str, s3_folder:str, local_folder:str, RUN_AWS_FUNCTIONS:str = RUN_AWS_FUNCTIONS): """ Download all files from an S3 folder to a local folder. """ if RUN_AWS_FUNCTIONS == "1": if bucket_name and s3_folder and local_folder: s3 = boto3.client('s3', region_name=AWS_REGION) # List objects in the specified S3 folder response = s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder) # Download each object for obj in response.get('Contents', []): # Extract object key and construct local file path object_key = obj['Key'] local_file_path = os.path.join(local_folder, os.path.relpath(object_key, s3_folder)) # Create directories if necessary os.makedirs(os.path.dirname(local_file_path), exist_ok=True) # Download the object try: s3.download_file(bucket_name, object_key, local_file_path) print(f"Downloaded 's3://{bucket_name}/{object_key}' to '{local_file_path}'") except Exception as e: print(f"Error downloading 's3://{bucket_name}/{object_key}':", e) else: print("One or more required variables are empty, could not download from S3") def download_files_from_s3(bucket_name:str, s3_folder:str, local_folder:str, filenames:List[str], RUN_AWS_FUNCTIONS:str = RUN_AWS_FUNCTIONS): """ Download specific files from an S3 folder to a local folder. """ if RUN_AWS_FUNCTIONS == "1": if bucket_name and s3_folder and local_folder and filenames: s3 = boto3.client('s3', region_name=AWS_REGION) print("Trying to download file: ", filenames) if filenames == '*': # List all objects in the S3 folder print("Trying to download all files in AWS folder: ", s3_folder) response = s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder) print("Found files in AWS folder: ", response.get('Contents', [])) filenames = [obj['Key'].split('/')[-1] for obj in response.get('Contents', [])] print("Found filenames in AWS folder: ", filenames) for filename in filenames: object_key = os.path.join(s3_folder, filename) local_file_path = os.path.join(local_folder, filename) # Create directories if necessary os.makedirs(os.path.dirname(local_file_path), exist_ok=True) # Download the object try: s3.download_file(bucket_name, object_key, local_file_path) print(f"Downloaded 's3://{bucket_name}/{object_key}' to '{local_file_path}'") except Exception as e: print(f"Error downloading 's3://{bucket_name}/{object_key}':", e) else: print("One or more required variables are empty, could not download from S3") def upload_file_to_s3(local_file_paths:List[str], s3_key:str, s3_bucket:str=DOCUMENT_REDACTION_BUCKET, RUN_AWS_FUNCTIONS:str = RUN_AWS_FUNCTIONS): """ Uploads a file from local machine to Amazon S3. Args: - local_file_path: Local file path(s) of the file(s) to upload. - s3_key: Key (path) to the file in the S3 bucket. - s3_bucket: Name of the S3 bucket. Returns: - Message as variable/printed to console """ final_out_message = [] final_out_message_str = "" if RUN_AWS_FUNCTIONS == "1": try: if s3_bucket and s3_key and local_file_paths: s3_client = boto3.client('s3', region_name=AWS_REGION) if isinstance(local_file_paths, str): local_file_paths = [local_file_paths] for file in local_file_paths: if s3_client: #print(s3_client) try: # Get file name off file path file_name = os.path.basename(file) s3_key_full = s3_key + file_name print("S3 key: ", s3_key_full) s3_client.upload_file(file, s3_bucket, s3_key_full) out_message = "File " + file_name + " uploaded successfully!" print(out_message) except Exception as e: out_message = f"Error uploading file(s): {e}" print(out_message) final_out_message.append(out_message) final_out_message_str = '\n'.join(final_out_message) else: final_out_message_str = "Could not connect to AWS." else: final_out_message_str = "At least one essential variable is empty, could not upload to S3" except Exception as e: final_out_message_str = "Could not upload files to S3 due to: " + str(e) print(final_out_message_str) else: final_out_message_str = "App not set to run AWS functions" return final_out_message_str