import os
import base64
import random
import string
from base64 import urlsafe_b64encode

import requests

from supabase_models import Supabase_Client
from authenticate import get_access_token_v1

# Lowercase hexadecimal alphabet. ``string.hexdigits.lower()`` is NOT used
# because it yields "0123456789abcdefabcdef", which makes a-f twice as likely
# as the digits.
_HEX_ALPHABET = string.digits + "abcdef"
_RANDOM_SUFFIX_LENGTH = 16

# File extension (lowercase, without the dot) -> MIME type sent to Document AI.
_MIME_TYPES = {
    "pdf": "application/pdf",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
}

# Entity types copied from the Document AI response into the
# "document_ai_entities" row; every other entity type is ignored.
_ALLOWED_ENTITIES = frozenset({
    "credit_card_last_four_digits",
    "currency",
    "end_date",
    "net_amount",
    "payment_type",
    "purchase_time",
    "receipt_date",
    "start_date",
    "supplier_address",
    "supplier_city",
    "supplier_name",
    "tip_amount",
    "total_amount",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound on the Document AI HTTP call so a stalled request cannot hang
# the caller forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def generate_custom_random_string():
    """Return ``"RAN"`` followed by 16 random lowercase hexadecimal characters."""
    suffix = "".join(random.choices(_HEX_ALPHABET, k=_RANDOM_SUFFIX_LENGTH))
    return "RAN" + suffix


def _file_extension(filename):
    """Return the lowercase extension of *filename* without the dot ('' if none)."""
    return os.path.splitext(filename)[1].lstrip(".").lower()


def _collect_entities(entities):
    """Group the allowed entities of a Document AI response by entity type.

    Returns a dict mapping each allowed entity type to a list of
    ``{"mention_text": ..., "normalizedValue": ...}`` dicts, one per
    occurrence. ``None`` or an empty list yields an empty dict.
    """
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get("type")
        if entity_type not in _ALLOWED_ENTITIES:
            continue
        grouped.setdefault(entity_type, []).append({
            "mention_text": ent.get("mentionText"),
            "normalizedValue": ent.get("normalizedValue"),
        })
    return grouped


async def extract_structure_store_message(filename: str, filename_path: str, user_id: str, email: str):
    """Run a stored file through Document AI and persist the extracted data.

    The file at *filename_path* is downloaded from the ``all_card_assets``
    storage bucket, sent to the Document AI processor identified by the
    ``PROJECT_ID`` / ``PROCESSOR_ID`` environment variables, and the result is
    written to two tables: the raw text to ``receipt_ocr_data`` and the
    allowed entities to ``document_ai_entities``. Both rows share a freshly
    generated ``message_id``.

    Args:
        filename: Original file name; only its extension is used, to pick the
            MIME type (pdf, jpg/jpeg and png are supported).
        filename_path: Path of the file inside the storage bucket.
        user_id: Stored with both inserted rows.
        email: Stored with both inserted rows.

    Returns:
        None. This is a best-effort operation: failures are printed, not raised.
    """
    # NOTE(review): the storage, HTTP and database calls below are synchronous
    # and block the event loop while they run -- confirm this is acceptable
    # for the callers of this coroutine.
    message_id = generate_custom_random_string()

    project_id = os.getenv("PROJECT_ID")
    processor_id = os.getenv("PROCESSOR_ID")
    if not project_id or not processor_id:
        print("PROJECT_ID and PROCESSOR_ID must be set to process documents")
        return

    attachment_extension = _file_extension(filename)
    print(filename)
    print("printing attachment extension")
    print(attachment_extension)
    print(filename_path)

    mime_type = _MIME_TYPES.get(attachment_extension)
    if mime_type is None:
        # Bail out before doing any I/O instead of failing later on an
        # unbound payload.
        supported = ", ".join(sorted(_MIME_TYPES))
        print(f"Unsupported attachment type for {filename!r}; expected one of: {supported}")
        return

    supabase = Supabase_Client().instance

    try:
        file_bytes = supabase.storage.from_("all_card_assets").download(filename_path)

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": urlsafe_b64encode(file_bytes).decode("utf-8"),
            },
        }

        # The access token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json; charset=utf-8",
        }

        response = requests.post(
            f"https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process",
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        if not response.ok:
            print(f"Document AI request failed with status {response.status_code}: {response.text}")
            return

        response_json = response.json()
        print(response_json)

        document = response_json.get("document")
        if not document:
            print("Document AI response did not contain a document")
            return

        raw_text = document.get("text")
        entities = document.get("entities")

        supabase.table("receipt_ocr_data").insert({
            "user_id": user_id,
            "message_id": message_id,
            "receipt_text": raw_text,
            "email": email,
            "file_type": attachment_extension,
        }).execute()

        print("Printing entities")
        print(entities)

        document_entities = {"user_id": user_id}
        document_entities.update(_collect_entities(entities))
        document_entities["email"] = email
        document_entities["message_id"] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)

    except Exception as e:
        # Top-level boundary: keep the original best-effort contract (report,
        # do not raise), but describe the failure accurately -- it may come
        # from the download, the OCR call or either database insert.
        print(f"Error processing file {filename_path}: {e}")