Spaces:
Sleeping
Sleeping
import os | |
import base64 | |
import random | |
import string | |
from base64 import urlsafe_b64encode | |
import requests | |
from supabase_models import Supabase_Client | |
from authenticate import get_access_token_v1 | |
def generate_custom_random_string(length: int = 16) -> str:
    """Return a random identifier of the form ``"RAN"`` + lowercase hex digits.

    Args:
        length: Number of random hexadecimal characters that follow the
            ``"RAN"`` prefix. Defaults to 16, the length used historically.

    Returns:
        A string of ``3 + length`` characters, e.g. ``"RAN3f9a0c..."``.
    """
    # NOTE: string.hexdigits is "0123456789abcdefABCDEF"; lower-casing it
    # duplicates a-f and makes letters twice as likely as digits. Build the
    # 16-character alphabet explicitly so every hex digit is equally likely.
    alphabet = string.digits + "abcdef"
    return "RAN" + "".join(random.choices(alphabet, k=length))
# Entity types from the Document AI response that are persisted.
_ALLOWED_ENTITY_TYPES = frozenset({
    "credit_card_last_four_digits",
    "currency",
    "end_date",
    "net_amount",
    "payment_type",
    "purchase_time",
    "receipt_date",
    "start_date",
    "supplier_address",
    "supplier_city",
    "supplier_name",
    "tip_amount",
    "total_amount",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Lower-cased file extension -> MIME type sent to Document AI.
_MIME_TYPES_BY_EXTENSION = {
    "pdf": "application/pdf",
    "jpg": "image/jpeg",  # "image/jpg" is not a registered MIME type
    "jpeg": "image/jpeg",
    "png": "image/png",
}

# Upper bound (seconds) for the Document AI HTTP call so it cannot hang forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _group_allowed_entities(entities):
    """Group allowed Document AI entities by type into lists of value dicts."""
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get("type")
        if entity_type not in _ALLOWED_ENTITY_TYPES:
            continue
        grouped.setdefault(entity_type, []).append({
            "mention_text": ent.get("mentionText"),
            "normalizedValue": ent.get("normalizedValue"),
        })
    return grouped


async def extract_structure_store_message(filename: str, filename_path: str, user_id: str, email: str) -> None:
    """Run an uploaded receipt through Document AI and store the results.

    The file is downloaded from the ``all_card_assets`` Supabase storage
    bucket, sent to the Document AI processor identified by the
    ``PROJECT_ID`` / ``PROCESSOR_ID`` environment variables, and then:

    * the raw OCR text is inserted into the ``receipt_ocr_data`` table;
    * the allowed entities, grouped by type, are inserted into the
      ``document_ai_entities`` table.

    Failures are reported with ``print`` and do not propagate to the caller
    (best-effort processing, as before).

    Args:
        filename: Original file name; only its extension is used, to pick
            the MIME type (pdf, jpg/jpeg, png).
        filename_path: Path of the object inside the storage bucket.
        user_id: Identifier stored alongside the extracted data.
        email: Email address stored alongside the extracted data.
    """
    # Use the LAST dot-separated component so "my.receipt.PDF" resolves to
    # "pdf"; a name without an extension yields "" and is rejected below.
    attachment_extension = os.path.splitext(filename)[1].lstrip(".").lower()
    mime_type = _MIME_TYPES_BY_EXTENSION.get(attachment_extension)
    if mime_type is None:
        print(f"Unsupported file type for {filename!r}: {attachment_extension!r}")
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        print("PROJECT_ID and PROCESSOR_ID environment variables must be set")
        return

    message_id = generate_custom_random_string()
    print(filename)
    print(filename_path)
    supabase = Supabase_Client().instance

    # NOTE: the storage download and requests.post below are synchronous
    # calls, so they block the event loop while they run.
    try:
        file_bytes = supabase.storage.from_("all_card_assets").download(filename_path)
        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": urlsafe_b64encode(file_bytes).decode('utf-8'),
            },
        }

        # The access token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors explicitly instead of failing later on a
        # missing "document" key.
        response.raise_for_status()
        response_json = response.json()
        print(response_json)

        document = response_json.get('document')
        if document is None:
            raise ValueError("Document AI response has no 'document' field")
        raw_text = document.get('text')
        entities = document.get('entities')

        supabase.table("receipt_ocr_data").insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
            'file_type': attachment_extension,
        }).execute()

        print('Printing entities')
        print(entities)

        document_entities = {'user_id': user_id}
        document_entities.update(_group_allowed_entities(entities))
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        # Top-level boundary: keep processing best-effort, but report the
        # failure accurately (it may come from any stage above).
        print(f"Error processing file {filename_path!r}: {e}")