Spaces:
Sleeping
Sleeping
File size: 6,090 Bytes
8190662 4098f17 8190662 7853c7c d72a34e 7853c7c 8190662 72a80a7 1ce2e79 c228d77 335dd53 3376cac 17117d6 8190662 6e37b9b dac4d7c 8190662 52022cd 8190662 52022cd 8190662 f111a58 8190662 7853c7c 8190662 7853c7c 8190662 7853c7c 8190662 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import base64
import random
import string
from base64 import urlsafe_b64encode
import requests
from supabase_models import Supabase_Client
from authenticate import get_access_token_v1
def generate_custom_random_string(length: int = 16) -> str:
    """Return a random identifier of the form ``"RAN" + <lowercase hex>``.

    Args:
        length: Number of random hexadecimal characters after the ``"RAN"``
            prefix. Defaults to 16, giving a 19-character result.

    Returns:
        The prefix ``"RAN"`` followed by ``length`` characters drawn
        uniformly from ``0-9a-f``.

    Note:
        Uses the ``random`` module, so the value is not suitable as a
        security token.
    """
    # string.hexdigits is "0123456789abcdefABCDEF"; lower-casing it lists
    # a-f twice, which made every letter twice as likely as every digit.
    # Build the 16-symbol alphabet explicitly so the draw is uniform.
    alphabet = string.digits + "abcdef"
    return "RAN" + "".join(random.choice(alphabet) for _ in range(length))
# File extension -> MIME type sent in the Document AI ``rawDocument`` payload.
# "jpg" must be announced as the registered type "image/jpeg".
_SUPPORTED_MIME_TYPES = {
    "pdf": "application/pdf",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
}

# Entity types copied from the Document AI response into the
# ``document_ai_entities`` row; every other type is ignored.
_ALLOWED_ENTITIES = frozenset({
    "credit_card_last_four_digits",
    "currency",
    "end_date",
    "net_amount",
    "payment_type",
    "purchase_time",
    "receipt_date",
    "start_date",
    "supplier_address",
    "supplier_city",
    "supplier_name",
    "tip_amount",
    "total_amount",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound, in seconds, for the Document AI HTTP call so a stalled
# connection cannot hang the caller forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


async def extract_structure_store_message(filename: str, filename_path: str, user_id: str, email: str):
    """Run a manually uploaded receipt through Document AI and store the result.

    Steps:
      1. Download ``filename_path`` from the ``all_card_assets`` storage bucket.
      2. POST the base64-encoded bytes to the Document AI ``:process`` endpoint
         of the processor named by the ``PROJECT_ID`` / ``PROCESSOR_ID``
         environment variables.
      3. Insert the extracted raw text into ``receipt_ocr_data``.
      4. Insert the allowed entities (grouped by type) into
         ``document_ai_entities``.

    Args:
        filename: Original file name; only its extension is used, to pick the
            MIME type (pdf, jpg, jpeg, png are supported, case-insensitive).
        filename_path: Object path inside the ``all_card_assets`` bucket.
        user_id: Stored alongside both inserted rows.
        email: Stored alongside both inserted rows.

    Returns:
        None. Unsupported file types, missing configuration and runtime
        failures are reported with ``print`` and do not raise.

    Note:
        ``requests.post`` is a blocking call, so the event loop is blocked
        while the Document AI request is in flight.
    """
    # Use the text after the LAST dot so "scan.2024.pdf" resolves to "pdf",
    # and treat a name without any dot as having no extension.
    _, dot, suffix = filename.rpartition('.')
    attachment_extension = suffix.lower() if dot else ''
    print(filename)
    print("printing attachment extension")
    print(attachment_extension)
    print(filename_path)

    # Reject unsupported types before doing any network work.
    mime_type = _SUPPORTED_MIME_TYPES.get(attachment_extension)
    if mime_type is None:
        print(f"Unsupported attachment type for {filename!r}: {attachment_extension!r}")
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        print("PROJECT_ID and PROCESSOR_ID must be set to call Document AI")
        return

    message_id = generate_custom_random_string()
    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("all_card_assets").download(filename_path)
        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": urlsafe_b64encode(file_bytes).decode('utf-8'),
            },
        }

        # The bearer token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        response_json = response.json()
        print(response_json)

        # An error response carries no "document" key; stop here instead of
        # failing on a None lookup and inserting empty rows.
        document = response_json.get('document')
        if not document:
            print(f"Document AI returned no document (HTTP {response.status_code})")
            return

        raw_text = document.get('text')
        entities = document.get('entities')

        supabase.table("receipt_ocr_data").insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
            'file_type': attachment_extension,
        }).execute()

        print('Printing entities')
        print(entities)

        document_entities = {'user_id': user_id}
        for ent in entities or []:
            entity_type = ent.get('type')
            if entity_type in _ALLOWED_ENTITIES:
                # A type may occur several times (e.g. line items), so each
                # type maps to a list of occurrences.
                document_entities.setdefault(entity_type, []).append({
                    "mention_text": ent.get('mentionText'),
                    "normalizedValue": ent.get('normalizedValue'),
                })
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        # Best-effort boundary: covers the download, the Document AI call and
        # both inserts; the failure is reported and not re-raised.
        print(f"Error downloading or encoding file: {e}")