Spaces:
Sleeping
Sleeping
File size: 5,557 Bytes
8190662 4098f17 8190662 7853c7c 8eaf834 7853c7c 8190662 071ea1b c228d77 335dd53 3376cac 8190662 3376cac 8190662 f111a58 8190662 7853c7c 8190662 7853c7c 8190662 7853c7c 8190662 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import os
import base64
import random
import string
from base64 import urlsafe_b64encode
import requests
from supabase_models import Supabase_Client
from authenticate import get_access_token_v1
def generate_custom_random_string():
    """Return a random identifier: ``"RAN"`` followed by 16 lowercase hex digits.

    Returns:
        str: a 19-character string such as ``"RAN3f9a0c1b2d4e5f67"``.

    Note:
        The value is drawn with the ``random`` module, which is not
        cryptographically secure; do not use it as a secret or token.
    """
    length = 16
    # ``string.hexdigits`` is "0123456789abcdefABCDEF". Lower-casing it leaves
    # a-f in the pool twice, which makes letters twice as likely as digits.
    # Use the 16 unique hexadecimal digits so every digit is equally likely.
    characters = string.digits + "abcdef"
    random_string = ''.join(random.choice(characters) for _ in range(length))
    # Prefix marks the id as randomly generated.
    return "RAN" + random_string
# Document AI entity types that are persisted; entities of any other type are ignored.
_ALLOWED_ENTITIES = frozenset({
    "credit_card_last_four_digits",
    "currency",
    "end_date",
    "net_amount",
    "payment_type",
    "purchase_time",
    "receipt_date",
    "start_date",
    "supplier_address",
    "supplier_city",
    "supplier_name",
    "tip_amount",
    "total_amount",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Seconds to wait for the Document AI call before giving up.
# NOTE(review): 120 s is a guess for synchronous processing - tune as needed.
_DOCUMENT_AI_TIMEOUT = 120


def _guess_mime_type(filename, extension):
    """Return the MIME type to declare for *filename* in the Document AI request."""
    import mimetypes  # function-scope import keeps the file's import block untouched

    guessed, _ = mimetypes.guess_type(filename)
    if guessed:
        return guessed
    if extension:
        # Unknown extension: fall back to the value this code historically sent.
        return f"application/{extension}"
    return "application/octet-stream"


def _group_allowed_entities(entities):
    """Group allowed entities by type as ``{type: [{mention_text, normalizedValue}, ...]}``."""
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type in _ALLOWED_ENTITIES:
            grouped.setdefault(entity_type, []).append({
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            })
    return grouped


async def extract_structure_store_message(filename: str, user_id: str, email=None):
    """Run a stored receipt through Document AI and persist the results.

    Downloads ``filename`` from the ``receipt_radar`` storage bucket, posts it
    to the Document AI processor identified by the ``PROJECT_ID`` and
    ``PROCESSOR_ID`` environment variables, then inserts the raw text into the
    ``receipt_ocr_data`` table and the grouped allowed entities into the
    ``document_ai_entities`` table.

    Args:
        filename: Path of the object inside the ``receipt_radar`` bucket.
        user_id: Identifier stored alongside both inserted rows.
        email: Optional address stored alongside both inserted rows. The
            previous implementation referenced an undefined ``email`` name and
            therefore always failed before storing anything; it is now an
            optional argument. NOTE(review): ``None`` is inserted when omitted
            - confirm the columns are nullable.

    Returns:
        None. Failures are printed rather than raised (best-effort behavior
        kept from the original implementation).

    Note:
        ``requests.post`` is a blocking call, so the event loop is blocked
        for the duration of the Document AI request.
    """
    message_id = generate_custom_random_string()
    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    # Take the text after the LAST dot so names such as "a.b.pdf" or
    # "dir.v2/file.pdf" resolve correctly and names without a dot yield "".
    attachment_extension = os.path.splitext(filename)[1].lstrip('.').upper()
    print("printing attachment extension")
    print(attachment_extension)
    print(filename)

    supabase = Supabase_Client().instance

    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(filename)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error downloading or encoding file: {e}")
        return

    try:
        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": _guess_mime_type(filename, attachment_extension),
                "content": base64_data,
            },
        }
        # The access token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT,
        )
        # Surface HTTP errors explicitly instead of failing later on a
        # missing 'document' key.
        response.raise_for_status()
        response_json = response.json()
        print(response_json)

        document = response_json.get('document')
        if document is None:
            raise ValueError("Document AI response contains no 'document'")
        raw_text = document.get('text')
        entities = document.get('entities')

        supabase.table("receipt_ocr_data").insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
            'file_type': attachment_extension,
        }).execute()

        print('Printing entities')
        print(entities)

        document_entities = {'user_id': user_id}
        document_entities.update(_group_allowed_entities(entities))
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        print(f"Error processing or storing receipt {filename}: {e}")