Spaces:
Sleeping
Sleeping
File size: 6,214 Bytes
0b2c576 0902287 7535d0e 9a0ee18 36a270b 7535d0e 36a270b 590c4a4 7535d0e 36a270b 6d9b971 0f20ab3 36a270b 7a50d0c 36a270b 7535d0e 36a270b 6b73b6f 4fcd10d 6b73b6f 36a270b 6306de6 36a270b 6306de6 0249415 6306de6 36a270b 6306de6 0249415 6306de6 0249415 6306de6 0249415 9a0ee18 2abbb8c 36a270b 6f8da54 d32c49f 36a270b 27ad10a 8f88289 7535d0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import os
import base64
from base64 import urlsafe_b64encode
import requests
from supabase_models import Supabase_Client
from authenticate import get_access_token_v1
# Entity types (including line-item sub-properties) that are copied from the
# Document AI response into the ``document_ai_entities`` row.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Extensions whose MIME type is not simply ``application/<ext>``.
_MIME_TYPES_BY_EXTENSION = {
    "pdf": "application/pdf",
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "tif": "image/tiff",
    "tiff": "image/tiff",
    "bmp": "image/bmp",
    "webp": "image/webp",
}

# Upper bound on how long a single Document AI request may block.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _mime_type_for(attachment_extension):
    """Return the MIME type for an extension, defaulting to application/<ext>."""
    normalized = (attachment_extension or "").lower().lstrip(".")
    return _MIME_TYPES_BY_EXTENSION.get(
        normalized, f"application/{attachment_extension}"
    )


def _entity_record(node):
    """Build the stored representation of one entity or entity property."""
    return {
        "mention_text": node.get('mentionText'),
        "normalizedValue": node.get('normalizedValue'),
    }


def _collect_entities(entities):
    """Group allowed entities (and allowed line-item properties) by type."""
    grouped = {}
    for ent in entities or ():
        entity_type = ent.get('type')
        if entity_type is None or entity_type not in _ALLOWED_ENTITY_TYPES:
            continue
        grouped.setdefault(entity_type, []).append(_entity_record(ent))
        if entity_type != 'line_item':
            continue
        # Line items carry their description/quantity/amount/unit price as
        # nested properties; each allowed property gets its own list.
        for prop in ent.get('properties') or ():
            prop_type = prop.get('type')
            if prop_type in _ALLOWED_ENTITY_TYPES:
                grouped.setdefault(prop_type, []).append(_entity_record(prop))
    return grouped


def extract_structure_store_message(user_id: str, message_id: str, attachment_id: str, attachment_extension: str, email: str) -> None:
    """Run a stored attachment through Document AI and persist the results.

    Steps:
      1. Download ``<message_id>_<attachment_id>.<attachment_extension>`` from
         the ``receipt_radar`` storage bucket.
      2. Send it base64-encoded to the Document AI processor identified by the
         ``PROJECT_ID`` and ``PROCESSOR_ID`` environment variables.
      3. Insert the extracted text into ``receipt_ocr_data`` and the allowed
         entities into ``document_ai_entities``.

    The function is best-effort: failures in any step are printed and the
    function returns without raising. Nothing is done when ``attachment_id``
    or ``message_id`` is empty.

    Args:
        user_id: Stored with both inserted rows.
        message_id: Message the attachment belongs to; part of the file name.
        attachment_id: Attachment identifier; part of the file name.
        attachment_extension: File extension, used for the file name and to
            choose the MIME type sent to Document AI.
        email: Stored with both inserted rows.

    Returns:
        Always ``None``.
    """
    if not (attachment_id and message_id):
        return None

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the request URL would contain the literal "None".
        print("Error: PROJECT_ID and PROCESSOR_ID environment variables must be set")
        return None

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance

    # Step 1: fetch the attachment and encode it for the JSON payload.
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error downloading or encoding file: {e}")
        return None

    # Step 2: call Document AI.
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": _mime_type_for(attachment_extension),
            "content": base64_data
        }
    }
    try:
        # The bearer token is deliberately never printed or logged.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8'
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS
        )
        if not response.ok:
            print(f"Document AI request failed ({response.status_code}): {response.text}")
            return None
        document = response.json().get('document')
    except Exception as e:
        print(f"Error calling Document AI: {e}")
        return None

    if not document:
        print("Document AI response did not contain a document")
        return None

    raw_text = document.get('text')
    entities = document.get('entities')

    # Step 3: persist the raw text and the structured entities.
    try:
        (
            supabase.table("receipt_ocr_data")
            .insert({'user_id': user_id, 'message_id': message_id, 'receipt_text': raw_text, 'email': email, 'file_type': attachment_extension})
            .execute()
        )
        print('Printing entities')
        print(entities)
        document_entities = {
            'user_id': user_id,
            **_collect_entities(entities),
            'email': email,
            'message_id': message_id,
        }
        print(document_entities)
        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        print(f"Error storing Document AI results: {e}")
    return None