Spaces:
Sleeping
Sleeping
File size: 3,851 Bytes
0b2c576 0902287 7535d0e 9a0ee18 36a270b 7535d0e 36a270b 590c4a4 7535d0e 36a270b 6d9b971 0f20ab3 36a270b 7a50d0c 36a270b 7535d0e 36a270b 6b73b6f 36a270b 691826b 36a270b 9a0ee18 36a270b 6f8da54 d32c49f 36a270b 27ad10a 8f88289 7535d0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import base64
import mimetypes
import os
from base64 import urlsafe_b64encode

import requests

from authenticate import get_access_token_v1
from supabase_models import Supabase_Client
# Entity types that are copied from the Document AI response into the stored row.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) on the Document AI call so a stalled request cannot hang forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _guess_mime_type(attachment_extension: str) -> str:
    """Return the MIME type for a file extension, e.g. ``image/png`` for ``png``."""
    guessed, _ = mimetypes.guess_type(f"attachment.{attachment_extension}")
    # Fall back to the historical "application/<ext>" guess for unknown extensions.
    return guessed or f"application/{attachment_extension}"


def _process_with_document_ai(project_id: str, processor_id: str,
                              file_bytes: bytes, mime_type: str) -> dict:
    """POST the raw file to the Document AI processor and return its ``document`` dict.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        ValueError: if the response body carries no ``document``.
    """
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": mime_type,
            "content": urlsafe_b64encode(file_bytes).decode('utf-8'),
        },
    }
    # The bearer token is a credential: it is sent in the header but never logged.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }
    response = requests.post(
        f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
        headers=headers,
        json=payload,
        timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
    )
    # Surface API errors as such instead of failing later on a missing key.
    response.raise_for_status()
    document = response.json().get('document')
    if not document:
        raise ValueError("Document AI response did not contain a 'document'")
    return document


def _select_allowed_entities(entities) -> dict:
    """Map each allowed entity type to its mention text and normalized value.

    When several entities share a type, the last one in the list wins.
    """
    selected = {}
    for ent in entities or ():
        ent_type = ent.get('type')
        if ent_type in _ALLOWED_ENTITY_TYPES:
            selected[ent_type] = {
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            }
    return selected


def extract_structure_store_message(
    user_id: str,
    message_id: str,
    attachment_id: str,
    attachment_extension: str,
    email: str,
) -> None:
    """Run a stored attachment through Document AI and persist the results.

    The file ``{message_id}_{attachment_id}.{attachment_extension}`` is
    downloaded from the ``receipt_radar`` storage bucket and sent to the
    Document AI processor identified by the ``PROJECT_ID`` and
    ``PROCESSOR_ID`` environment variables. The extracted text is inserted
    into the ``receipt_ocr_data`` table and the allowed entities into the
    ``document_ai_entities`` table.

    Nothing happens when ``message_id`` or ``attachment_id`` is empty.
    Failures while downloading, processing, or storing are caught and
    printed; they are not propagated to the caller.

    Args:
        user_id: Stored with both inserted rows.
        message_id: First part of the stored file name; stored with the OCR row.
        attachment_id: Second part of the stored file name.
        attachment_extension: File extension without the dot; also used to
            derive the MIME type sent to Document AI.
        email: Stored with both inserted rows.
    """
    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the request URL would silently contain "None".
        print("PROJECT_ID and PROCESSOR_ID environment variables must be set")
        return

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        document = _process_with_document_ai(
            project_id,
            processor_id,
            file_bytes,
            _guess_mime_type(attachment_extension),
        )
        raw_text = document.get('text')
        entities = document.get('entities')

        supabase.table("receipt_ocr_data").insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
        }).execute()

        print('Printing entities')
        print(entities)
        document_entities = {'user_id': user_id}
        document_entities.update(_select_allowed_entities(entities))
        document_entities['email'] = email
        print(document_entities)
        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        # Best-effort pipeline: report the failure (download, OCR, or insert) and move on.
        print(f"Error processing attachment {file_name}: {e}")