File size: 3,851 Bytes
0b2c576
 
 
 
 
0902287
7535d0e
9a0ee18
36a270b
7535d0e
36a270b
 
 
590c4a4
 
7535d0e
 
 
36a270b
 
 
 
 
 
 
 
6d9b971
0f20ab3
36a270b
 
 
 
7a50d0c
36a270b
 
 
 
 
 
 
 
 
 
7535d0e
36a270b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b73b6f
 
 
 
 
 
36a270b
 
 
 
 
691826b
36a270b
 
 
9a0ee18
36a270b
6f8da54
d32c49f
36a270b
 
 
27ad10a
8f88289
7535d0e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os  
import base64  
from base64 import urlsafe_b64encode 
import requests  
from supabase_models import Supabase_Client  
from authenticate import get_access_token_v1

# Entity types from the Document AI response that get persisted; entities of
# any other type are ignored.
_ALLOWED_ENTITY_TYPES = (
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
)


def _collect_allowed_entities(entities):
    """Map each allowed entity type to its mention text and normalized value.

    ``entities`` is the ``entities`` list of the processed document (or
    ``None``). When several entities share a type, the last one wins.
    """
    collected = {}
    for ent in entities or []:
        ent_type = ent.get('type')
        if ent_type is not None and ent_type in _ALLOWED_ENTITY_TYPES:
            collected[ent_type] = {
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            }
    return collected


def extract_structure_store_message(user_id:str,message_id:str , attachment_id:str,attachment_extension:str,email:str) -> None:
    """Run a stored attachment through Document AI and persist the results.

    The attachment is downloaded from the ``receipt_radar`` storage bucket
    under the name ``{message_id}_{attachment_id}.{attachment_extension}``,
    sent to the processor identified by the ``PROJECT_ID`` and
    ``PROCESSOR_ID`` environment variables, and then:

    * the document text is inserted into the ``receipt_ocr_data`` table, and
    * the allowed entities (plus ``user_id`` and ``email``) are inserted into
      the ``document_ai_entities`` table.

    Args:
        user_id: Stored alongside both inserted rows.
        message_id: First part of the stored file name; also stored with the
            OCR text.
        attachment_id: Second part of the stored file name.
        attachment_extension: File extension (without the dot) of the stored
            attachment; also used to derive the MIME type sent to the API.
        email: Stored alongside both inserted rows.

    Returns:
        None. Nothing is done when ``attachment_id`` or ``message_id`` is
        empty. Any failure while downloading, processing or storing is
        caught and printed rather than raised (best-effort behaviour).
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        # NOTE(review): the URL-safe base64 alphabet is kept from the original
        # implementation; confirm the API accepts it for `content`.
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')

        # Derive the real MIME type (e.g. image/jpeg for "jpg"); fall back to
        # the previous "application/<ext>" form for unknown extensions.
        mime_type = (
            mimetypes.guess_type(file_name)[0]
            or f"application/{attachment_extension}"
        )

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": base64_data,
            },
        }

        # The token is a credential: never print or log it.
        access_token = get_access_token_v1()

        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }

        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            # Without a timeout a stalled connection would block forever.
            timeout=120,
        )
        # Surface API errors with their HTTP status instead of failing later
        # with an opaque AttributeError on a missing 'document' key.
        response.raise_for_status()

        document = response.json().get('document')
        if document is None:
            raise ValueError("Document AI response has no 'document' field")

        raw_text = document.get('text')
        entities = document.get('entities')

        (
            supabase.table("receipt_ocr_data")
            .insert({'user_id':user_id , 'message_id':message_id,'receipt_text':raw_text ,'email':email})
            .execute()
        )

        print('Printing entities')
        print(entities)

        # Key order matches the original: user_id, entities, then email.
        document_entities = {'user_id': user_id}
        document_entities.update(_collect_allowed_entities(entities))
        document_entities['email'] = email
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)

    except Exception as e:
        # Best-effort: report and continue. The message names the attachment
        # because the failure may come from any step, not only the download.
        print(f"Error processing attachment {file_name}: {e}")