File size: 4,838 Bytes
8190662
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os  
import base64  
from base64 import urlsafe_b64encode 
import requests  
from supabase_models import Supabase_Client  
from authenticate import get_access_token_v1

# Document AI entity types that are persisted; every other type is ignored.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) for the Document AI HTTP call so it cannot hang forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 60


def _split_storage_name(filename):
    """Split ``<message_id>_<attachment_id>.<extension>`` into its three parts."""
    name = filename or ""
    stem, dot, extension = name.rpartition(".")
    if not dot:
        # No extension at all: the whole name is the stem.
        stem, extension = name, ""
    # Split on the FIRST underscore so an attachment id may itself contain "_".
    message_id, _, attachment_id = stem.partition("_")
    return message_id, attachment_id, extension


def extract_structure_store_message(
    filename: str,
    *,
    message_id=None,
    attachment_id=None,
    attachment_extension=None,
    user_id=None,
    email=None,
):
    """Run a stored receipt through Document AI and persist the results.

    The file is downloaded from the ``receipt_radar`` Supabase storage bucket,
    posted to the Document AI ``:process`` endpoint configured through the
    ``PROJECT_ID`` and ``PROCESSOR_ID`` environment variables, and then:

    * the raw text is inserted into the ``receipt_ocr_data`` table;
    * the allowed entities, grouped by type, are inserted into the
      ``document_ai_entities`` table.

    Args:
        filename: Storage object name. NOTE(review): assumed to follow the
            ``<message_id>_<attachment_id>.<extension>`` layout that this
            function uses to build the storage key -- confirm with callers.
            It is only parsed for parts not supplied explicitly.
        message_id: Overrides the message id parsed from ``filename``.
        attachment_id: Overrides the attachment id parsed from ``filename``.
        attachment_extension: Overrides the extension parsed from ``filename``.
        user_id: Stored alongside both inserted rows.
        email: Stored alongside both inserted rows.

    Returns:
        None. Nothing is done when no message id or attachment id can be
        determined. Failures while downloading, calling Document AI, or
        inserting are printed and swallowed (best effort).
    """
    parsed_message_id, parsed_attachment_id, parsed_extension = (
        _split_storage_name(filename)
    )
    message_id = message_id or parsed_message_id
    attachment_id = attachment_id or parsed_attachment_id
    attachment_extension = attachment_extension or parsed_extension

    if not (attachment_id and message_id):
        return

    # Function-scope import keeps this block self-contained.
    import mimetypes

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')

        # "application/<ext>" is only valid for a few types (e.g. pdf); images
        # need "image/...". Fall back to the old value when the type is unknown.
        mime_type = (
            mimetypes.guess_type(file_name)[0]
            or f"application/{attachment_extension}"
        )
        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": base64_data,
            },
        }

        # The token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }

        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors here instead of failing later on a missing key.
        response.raise_for_status()

        document = response.json().get('document')
        if document is None:
            print(f"Document AI returned no document for {file_name}")
            return

        raw_text = document.get('text')
        entities = document.get('entities')

        (
            supabase.table("receipt_ocr_data")
            .insert({
                'user_id': user_id,
                'message_id': message_id,
                'receipt_text': raw_text,
                'email': email,
                'file_type': attachment_extension,
            })
            .execute()
        )

        print('Printing entities')
        print(entities)

        document_entities = {'user_id': user_id}
        for ent in entities or ():
            entity_type = ent.get('type')
            if entity_type not in _ALLOWED_ENTITY_TYPES:
                continue
            # A type can occur several times (e.g. line items), so each type
            # maps to a list of occurrences.
            document_entities.setdefault(entity_type, []).append({
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            })
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)

    except Exception as e:
        # Best-effort boundary: report the failure and carry on.
        print(f"Error processing file {file_name}: {e}")