File size: 6,214 Bytes
0b2c576
 
 
 
 
0902287
7535d0e
9a0ee18
36a270b
7535d0e
36a270b
 
 
590c4a4
 
7535d0e
 
 
36a270b
 
 
 
 
 
 
 
6d9b971
0f20ab3
36a270b
 
 
 
7a50d0c
36a270b
 
 
 
 
 
 
 
 
 
7535d0e
36a270b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b73b6f
 
4fcd10d
6b73b6f
 
 
36a270b
 
6306de6
 
 
 
 
 
 
36a270b
 
 
6306de6
0249415
 
6306de6
36a270b
6306de6
0249415
6306de6
 
 
0249415
6306de6
 
 
 
 
0249415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a0ee18
2abbb8c
36a270b
6f8da54
d32c49f
36a270b
 
 
27ad10a
8f88289
7535d0e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import base64
import mimetypes
import os
from base64 import urlsafe_b64encode

import requests

from authenticate import get_access_token_v1
from supabase_models import Supabase_Client

# Entity types kept from the Document AI response; every other type is dropped.
_ALLOWED_ENTITIES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) on the Document AI call so a stalled request cannot
# block the caller indefinitely.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _guess_mime_type(attachment_extension):
    """Return the MIME type for a bare file extension such as ``pdf`` or ``png``.

    Falls back to ``application/<extension>`` when the extension is unknown
    to the standard ``mimetypes`` table.
    """
    # A fixed dummy stem is used so that characters in real file names can
    # never influence the guess; only the extension matters.
    guessed, _ = mimetypes.guess_type(f"file.{attachment_extension}")
    return guessed or f"application/{attachment_extension}"


def _entity_record(item):
    """Project a Document AI entity (or property) onto the stored record shape."""
    return {
        "mention_text": item.get('mentionText'),
        "normalizedValue": item.get('normalizedValue'),
    }


def _group_allowed_entities(entities):
    """Group allowed entities (and allowed line-item properties) by type.

    Returns a dict mapping each entity type to a list of records, in the
    order the entities appear in ``entities``. ``None`` yields an empty dict.
    """
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type is None:
            continue

        if entity_type in _ALLOWED_ENTITIES:
            grouped.setdefault(entity_type, []).append(_entity_record(ent))

        # Line items carry their details (description, quantity, ...) as
        # nested properties; these are stored under their own type keys.
        if entity_type == 'line_item':
            for prop in ent.get('properties') or []:
                prop_type = prop.get('type')
                if prop_type in _ALLOWED_ENTITIES:
                    grouped.setdefault(prop_type, []).append(_entity_record(prop))
    return grouped


def _process_with_document_ai(project_id, processor_id, mime_type, base64_data):
    """POST the encoded file to the Document AI processor and return its ``document``.

    Raises:
        requests.HTTPError: if the service answers with a non-2xx status.
        ValueError: if the response body has no ``document`` field.
    """
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": mime_type,
            "content": base64_data
        }
    }
    # The token is a live credential: it is deliberately never printed/logged.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8'
    }

    response = requests.post(
        f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
        headers=headers,
        json=payload,
        timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
    )
    response.raise_for_status()

    document = response.json().get('document')
    if document is None:
        raise ValueError("Document AI response contained no 'document' field")
    return document


def extract_structure_store_message(user_id: str, message_id: str, attachment_id: str, attachment_extension: str, email: str) -> None:
    """Run an attachment through Document AI and store the results in Supabase.

    The file ``<message_id>_<attachment_id>.<attachment_extension>`` is
    downloaded from the ``receipt_radar`` storage bucket, sent to the Document
    AI processor identified by the ``PROJECT_ID`` / ``PROCESSOR_ID``
    environment variables, and then:

    * the raw OCR text is inserted into the ``receipt_ocr_data`` table;
    * the allowed entities, grouped by type, are inserted into the
      ``document_ai_entities`` table together with ``user_id``, ``email`` and
      ``message_id``.

    The function is best-effort: a failure in any stage is printed and the
    function returns without raising. It does nothing when ``message_id`` or
    ``attachment_id`` is empty.

    Args:
        user_id: Identifier stored alongside both inserted rows.
        message_id: Message identifier; first part of the stored file name.
        attachment_id: Attachment identifier; second part of the file name.
        attachment_extension: File extension without the dot (e.g. ``pdf``).
        email: Email address stored alongside both inserted rows.
    """
    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the request URL would contain the literal text "None".
        print("Error: PROJECT_ID and PROCESSOR_ID environment variables must be set")
        return

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance

    # Each stage has its own handler so the printed message names the step
    # that actually failed. Catching Exception is intentional here: this is
    # the top-level boundary of a best-effort job.
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error downloading or encoding file: {e}")
        return

    try:
        document = _process_with_document_ai(
            project_id,
            processor_id,
            _guess_mime_type(attachment_extension),
            base64_data,
        )
    except Exception as e:
        print(f"Error processing file with Document AI: {e}")
        return

    raw_text = document.get('text')
    entities = document.get('entities')

    try:
        (
            supabase.table("receipt_ocr_data")
            .insert({'user_id': user_id, 'message_id': message_id, 'receipt_text': raw_text, 'email': email, 'file_type': attachment_extension})
            .execute()
        )

        print('Printing entities')
        print(entities)

        document_entities = {
            'user_id': user_id,
            **_group_allowed_entities(entities),
            'email': email,
            'message_id': message_id,
        }
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        print(f"Error storing extracted data: {e}")