File size: 4,850 Bytes
0b2c576
 
 
 
 
0902287
7535d0e
9a0ee18
36a270b
7535d0e
36a270b
 
 
590c4a4
 
7535d0e
 
 
36a270b
 
 
 
 
 
 
 
6d9b971
0f20ab3
36a270b
 
 
 
7a50d0c
36a270b
 
 
 
 
 
 
 
 
 
7535d0e
36a270b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b73b6f
 
4fcd10d
6b73b6f
 
 
36a270b
 
6306de6
 
 
 
 
 
 
36a270b
 
 
6306de6
 
36a270b
6306de6
 
 
 
 
 
 
 
 
 
 
9a0ee18
36a270b
6f8da54
d32c49f
36a270b
 
 
27ad10a
8f88289
7535d0e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import base64
import mimetypes
import os
from base64 import urlsafe_b64encode

import requests

from authenticate import get_access_token_v1
from supabase_models import Supabase_Client

# Entity types from the Document AI response that are persisted; anything
# else returned by the processor is dropped.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) on the Document AI request so a stalled connection
# cannot block the caller forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _guess_mime_type(attachment_extension: str) -> str:
    """Return the MIME type for an attachment extension.

    Falls back to ``application/<extension>`` (the previous hard-coded
    behaviour) when the extension is unknown to ``mimetypes``.
    """
    # A fixed stem is used so that characters in real file names can never
    # influence how ``guess_type`` parses its argument.
    guessed, _ = mimetypes.guess_type(f"file.{attachment_extension}")
    return guessed or f"application/{attachment_extension}"


def _group_allowed_entities(entities) -> dict:
    """Group allowed entities by type.

    Each key is an entity type and each value is a list of
    ``{"mention_text": ..., "normalizedValue": ...}`` dicts, one per
    occurrence, in the order they appear in ``entities``.
    """
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type in _ALLOWED_ENTITY_TYPES:
            grouped.setdefault(entity_type, []).append({
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            })
    return grouped


def extract_structure_store_message(user_id:str,message_id:str , attachment_id:str,attachment_extension:str,email:str) -> None:
    """Run a stored attachment through Document AI and persist the results.

    The attachment is downloaded from the ``receipt_radar`` storage bucket
    under the name ``<message_id>_<attachment_id>.<attachment_extension>``,
    posted to the Document AI ``:process`` endpoint configured through the
    ``PROJECT_ID`` and ``PROCESSOR_ID`` environment variables, and then:

    * the raw text is inserted into the ``receipt_ocr_data`` table;
    * the allowed entities, grouped by type, are inserted into the
      ``document_ai_entities`` table together with ``user_id`` and ``email``.

    Args:
        user_id: Stored alongside both inserted rows.
        message_id: First part of the stored file name; also stored with
            the OCR text.
        attachment_id: Second part of the stored file name.
        attachment_extension: File extension; used for the file name, the
            MIME type sent to Document AI, and the ``file_type`` column.
        email: Stored alongside both inserted rows.

    Returns:
        None. Nothing is done when ``message_id`` or ``attachment_id`` is
        empty. Failures while downloading, calling Document AI, or inserting
        are printed rather than raised.
    """
    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the endpoint URL would contain the literal "None".
        print("PROJECT_ID and PROCESSOR_ID must be set to call Document AI")
        return

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")
    supabase = Supabase_Client().instance

    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        # NOTE(review): the URL-safe alphabet is kept from the original
        # implementation; confirm the API accepts it for ``content``.
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": _guess_mime_type(attachment_extension),
                "content": base64_data,
            },
        }

        # The token is a credential: it must never be printed or logged.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }

        api_response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors explicitly instead of failing later on a
        # missing "document" key.
        api_response.raise_for_status()

        document = api_response.json().get('document')
        if document is None:
            raise ValueError("Document AI response contains no 'document'")

        raw_text = document.get('text')
        entities = document.get('entities')

        (
            supabase.table("receipt_ocr_data")
            .insert({'user_id':user_id , 'message_id':message_id,'receipt_text':raw_text ,'email':email,'file_type':attachment_extension})
            .execute()
        )

        print('Printing entities')
        print(entities)

        document_entities = {
            'user_id': user_id,
            **_group_allowed_entities(entities),
            'email': email,
        }
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)

    except Exception as e:
        # Best-effort boundary: this function reports failures instead of
        # propagating them, as the original did.
        print(f"Error processing attachment {file_name}: {e}")