File size: 6,090 Bytes
8190662
4098f17
 
 
8190662
 
 
 
 
 
7853c7c
 
 
 
 
 
 
 
 
 
 
d72a34e
7853c7c
 
8190662
 
 
 
72a80a7
1ce2e79
c228d77
 
335dd53
3376cac
 
17117d6
8190662
 
6e37b9b
dac4d7c
8190662
 
52022cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8190662
 
 
 
52022cd
8190662
 
 
 
 
 
 
 
 
 
 
f111a58
8190662
7853c7c
8190662
7853c7c
 
 
 
 
 
 
 
8190662
7853c7c
 
8190662
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os  
import base64
import random
import string
from base64 import urlsafe_b64encode 
import requests  
from supabase_models import Supabase_Client  
from authenticate import get_access_token_v1



def generate_custom_random_string():
    """Return a random identifier: ``"RAN"`` followed by 16 lowercase hex digits.

    Each of the 16 hexadecimal characters (``0-9`` and ``a-f``) is drawn
    with equal probability.

    Returns:
        A 19-character string such as ``"RAN3f0a9c1b7e24d586"``.
    """
    length = 16
    # Build the alphabet explicitly. ``string.hexdigits.lower()`` would be
    # "0123456789abcdefabcdef": the letters appear twice, so they would be
    # picked twice as often as the digits and the ID would be biased.
    characters = string.digits + "abcdef"
    random_string = ''.join(random.choices(characters, k=length))
    # The "RAN" prefix marks the identifier as randomly generated.
    return "RAN" + random_string

# MIME type sent to Document AI for each supported (lower-case) file extension.
_MIME_TYPES_BY_EXTENSION = {
    "pdf": "application/pdf",
    "jpg": "image/jpeg",  # "image/jpg" is not a registered MIME type
    "jpeg": "image/jpeg",
    "png": "image/png",
}

# Entity types that are copied from the Document AI response into the
# ``document_ai_entities`` row.
# NOTE(review): the "line_item/..." types may only appear nested under a
# parent "line_item" entity in the response — confirm they match at top level.
_ALLOWED_ENTITY_TYPES = frozenset({
    "credit_card_last_four_digits",
    "currency",
    "end_date",
    "net_amount",
    "payment_type",
    "purchase_time",
    "receipt_date",
    "start_date",
    "supplier_address",
    "supplier_city",
    "supplier_name",
    "tip_amount",
    "total_amount",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) on the Document AI HTTP call so that a stalled
# connection cannot hang the caller forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _file_extension(filename):
    """Return the lower-cased extension of *filename* without the dot ('' if none)."""
    return os.path.splitext(filename)[1][1:].lower()


def _group_allowed_entities(entities):
    """Group the allowed entities by type into lists of mention/normalized values."""
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type in _ALLOWED_ENTITY_TYPES:
            # One receipt can contain several entities of the same type, so
            # every type maps to a list rather than a single value.
            grouped.setdefault(entity_type, []).append({
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            })
    return grouped


async def extract_structure_store_message(filename: str, filename_path: str, user_id: str, email: str) -> None:
    """Run OCR on a manually uploaded receipt and store the extracted data.

    The file is downloaded from the ``all_card_assets`` storage bucket, sent
    to the Document AI processor identified by the ``PROJECT_ID`` and
    ``PROCESSOR_ID`` environment variables, and the result is written to two
    tables: the raw text to ``receipt_ocr_data`` and the allowed entities
    (grouped by type) to ``document_ai_entities``. Both rows share a freshly
    generated ``message_id``.

    The function is best-effort: failures while downloading, calling
    Document AI or storing are printed and the function returns ``None``
    without raising.

    NOTE: the HTTP request is made with the synchronous ``requests`` library,
    so the event loop is blocked while this coroutine runs.

    Args:
        filename: Name of the uploaded file; its extension selects the MIME
            type (``pdf``, ``jpg``, ``jpeg`` or ``png``, case-insensitive).
        filename_path: Path of the file inside the ``all_card_assets`` bucket.
        user_id: Stored alongside the OCR text and the entities.
        email: Stored alongside the OCR text and the entities.
    """
    # Take the part after the LAST dot so names such as "my.receipt.pdf"
    # resolve to "pdf", and names without a dot do not raise IndexError.
    attachment_extension = _file_extension(filename)
    mime_type = _MIME_TYPES_BY_EXTENSION.get(attachment_extension)
    if mime_type is None:
        print(f"Unsupported attachment type for {filename!r}: {attachment_extension!r}")
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        print("PROJECT_ID and PROCESSOR_ID must be set to call Document AI")
        return

    print(filename)
    print("printing attachment extension")
    print(attachment_extension)
    print(filename_path)

    message_id = generate_custom_random_string()
    supabase = Supabase_Client().instance

    # Stage 1: fetch the file and encode it for the JSON request body.
    try:
        file_bytes = supabase.storage.from_("all_card_assets").download(filename_path)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')
    except Exception as e:
        print(f"Error downloading or encoding file: {e}")
        return

    # Stage 2: send the document to Document AI.
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": mime_type,
            "content": base64_data,
        },
    }
    try:
        # The access token is a credential: never print or log it.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8'
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
        if not response.ok:
            print(f"Document AI request failed with HTTP {response.status_code}: {response.text}")
            return
        response_json = response.json()
    except Exception as e:
        print(f"Error calling Document AI: {e}")
        return

    print(response_json)
    document = response_json.get('document')
    if document is None:
        print("Document AI response did not contain a 'document'")
        return

    raw_text = document.get('text', None)
    entities = document.get('entities', None)
    print('Printing entities')
    print(entities)

    # Stage 3: persist the raw text and the structured entities.
    try:
        (
            supabase.table("receipt_ocr_data")
            .insert({'user_id': user_id, 'message_id': message_id, 'receipt_text': raw_text, 'email': email, 'file_type': attachment_extension})
            .execute()
        )

        document_entities = {'user_id': user_id}
        document_entities.update(_group_allowed_entities(entities))
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)
    except Exception as e:
        print(f"Error storing OCR results: {e}")