File size: 3,361 Bytes
7535d0e
 
 
36a270b
 
7535d0e
36a270b
 
 
7535d0e
 
 
 
 
36a270b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7535d0e
36a270b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7535d0e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import base64
import json
import mimetypes
import os
import urllib.request

from supabase_models import Supabase_Client


def _select_allowed_entities(entities, allowed_types):
    """Return the entities whose ``type`` is in *allowed_types*, keyed by type.

    Each value is ``{"mention_text": ..., "normalizedValue": ...}`` taken from
    the entity's ``mentionText`` / ``normalizedValue`` fields (``None`` when
    absent). ``entities`` may be ``None`` or empty.
    """
    selected = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type in allowed_types:
            # NOTE(review): entities sharing a type (e.g. several "line_item"
            # rows) overwrite each other here, as in the original code.
            selected[entity_type] = {
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            }
    return selected


def extract_structure_store_message(
    user_id: str,
    message_id: str,
    attachment_id: str,
    file_extension: str = "pdf",
) -> None:
    """Run a stored attachment through Document AI and save the result.

    Downloads ``{message_id}_{attachment_id}.{file_extension}`` from the
    ``receipt_radar`` Supabase storage bucket, posts it to the Document AI
    processor identified by the ``PROJECT_ID`` / ``PROCESSOR_ID`` environment
    variables, keeps the allowed entity types from the response and inserts
    them (together with ``user_id`` and the raw text) through the Supabase
    client.

    Args:
        user_id: Stored under the ``user_id`` key of the inserted row.
        message_id: First part of the stored file name.
        attachment_id: Second part of the stored file name.
        file_extension: Extension of the stored file, with or without a
            leading dot. Defaults to ``"pdf"`` -- an assumption, since the
            original code read it from an ``attachment`` object that does
            not exist in this scope; TODO confirm against the uploader.

    Returns:
        None. Does nothing when ``message_id`` or ``attachment_id`` is empty.
        Failures during download, the API call or the insert are printed,
        not raised.

    NOTE(review): ``get_access_token_v1`` is neither defined nor imported in
    the visible file; it has to be provided by this module for the call to
    succeed.
    """
    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    extension = (file_extension or "pdf").lstrip(".")
    file_name = f"{message_id}_{attachment_id}.{extension}"
    print(f"file_name: {file_name}")
    # Image extensions need an image/* MIME type, so ask the stdlib first and
    # fall back to the original "application/<ext>" form for unknown ones.
    mime_type = mimetypes.guess_type(file_name)[0] or f"application/{extension}"

    allowed_entities = (
        "due_date",
        "invoice_date",
        "total_amount",
        "total_tax_amount",
        "receiver_name",
        "invoice_id",
        "currency",
        "receiver_address",
        "invoice_type",
        "supplier_name",
        "payment_terms",
        "line_item",
        "line_item/description",
        "line_item/quantity",
        "line_item/amount",
        "line_item/unit_price",
    )

    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        # Standard base64 is the documented encoding for rawDocument.content.
        base64_content = base64.b64encode(file_bytes).decode('utf-8')

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": base64_content,
            },
        }

        access_token = get_access_token_v1()
        request = urllib.request.Request(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            data=json.dumps(payload).encode('utf-8'),
            headers={
                'Authorization': f'Bearer {access_token}',
                'Content-Type': 'application/json; charset=utf-8',
            },
            method='POST',
        )
        # urlopen raises HTTPError on non-2xx responses, so an API error never
        # reaches the parsing below; the timeout keeps a stalled call bounded.
        with urllib.request.urlopen(request, timeout=60) as http_response:
            response_json = json.load(http_response)

        document = response_json.get('document')
        if document is None:
            raise ValueError("Document AI response has no 'document' field")

        entities = document.get('entities')
        document_entities = {
            'user_id': user_id,
            'raw_text': document.get('text'),
        }
        print('Printing entities')
        print(entities)
        document_entities.update(
            _select_allowed_entities(entities, allowed_entities)
        )

        print(document_entities)
        # NOTE(review): "countries" looks like a placeholder table name --
        # confirm the intended destination table.
        supabase.table("countries").insert(document_entities).execute()
    except Exception as e:
        # Best-effort boundary: report the failure without crashing the caller.
        print(f"Error processing attachment {file_name}: {e}")