Omkar008 committed on
Commit
8190662
·
verified ·
1 Parent(s): 613b217

Create extract_and_store_supabase.py

Browse files
Files changed (1) hide show
  1. extract_and_store_supabase.py +110 -0
extract_and_store_supabase.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ from base64 import urlsafe_b64encode
4
+ import requests
5
+ from supabase_models import Supabase_Client
6
+ from authenticate import get_access_token_v1
7
+
8
# Document AI entity types that are persisted; every other type is ignored.
_ALLOWED_ENTITIES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Upper bound (seconds) on the Document AI HTTP call so that a stalled
# connection cannot block the caller indefinitely.
_DOCUMENT_AI_TIMEOUT = 120


def _parse_file_name(filename):
    """Split ``<message_id>_<attachment_id>.<extension>`` into its three parts."""
    # The extension is whatever follows the last dot; the message id is
    # whatever precedes the first underscore.
    # NOTE(review): assumes message ids never contain "_" -- confirm with the
    # code that uploads the files.
    stem, _, extension = (filename or "").rpartition(".")
    message_id, _, attachment_id = stem.partition("_")
    return message_id, attachment_id, extension


def _guess_mime_type(filename, extension):
    """Return the MIME type for *filename*, else ``application/<extension>``."""
    import mimetypes  # local import: keeps this block self-contained

    guessed, _ = mimetypes.guess_type(filename)
    return guessed or f"application/{extension}"


def _group_allowed_entities(entities):
    """Group the allowed entities by type, keeping every occurrence in a list."""
    grouped = {}
    for ent in entities or []:
        entity_type = ent.get('type')
        if entity_type in _ALLOWED_ENTITIES:
            grouped.setdefault(entity_type, []).append({
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            })
    return grouped


def extract_structure_store_message(filename: str, *, user_id=None, email=None) -> None:
    """Run a stored receipt through Document AI and persist the results.

    The file is downloaded from the ``receipt_radar`` Supabase storage bucket,
    sent to the Document AI processor configured through the ``PROJECT_ID`` and
    ``PROCESSOR_ID`` environment variables, and the response is written to two
    tables: the raw text to ``receipt_ocr_data`` and the allowed entities
    (grouped by type) to ``document_ai_entities``.

    Args:
        filename: Object name inside the bucket, expected to look like
            ``<message_id>_<attachment_id>.<extension>``. Names that do not
            yield both a message id and an attachment id are skipped.
        user_id: Value stored in the ``user_id`` column of both tables
            (str or None).
        email: Value stored in the ``email`` column of both tables
            (str or None).

    Returns:
        None. Failures while downloading, calling Document AI or inserting
        rows are reported on stdout and not re-raised.
    """
    message_id, attachment_id, attachment_extension = _parse_file_name(filename)
    if not (attachment_id and message_id):
        print(f"Skipping file with unexpected name: {filename!r}")
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    print(f"file_name: {filename}")
    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(filename)
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": _guess_mime_type(filename, attachment_extension),
                "content": base64_data,
            },
        }

        # The token is a credential: never echo it to stdout or logs.
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }

        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT,
        )
        # Surface HTTP errors here instead of failing later on a body that
        # has no "document" key.
        response.raise_for_status()

        document = response.json().get('document') or {}
        raw_text = document.get('text')
        entities = document.get('entities')

        supabase.table("receipt_ocr_data").insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
            'file_type': attachment_extension,
        }).execute()

        print('Printing entities')
        print(entities)

        document_entities = {'user_id': user_id}
        document_entities.update(_group_allowed_entities(entities))
        document_entities['email'] = email
        document_entities['message_id'] = message_id
        print(document_entities)

        insert_data_response = (
            supabase.table("document_ai_entities")
            .insert(document_entities)
            .execute()
        )
        print(insert_data_response)

    except Exception as e:
        # Best-effort boundary: report the failure and let the caller continue.
        print(f"Error processing file {filename}: {e}")