Fix indentation
Browse files- vertex_api_invoice_extractor.py +36 -36
vertex_api_invoice_extractor.py
CHANGED
@@ -11,31 +11,31 @@ from remittance_pdf_processing_utils import remove_duplicate_lists
|
|
11 |
|
12 |
# Set up authentication
|
13 |
def initialize_vertexai():
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
|
40 |
|
41 |
# Call this function at the start of your script or in your main function
|
@@ -84,14 +84,14 @@ def extract_invoice_numbers_with_vertex_ai_single_hop(base64_image: str) -> list
|
|
84 |
"temperature": 0.1,
|
85 |
"top_p": 0.95,
|
86 |
}
|
87 |
-
|
88 |
safety_settings = {
|
89 |
generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
90 |
generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
91 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
92 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
93 |
}
|
94 |
-
|
95 |
|
96 |
responses = model.generate_content(
|
97 |
[image_part, text_prompt],
|
@@ -142,7 +142,7 @@ def extract_column_headers(base64_image: str) -> list[str]:
|
|
142 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
143 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
144 |
}
|
145 |
-
|
146 |
|
147 |
responses = model.generate_content(
|
148 |
[image_part, text_prompt],
|
@@ -193,7 +193,7 @@ def extract_invoice_numbers_for_column(base64_image: str, column_name: str) -> I
|
|
193 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
194 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
195 |
}
|
196 |
-
|
197 |
|
198 |
responses = model.generate_content(
|
199 |
[image_part, text_prompt],
|
@@ -358,7 +358,7 @@ def extract_invoice_numbers_from_text_with_vertex_ai_single_hop(text: str) -> li
|
|
358 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
359 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
360 |
}
|
361 |
-
|
362 |
|
363 |
responses = model.generate_content(
|
364 |
prompt,
|
@@ -434,7 +434,7 @@ def extract_column_headers_from_text(text: str) -> list[str]:
|
|
434 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
435 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
436 |
}
|
437 |
-
|
438 |
|
439 |
response = model.generate_content(
|
440 |
prompt,
|
@@ -479,7 +479,7 @@ def extract_invoice_numbers_for_column_from_text(text: str, column_name: str) ->
|
|
479 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
480 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
481 |
}
|
482 |
-
|
483 |
|
484 |
response = model.generate_content(
|
485 |
prompt,
|
@@ -516,7 +516,7 @@ def extract_payment_amounts_with_vertex_ai(base64_image: str) -> list[PaymentAmo
|
|
516 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
517 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
518 |
}
|
519 |
-
|
520 |
|
521 |
responses = model.generate_content(
|
522 |
[image_part, text_prompt],
|
@@ -560,14 +560,14 @@ def extract_payment_amounts_from_text_with_vertex_ai(text: str) -> list[PaymentA
|
|
560 |
"temperature": 0.1,
|
561 |
"top_p": 0.95,
|
562 |
}
|
563 |
-
|
564 |
safety_settings = {
|
565 |
generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
566 |
generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
567 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
568 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
569 |
-
|
570 |
-
|
571 |
|
572 |
response = model.generate_content(
|
573 |
prompt,
|
|
|
11 |
|
12 |
# Set up authentication
|
13 |
def initialize_vertexai():
|
14 |
+
# Get the base64-encoded service account JSON from an environment variable
|
15 |
+
encoded_sa_json = os.environ.get('VERTEX_AI_SERVICE_ACCOUNT_JSON')
|
16 |
+
|
17 |
+
if not encoded_sa_json:
|
18 |
+
raise ValueError("VERTEX_AI_SERVICE_ACCOUNT_JSON environment variable is not set")
|
19 |
+
|
20 |
+
try:
|
21 |
+
# Decode the base64 string to get the JSON content
|
22 |
+
sa_json_str = base64.b64decode(encoded_sa_json).decode('utf-8')
|
23 |
+
sa_info = json.loads(sa_json_str)
|
24 |
+
|
25 |
+
# Create credentials object from the decoded JSON
|
26 |
+
credentials = service_account.Credentials.from_service_account_info(
|
27 |
+
sa_info,
|
28 |
+
scopes=['https://www.googleapis.com/auth/cloud-platform']
|
29 |
+
)
|
30 |
+
|
31 |
+
# Initialize Vertex AI with the credentials
|
32 |
+
vertexai.init(project="saltech-ai-sandbox", location="us-central1", credentials=credentials)
|
33 |
+
|
34 |
+
print("Vertex AI initialized successfully.")
|
35 |
+
except json.JSONDecodeError:
|
36 |
+
raise ValueError("Invalid JSON format in the decoded service account information")
|
37 |
+
except Exception as e:
|
38 |
+
raise Exception(f"Error initializing Vertex AI: {str(e)}")
|
39 |
|
40 |
|
41 |
# Call this function at the start of your script or in your main function
|
|
|
84 |
"temperature": 0.1,
|
85 |
"top_p": 0.95,
|
86 |
}
|
87 |
+
|
88 |
safety_settings = {
|
89 |
generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
90 |
generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
91 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
92 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
93 |
}
|
94 |
+
safety_settings = {}
|
95 |
|
96 |
responses = model.generate_content(
|
97 |
[image_part, text_prompt],
|
|
|
142 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
143 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
144 |
}
|
145 |
+
safety_settings = {}
|
146 |
|
147 |
responses = model.generate_content(
|
148 |
[image_part, text_prompt],
|
|
|
193 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
194 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
195 |
}
|
196 |
+
safety_settings = {}
|
197 |
|
198 |
responses = model.generate_content(
|
199 |
[image_part, text_prompt],
|
|
|
358 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
359 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
360 |
}
|
361 |
+
safety_settings = {}
|
362 |
|
363 |
responses = model.generate_content(
|
364 |
prompt,
|
|
|
434 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
435 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
436 |
}
|
437 |
+
safety_settings = {}
|
438 |
|
439 |
response = model.generate_content(
|
440 |
prompt,
|
|
|
479 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
480 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
481 |
}
|
482 |
+
safety_settings = {}
|
483 |
|
484 |
response = model.generate_content(
|
485 |
prompt,
|
|
|
516 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
517 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
518 |
}
|
519 |
+
safety_settings = {}
|
520 |
|
521 |
responses = model.generate_content(
|
522 |
[image_part, text_prompt],
|
|
|
560 |
"temperature": 0.1,
|
561 |
"top_p": 0.95,
|
562 |
}
|
563 |
+
|
564 |
safety_settings = {
|
565 |
generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
566 |
generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
567 |
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
568 |
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
|
569 |
+
}
|
570 |
+
safety_settings = {}
|
571 |
|
572 |
response = model.generate_content(
|
573 |
prompt,
|