Spaces:

Saltech
/

remittance-poc-with-verifier

Sleeping

App Files Community

Alejandro-STC committed on Oct 10, 2024

Commit

9712b9a

verified ·

1 Parent(s): 1fa2e92

Fix indentation

Browse files

Files changed (1) hide show

vertex_api_invoice_extractor.py +36 -36

vertex_api_invoice_extractor.py CHANGED Viewed

@@ -11,31 +11,31 @@ from remittance_pdf_processing_utils import remove_duplicate_lists
 # Set up authentication
 def initialize_vertexai():
-    # Get the base64-encoded service account JSON from an environment variable
-    encoded_sa_json = os.environ.get('VERTEX_AI_SERVICE_ACCOUNT_JSON')
-    if not encoded_sa_json:
-        raise ValueError("VERTEX_AI_SERVICE_ACCOUNT_JSON environment variable is not set")
-    try:
-        # Decode the base64 string to get the JSON content
-        sa_json_str = base64.b64decode(encoded_sa_json).decode('utf-8')
-        sa_info = json.loads(sa_json_str)
-        # Create credentials object from the decoded JSON
-        credentials = service_account.Credentials.from_service_account_info(
-            sa_info,
-            scopes=['https://www.googleapis.com/auth/cloud-platform']
-        )
-        # Initialize Vertex AI with the credentials
-        vertexai.init(project="saltech-ai-sandbox", location="us-central1", credentials=credentials)
-        print("Vertex AI initialized successfully.")
-    except json.JSONDecodeError:
-        raise ValueError("Invalid JSON format in the decoded service account information")
-    except Exception as e:
-        raise Exception(f"Error initializing Vertex AI: {str(e)}")
 # Call this function at the start of your script or in your main function
@@ -84,14 +84,14 @@ def extract_invoice_numbers_with_vertex_ai_single_hop(base64_image: str) -> list
 		"temperature": 0.1,
 		"top_p": 0.95,
 	}
 	safety_settings = {
 		generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	responses = model.generate_content(
 		[image_part, text_prompt],
@@ -142,7 +142,7 @@ def extract_column_headers(base64_image: str) -> list[str]:
 	  generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	  generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	responses = model.generate_content(
 	  [image_part, text_prompt],
@@ -193,7 +193,7 @@ def extract_invoice_numbers_for_column(base64_image: str, column_name: str) -> I
 	  generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	  generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	responses = model.generate_content(
 	  [image_part, text_prompt],
@@ -358,7 +358,7 @@ def extract_invoice_numbers_from_text_with_vertex_ai_single_hop(text: str) -> li
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	responses = model.generate_content(
 		prompt,
@@ -434,7 +434,7 @@ def extract_column_headers_from_text(text: str) -> list[str]:
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	response = model.generate_content(
 		prompt,
@@ -479,7 +479,7 @@ def extract_invoice_numbers_for_column_from_text(text: str, column_name: str) ->
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	response = model.generate_content(
 		prompt,
@@ -516,7 +516,7 @@ def extract_payment_amounts_with_vertex_ai(base64_image: str) -> list[PaymentAmo
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
-    safety_settings = {}
 	responses = model.generate_content(
 		[image_part, text_prompt],
@@ -560,14 +560,14 @@ def extract_payment_amounts_from_text_with_vertex_ai(text: str) -> list[PaymentA
 		"temperature": 0.1,
 		"top_p": 0.95,
 	}
 	safety_settings = {
 		generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
-    }
-    safety_settings = {}
 	response = model.generate_content(
 		prompt,

 # Set up authentication
 def initialize_vertexai():
+	# Get the base64-encoded service account JSON from an environment variable
+	encoded_sa_json = os.environ.get('VERTEX_AI_SERVICE_ACCOUNT_JSON')
+	if not encoded_sa_json:
+		raise ValueError("VERTEX_AI_SERVICE_ACCOUNT_JSON environment variable is not set")
+	try:
+		# Decode the base64 string to get the JSON content
+		sa_json_str = base64.b64decode(encoded_sa_json).decode('utf-8')
+		sa_info = json.loads(sa_json_str)
+		# Create credentials object from the decoded JSON
+		credentials = service_account.Credentials.from_service_account_info(
+			sa_info,
+			scopes=['https://www.googleapis.com/auth/cloud-platform']
+		)
+		# Initialize Vertex AI with the credentials
+		vertexai.init(project="saltech-ai-sandbox", location="us-central1", credentials=credentials)
+		print("Vertex AI initialized successfully.")
+	except json.JSONDecodeError:
+		raise ValueError("Invalid JSON format in the decoded service account information")
+	except Exception as e:
+		raise Exception(f"Error initializing Vertex AI: {str(e)}")
 # Call this function at the start of your script or in your main function
 		"temperature": 0.1,
 		"top_p": 0.95,
 	}
 	safety_settings = {
 		generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	responses = model.generate_content(
 		[image_part, text_prompt],
 	  generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	  generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	responses = model.generate_content(
 	  [image_part, text_prompt],
 	  generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	  generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	responses = model.generate_content(
 	  [image_part, text_prompt],
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	responses = model.generate_content(
 		prompt,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	response = model.generate_content(
 		prompt,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	response = model.generate_content(
 		prompt,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 	}
+	safety_settings = {}
 	responses = model.generate_content(
 		[image_part, text_prompt],
 		"temperature": 0.1,
 		"top_p": 0.95,
 	}
 	safety_settings = {
 		generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 		generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+	}
+	safety_settings = {}
 	response = model.generate_content(
 		prompt,