Initial upload from Colab

ef1ad9e verified 4 months ago

11.2 kB

	# --- Library Imports ---
	import os
	import uuid
	from itertools import chain

	from azure.storage.blob import BlobServiceClient

	# ---
	# --- User Imports ---
	from app.config.env import env

	# ---

	# --- Constants ---
	# Root virtual folder inside the container; per-user folders are created beneath it.
	BASE_FOLDER_NAME = "doc_agent_userIds"
	# Storage account name, connection string and container name come from the app's env config.
	STORAGE_ACCOUNT_NAME = env.ACCOUNT_NAME
	CONNECTION_STRING = env.CONNECTION_STRING
	CONTAINER_NAME = env.CONTAINER_NAME
	# Prefix prepended to the user id to form the per-user folder name.
	USER_FOLDER_PREFIX = "userId_"
	# Document-type ids that filter_documents matches by application id rather than by code.
	MISCELLANEOUS_DOCUMENT_TYPE_ID = 7
	LENDERS_DOCUMENT_TYPE_ID = 9


	def _document_checklist(income_documents, closing_document):
	    """Return a fresh checklist: licence, the income documents, bank statements, closing document."""
	    return ["DriversLicense", *income_documents, "Recent2MonthBankStatements", closing_document]


	# Income documents per employment situation (shared by purchase and refinance).
	_RETIRED_INCOME = ("AwardLetterSSN", "Recent2Years1099Forms")
	_EMPLOYED_INCOME = ("Recent30DaysPaystubs", "Recent2YearsW2Forms")
	_SELF_EMPLOYED_INCOME = ("Recent2YearsPersonalTaxReturns", "Recent2YearsBusinessTaxReturns")

	# Purchase checklists end with the purchase agreement ...
	PURCHASE_RETIRED_DOCUMENT_LIST = _document_checklist(_RETIRED_INCOME, "PurchaseAgreements")
	PURCHASE_EMPLOYED_DOCUMENT_LIST = _document_checklist(_EMPLOYED_INCOME, "PurchaseAgreements")
	PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST = _document_checklist(_SELF_EMPLOYED_INCOME, "PurchaseAgreements")

	# ... refinance checklists end with the mortgage statement instead.
	REFINANCE_RETIRED_DOCUMENT_LIST = _document_checklist(_RETIRED_INCOME, "RecentMortgageStatementsRefinance")
	REFINANCE_EMPLOYED_DOCUMENT_LIST = _document_checklist(_EMPLOYED_INCOME, "RecentMortgageStatementsRefinance")
	REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST = _document_checklist(
	    _SELF_EMPLOYED_INCOME, "RecentMortgageStatementsRefinance"
	)

	# Codes added when the payload reports ownershipId == 1.
	OWNERSHIP_OWNED_DOCUMENT_LIST = [
	    "PrimaryResidenceMortgageStatement",
	    "HomeownersInsurancePolicy",
	    "PrimaryPropertyTaxStatement",
	    "HomeownersAssociation",
	    "RecentMortgageStatements",
	]

	# Codes added by filter_documents when the matching payload condition holds.
	OTHER_INCOME = ["W2OtherIncome"]
	OT_BONUS_INCOME = ["2YearsPaystubHistory"]
	ASSET_DOCUMENT_LIST = ["Recent2MonthBankStatements"]
	PAYOFF_DOCUMENT_LIST = ["PayoffAmountStatement"]
	EXCLUDE_DOCUMENT_LIST = ["ProofOfExclusion"]
	RESIDENT_DOCUMENT_LIST = ["PermanentResidentCardGreenCard"]
	ESCROWED_DOCUMENT_LIST = ["PrimaryResidenceMortgageStatement"]
	NON_ESCROWED_DOCUMENT_LIST = [
	    "HomeownersInsurancePolicy",
	    "ProofofPropertyTax",
	    "ProofofPropertyInsurance",
	]
	NON_PRA_DOCUMENT_LIST = ["H1B"]
	# ---

	# Module-level Azure Blob Storage clients, created once at import time from
	# CONNECTION_STRING.
	blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING)
	# Client scoped to CONTAINER_NAME; the upload/download helpers in this module use it.
	container_client = blob_service_client.get_container_client(container=CONTAINER_NAME)


	async def write_file(file):
	    """
	    Save an uploaded file into the local ``temp`` directory.

	    Only the final path component of ``file.filename`` is used, so a
	    client-supplied name such as ``../../x`` or an absolute path cannot make
	    the file land outside ``temp``.

	    Args:
	        file: Upload object exposing a ``filename`` attribute and an awaitable
	            ``read()`` whose result is the content to write (bytes).

	    Returns
	    -------
	    The path of the written file (``temp/<name>``), relative to the current
	    working directory.

	    Raises
	    ------
	    ValueError: If ``file.filename`` has no usable final component.
	    """
	    # Treat both "/" and "\" as separators regardless of the host OS, then
	    # keep only the last component of the client-supplied name.
	    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
	    if safe_name in ("", ".", ".."):
	        raise ValueError(f"Invalid upload filename: {file.filename!r}")

	    # Ensure the temp directory exists
	    os.makedirs("temp", exist_ok=True)

	    file_path = os.path.join("temp", safe_name)
	    contents = await file.read()

	    # Write the file contents to the temp directory
	    with open(file_path, "wb") as f:
	        f.write(contents)

	    return file_path


	async def create_folders(container_client, folder_name, application_id):
	    """
	    Make sure the per-user folder layout exists in the container.

	    Blob storage has no real directories, so each folder is materialised by an
	    empty ``.dummy`` blob. Folders created under
	    ``BASE_FOLDER_NAME/<folder_name>``: ``processed``, ``raw``,
	    ``applicationId_<application_id>`` and one sub-folder of the latter per
	    document category.

	    Blob names are always joined with "/" (``os.path.join`` would produce
	    backslashes on Windows and therefore different blob names).

	    Args:
	        container_client: Container client used for the existence check and upload.
	            Its calls are made without ``await``.
	        folder_name: Name of the user folder below ``BASE_FOLDER_NAME``.
	        application_id: Id interpolated into the ``applicationId_<id>`` folder name.

	    Raises
	    ------
	    Whatever ``container_client.upload_blob`` raises when a marker blob cannot be written.
	    """
	    base_folder_path = f"{BASE_FOLDER_NAME}/{folder_name}"
	    application_folder = f"applicationId_{application_id}"
	    document_categories = (
	        "identityDocuments",
	        "incomeDocuments",
	        "assetsDocuments",
	        "employmentVerification",
	        "creditDocuments",
	        "propertyDocuments",
	        "miscellaneousDocuments",
	    )
	    subfolders = [
	        "processed",
	        "raw",
	        application_folder,
	        *(f"{application_folder}/{category}" for category in document_categories),
	    ]

	    for sub_folder_name in subfolders:
	        blob_name = f"{base_folder_path}/{sub_folder_name}/.dummy"

	        try:
	            already_exists = container_client.get_blob_client(blob=blob_name).exists()
	        except Exception:
	            # Best effort, as before: a failed probe must not block folder
	            # creation. The upload below is idempotent and will surface any
	            # real storage error.
	            already_exists = False

	        if already_exists:
	            continue

	        # Create the folder by uploading an empty marker blob
	        container_client.upload_blob(name=blob_name, data=b"", overwrite=True)


	async def upload_file(user_id, payload, file):
	    """
	    Upload *file* into the user's ``raw`` folder and return the blob URL.

	    The user's folder layout is created first via ``create_folders``; an
	    existing blob with the same name is overwritten.

	    Args:
	        user_id: Id used to build the ``USER_FOLDER_PREFIX<user_id>`` folder name.
	        payload: Mapping; only ``payload["application_id"]`` is read here.
	        file: Upload object exposing ``filename`` and a seekable ``file`` stream.

	    Returns
	    -------
	    ``https://<account>.blob.core.windows.net/<container>/<blob path>`` as a string.
	    """
	    owner_folder = f"{USER_FOLDER_PREFIX}{user_id!s}"
	    await create_folders(container_client, owner_folder, payload["application_id"])

	    # Destination blob inside the user's raw folder
	    blob_path = f"{BASE_FOLDER_NAME}/{owner_folder}/raw/{file.filename}"
	    destination = container_client.get_blob_client(blob=blob_path)

	    # Rewind before reading so the whole content is uploaded
	    stream = file.file
	    stream.seek(0)
	    destination.upload_blob(stream.read(), overwrite=True)

	    return f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net/{CONTAINER_NAME}/{blob_path}"


	def generate_unique_code(text):
	    """
	    Build a unique code from *text*.

	    Space characters are removed from *text* (other whitespace is kept), then
	    an underscore and a random UUID4 are appended.
	    """
	    compact_text = "".join(text.split(" "))
	    return f"{compact_text}_{uuid.uuid4()}"


	def filter_documents(documents, payload):
	    """
	    Keep only the document sub-types that apply to a loan application.

	    Each document's ``sub_types`` list is replaced in place (a document without
	    the key gets an empty list). A sub-type is kept when either:

	    * its ``code`` is one of the codes selected from the payload, or
	    * its ``documentTypeId`` is MISCELLANEOUS_DOCUMENT_TYPE_ID or
	      LENDERS_DOCUMENT_TYPE_ID and its ``applicationId`` equals
	      ``payload.get("applicationId")``.

	    Args:
	        documents: A list of dictionaries representing documents; each needs a
	            ``documentTypeId`` key (used for sorting).
	        payload: Application details. Keys read: ``ownershipId``,
	            ``citizenshipStatusId``, ``reos``, ``employments``, ``assets``,
	            ``liabilities``, ``loanPurposeId`` (only when there is at least one
	            employment) and, optionally, ``applicationId``.

	    Returns
	    -------
	    A new list containing the same (mutated) document dictionaries, sorted by
	    ``documentTypeId``.

	    Raises
	    ------
	    KeyError: If one of the required payload / entry keys is missing.
	    """
	    # A set gives O(1) membership tests and absorbs the duplicates produced
	    # when several REOs / employments / liabilities select the same codes.
	    allowed_codes = _conditional_document_codes(payload)
	    allowed_codes.update(_employment_document_codes(payload))

	    application_id = payload.get("applicationId")
	    per_application_type_ids = (MISCELLANEOUS_DOCUMENT_TYPE_ID, LENDERS_DOCUMENT_TYPE_ID)

	    for document in documents:
	        document["sub_types"] = [
	            sub_type
	            for sub_type in document.get("sub_types", [])
	            if sub_type.get("code") in allowed_codes
	            or (
	                sub_type.get("documentTypeId") in per_application_type_ids
	                and sub_type.get("applicationId") == application_id
	            )
	        ]

	    return sorted(documents, key=sort_by_document_type_id)


	def _conditional_document_codes(payload):
	    """
	    Return the set of codes selected by the payload independently of loan purpose.

	    Rules:
	    1) ownershipId == 1                          -> OWNERSHIP_OWNED_DOCUMENT_LIST
	    2) citizenshipStatusId == 1 / == 3           -> RESIDENT_DOCUMENT_LIST / NON_PRA_DOCUMENT_LIST
	    3) per REO: escrowedPayment == "1" (and ownershipId != 2) -> ESCROWED_DOCUMENT_LIST,
	       escrowedPayment == "0"                    -> NON_ESCROWED_DOCUMENT_LIST
	    4) per employment: otherIncome set           -> OTHER_INCOME,
	       bonusIncome or OTIncome set               -> OT_BONUS_INCOME
	    5) per asset: none of lenderCredit, realtorCredit, otherCredit equals 1 -> ASSET_DOCUMENT_LIST
	    6) per liability: isPayoffRequired == "1"    -> PAYOFF_DOCUMENT_LIST
	    7) per liability: isExclude == "1"           -> EXCLUDE_DOCUMENT_LIST
	    """
	    codes = set()

	    if payload["ownershipId"] == 1:
	        codes.update(OWNERSHIP_OWNED_DOCUMENT_LIST)

	    if payload["citizenshipStatusId"] == 1:
	        codes.update(RESIDENT_DOCUMENT_LIST)
	    elif payload["citizenshipStatusId"] == 3:
	        codes.update(NON_PRA_DOCUMENT_LIST)

	    for reo in payload["reos"]:
	        # escrowedPayment is compared as a string flag, exactly as received
	        if reo["escrowedPayment"] == "1" and payload["ownershipId"] != 2:
	            codes.update(ESCROWED_DOCUMENT_LIST)
	        elif reo["escrowedPayment"] == "0":
	            codes.update(NON_ESCROWED_DOCUMENT_LIST)

	    for employment in payload["employments"]:
	        if employment["otherIncome"]:
	            codes.update(OTHER_INCOME)
	        if employment["bonusIncome"] or employment["OTIncome"]:
	            codes.update(OT_BONUS_INCOME)

	    for asset in payload["assets"]:
	        if asset["lenderCredit"] != 1 and asset["realtorCredit"] != 1 and asset["otherCredit"] != 1:
	            codes.update(ASSET_DOCUMENT_LIST)

	    for liability in payload["liabilities"]:
	        if liability["isPayoffRequired"] == "1":
	            codes.update(PAYOFF_DOCUMENT_LIST)
	        if liability["isExclude"] == "1":
	            codes.update(EXCLUDE_DOCUMENT_LIST)

	    return codes


	def _employment_document_codes(payload):
	    """Return the codes for every (loanPurposeId, employmentTypeId) pair found in the payload."""
	    # Keyed by (loanPurposeId, employmentTypeId); unknown pairs select nothing.
	    checklist_by_purpose_and_employment = {
	        (1, 1): PURCHASE_RETIRED_DOCUMENT_LIST,
	        (1, 2): PURCHASE_EMPLOYED_DOCUMENT_LIST,
	        (1, 3): PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST,
	        (2, 1): REFINANCE_RETIRED_DOCUMENT_LIST,
	        (2, 2): REFINANCE_EMPLOYED_DOCUMENT_LIST,
	        (2, 3): REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST,
	    }

	    codes = set()
	    for employment in payload["employments"]:
	        employment_type_id = employment["employmentTypeId"]
	        checklist = checklist_by_purpose_and_employment.get((payload["loanPurposeId"], employment_type_id))
	        if checklist:
	            codes.update(checklist)
	    return codes


	def sort_by_document_type_id(item):
	    """Sort key: return the ``documentTypeId`` value of *item* (KeyError if absent)."""
	    document_type_id = item["documentTypeId"]
	    return document_type_id


	def get_download_file_stream_from_blob_storage(file_path):
	    """
	    Download the blob stored at *file_path* in the module's container.

	    Despite the function name, the download is consumed with ``readall()`` and
	    that result is returned, rather than the downloader object itself.
	    """
	    downloader = container_client.get_blob_client(blob=file_path).download_blob()
	    return downloader.readall()


	def format_bytes(size):
	    """
	    Convert a size in bytes to a human-readable string (B, KB, MB, GB or TB).

	    Units are powers of 1024 and the value is shown with one decimal place.
	    Values beyond the TB range stay in TB. Negative sizes are never scaled and
	    are reported in B.

	    Parameters
	    ----------
	    size (int | float): The size in bytes.

	    Returns
	    -------
	    str: The human-readable string format of the size, e.g. ``"1.5 KB"``.
	    """
	    power = 1024
	    power_labels = ("B", "KB", "MB", "GB", "TB")
	    n = 0

	    # Compare the value as it will be displayed (one decimal place) so that,
	    # for example, 1048575 bytes becomes "1.0 MB" instead of "1024.0 KB".
	    while n < len(power_labels) - 1 and round(size, 1) >= power:
	        size /= power
	        n += 1

	    return f"{size:.1f} {power_labels[n]}"