# ans123's picture
# Initial upload from Colab
# ef1ad9e verified
# --- Library Imports ---
import os
import uuid
from itertools import chain
from azure.storage.blob import BlobServiceClient
# ---
# --- User Imports ---
from app.config.env import env
# ---
# --- Constant ---
# Root blob-name prefix under which every per-user folder is created.
BASE_FOLDER_NAME = "doc_agent_userIds"
# Azure storage settings taken from the application's env configuration object.
# In this module STORAGE_ACCOUNT_NAME and CONTAINER_NAME are also used to build
# the blob URL returned by upload_file.
STORAGE_ACCOUNT_NAME = env.ACCOUNT_NAME
CONNECTION_STRING = env.CONNECTION_STRING
CONTAINER_NAME = env.CONTAINER_NAME
# Prefix combined with the user id to name a user's folder (e.g. "userId_42").
USER_FOLDER_PREFIX = "userId_"
# documentTypeId values whose sub-types filter_documents keeps whenever their
# applicationId matches the payload's applicationId, regardless of their code.
MISCELLANEOUS_DOCUMENT_TYPE_ID = 7
LENDERS_DOCUMENT_TYPE_ID = 9
# Document codes shared by several of the lists below.
_IDENTITY_CODE = "DriversLicense"
_BANK_STATEMENTS_CODE = "Recent2MonthBankStatements"
_PRIMARY_MORTGAGE_STATEMENT_CODE = "PrimaryResidenceMortgageStatement"
_HOMEOWNERS_INSURANCE_CODE = "HomeownersInsurancePolicy"

# Income-proof codes per employment type.
_RETIRED_INCOME_CODES = ("AwardLetterSSN", "Recent2Years1099Forms")
_EMPLOYED_INCOME_CODES = ("Recent30DaysPaystubs", "Recent2YearsW2Forms")
_SELF_EMPLOYED_INCOME_CODES = (
    "Recent2YearsPersonalTaxReturns",
    "Recent2YearsBusinessTaxReturns",
)

# Loan-purpose specific code that closes each list.
_PURCHASE_CODE = "PurchaseAgreements"
_REFINANCE_CODE = "RecentMortgageStatementsRefinance"


def _loan_document_list(income_codes, purpose_code):
    """Return a new list: identity, income proofs, bank statements, purpose document."""
    return [_IDENTITY_CODE, *income_codes, _BANK_STATEMENTS_CODE, purpose_code]


# Required document codes per (loan purpose, employment type) combination.
PURCHASE_RETIRED_DOCUMENT_LIST = _loan_document_list(_RETIRED_INCOME_CODES, _PURCHASE_CODE)
PURCHASE_EMPLOYED_DOCUMENT_LIST = _loan_document_list(_EMPLOYED_INCOME_CODES, _PURCHASE_CODE)
PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST = _loan_document_list(
    _SELF_EMPLOYED_INCOME_CODES, _PURCHASE_CODE
)
REFINANCE_RETIRED_DOCUMENT_LIST = _loan_document_list(_RETIRED_INCOME_CODES, _REFINANCE_CODE)
REFINANCE_EMPLOYED_DOCUMENT_LIST = _loan_document_list(_EMPLOYED_INCOME_CODES, _REFINANCE_CODE)
REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST = _loan_document_list(
    _SELF_EMPLOYED_INCOME_CODES, _REFINANCE_CODE
)

# Codes added when the applicant's ownershipId is 1 (see filter_documents).
OWNERSHIP_OWNED_DOCUMENT_LIST = [
    _PRIMARY_MORTGAGE_STATEMENT_CODE,
    _HOMEOWNERS_INSURANCE_CODE,
    "PrimaryPropertyTaxStatement",
    "HomeownersAssociation",
    "RecentMortgageStatements",
]

# Single-purpose lists appended by filter_documents when the matching
# payload condition holds.
OTHER_INCOME = ["W2OtherIncome"]
OT_BONUS_INCOME = ["2YearsPaystubHistory"]
ASSET_DOCUMENT_LIST = [_BANK_STATEMENTS_CODE]
PAYOFF_DOCUMENT_LIST = ["PayoffAmountStatement"]
EXCLUDE_DOCUMENT_LIST = ["ProofOfExclusion"]
RESIDENT_DOCUMENT_LIST = ["PermanentResidentCardGreenCard"]
ESCROWED_DOCUMENT_LIST = [_PRIMARY_MORTGAGE_STATEMENT_CODE]
NON_ESCROWED_DOCUMENT_LIST = [
    _HOMEOWNERS_INSURANCE_CODE,
    "ProofofPropertyTax",
    "ProofofPropertyInsurance",
]
NON_PRA_DOCUMENT_LIST = ["H1B"]
# ---
# Initiate connection to blob storage
# These clients are created once, when the module is imported, and are shared
# by the helper functions below (upload_file and
# get_download_file_stream_from_blob_storage use container_client directly).
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING)
# Client scoped to the single container named by CONTAINER_NAME.
container_client = blob_service_client.get_container_client(container=CONTAINER_NAME)
async def write_file(file):
    """Save an uploaded file into the local ``temp`` directory.

    Args:
        file: Upload object exposing a ``filename`` attribute and an awaitable
            ``read()`` that returns the file's bytes.

    Returns:
        The path of the written file, always directly inside ``temp``.
    """
    # Ensure the temp directory exists
    os.makedirs("temp", exist_ok=True)
    # The filename is supplied by the uploader. Keep only its last component
    # (treating "\\" as a separator too) so values such as "../../x" or an
    # absolute path cannot make the write land outside the temp directory.
    safe_name = os.path.basename(os.fspath(file.filename).replace("\\", "/"))
    file_path = os.path.join("temp", safe_name)
    contents = await file.read()
    # Write the file contents to the temp directory
    with open(file_path, "wb") as f:
        f.write(contents)
    return file_path
async def create_folders(container_client, folder_name, application_id):
    """Create the placeholder folders for a user and one of their applications.

    Each folder is materialised by uploading an empty ``.dummy`` blob under it.
    Folders whose ``.dummy`` blob can already be read are left untouched.

    Args:
        container_client: Container client used to look up and upload blobs.
        folder_name: Name of the user's folder below ``BASE_FOLDER_NAME``.
        application_id: Identifier used to name the ``applicationId_<id>``
            folder and its per-category sub-folders.
    """
    # Construct the base folder path
    base_folder_path = f"{BASE_FOLDER_NAME}/{folder_name}"
    application_folder = f"applicationId_{application_id}"
    application_categories = (
        "identityDocuments",
        "incomeDocuments",
        "assetsDocuments",
        "employmentVerification",
        "creditDocuments",
        "propertyDocuments",
        "miscellaneousDocuments",
    )
    # Define the list of subfolders to create
    subfolders = [
        "processed",
        "raw",
        application_folder,
        *(f"{application_folder}/{category}" for category in application_categories),
    ]
    for sub_folder_name in subfolders:
        # Blob names use "/" as the separator on every platform, so build them
        # explicitly; os.path.join would insert "\\" when running on Windows.
        blob_name = f"{base_folder_path}/{sub_folder_name}/.dummy"
        try:
            # Attempt to get blob properties
            blob_client = container_client.get_blob_client(blob=blob_name)
            blob_properties = blob_client.get_blob_properties()
        except Exception:
            # Best effort: any lookup failure is treated as "placeholder
            # missing" and falls through to folder creation.
            print("proceed to folder creation")
        else:
            # If the blob exists, skip folder creation
            if blob_properties:
                continue
        # Create the folder by uploading a dummy blob
        container_client.upload_blob(name=blob_name, data=b"", overwrite=True)
async def upload_file(user_id, payload, file):
    """Upload a file into the user's ``raw`` folder and return its blob URL.

    Args:
        user_id: Identifier appended to ``USER_FOLDER_PREFIX`` to name the
            user's folder.
        payload: Mapping that provides ``"application_id"`` for folder creation.
        file: Upload object exposing ``filename`` and a seekable ``file``
            stream.

    Returns:
        The URL string built from the storage account, container and blob path.
    """
    # Make sure the user's folder structure is in place first.
    folder_for_user = f"{USER_FOLDER_PREFIX}{user_id!s}"
    await create_folders(container_client, folder_for_user, payload["application_id"])

    # Destination blob inside the user's raw folder.
    file_path = f"{BASE_FOLDER_NAME}/{folder_for_user}/raw/{file.filename}"
    target_blob = container_client.get_blob_client(blob=file_path)

    # Rewind before reading so the whole stream is uploaded, then replace any
    # blob that already has this name.
    stream = file.file
    stream.seek(0)
    payload_bytes = stream.read()
    target_blob.upload_blob(payload_bytes, overwrite=True)

    return f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net/{CONTAINER_NAME}/{file_path}"
def generate_unique_code(text):
    """
    Build a unique code from ``text``: strip every space character from it,
    then append an underscore followed by a freshly generated UUID4 string.
    """
    compact_text = text.replace(" ", "")
    random_suffix = str(uuid.uuid4())
    return "_".join((compact_text, random_suffix))
def _common_document_codes(payload):
    """Collect the document codes triggered by application details other than
    the (loan purpose, employment type) combination."""
    codes = []

    ownership_id = payload["ownershipId"]
    if ownership_id == 1:
        codes.extend(OWNERSHIP_OWNED_DOCUMENT_LIST)

    citizenship_status_id = payload["citizenshipStatusId"]
    if citizenship_status_id == 1:
        codes.extend(RESIDENT_DOCUMENT_LIST)
    elif citizenship_status_id == 3:
        codes.extend(NON_PRA_DOCUMENT_LIST)

    for reo in payload["reos"]:
        # escrowedPayment is compared as the strings "1" / "0".
        if reo["escrowedPayment"] == "1" and ownership_id != 2:
            codes.extend(ESCROWED_DOCUMENT_LIST)
        elif reo["escrowedPayment"] == "0":
            codes.extend(NON_ESCROWED_DOCUMENT_LIST)

    for employment in payload["employments"]:
        if employment["otherIncome"]:
            codes.extend(OTHER_INCOME)
        if employment["bonusIncome"] or employment["OTIncome"]:
            codes.extend(OT_BONUS_INCOME)

    for asset in payload["assets"]:
        # Only when none of the three credit fields equals 1.
        if asset["lenderCredit"] != 1 and asset["realtorCredit"] != 1 and asset["otherCredit"] != 1:
            codes.extend(ASSET_DOCUMENT_LIST)

    for liability in payload["liabilities"]:
        if liability["isPayoffRequired"] == "1":
            codes.extend(PAYOFF_DOCUMENT_LIST)
        if liability["isExclude"] == "1":
            codes.extend(EXCLUDE_DOCUMENT_LIST)

    return codes


def _employment_document_codes(payload):
    """Collect the document codes for every (loanPurposeId, employmentTypeId)
    pair found in the payload's employments."""
    # Allowed document codes for each combination of loan purpose and
    # employment type; unknown combinations contribute nothing.
    codes_by_combination = {
        (1, 1): PURCHASE_RETIRED_DOCUMENT_LIST,
        (1, 2): PURCHASE_EMPLOYED_DOCUMENT_LIST,
        (1, 3): PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST,
        (2, 1): REFINANCE_RETIRED_DOCUMENT_LIST,
        (2, 2): REFINANCE_EMPLOYED_DOCUMENT_LIST,
        (2, 3): REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST,
    }
    codes = []
    for employment in payload["employments"]:
        combination = (payload["loanPurposeId"], employment["employmentTypeId"])
        codes.extend(codes_by_combination.get(combination) or ())
    return codes


def filter_documents(documents, payload):
    """
    Filter each document's sub_types down to those relevant for an application.

    The input documents are modified in place: every document's ``sub_types``
    list is replaced by the filtered list (an empty list when the key was
    missing).

    A sub-type is kept when either of the following holds:

    * its ``code`` is one of the allowed codes collected from the payload:
        1) ``ownershipId`` is 1 -> ownership documents
        2) ``citizenshipStatusId`` is 1 -> resident card, 3 -> H1B
        3) per REO: ``escrowedPayment`` is "1" (and ``ownershipId`` is not 2)
           -> escrowed documents, "0" -> non-escrowed documents
        4) per employment: ``otherIncome`` set -> other-income document;
           ``bonusIncome`` or ``OTIncome`` set -> paystub history
        5) per asset: none of ``lenderCredit``, ``realtorCredit``,
           ``otherCredit`` equals 1 -> bank statements
        6) per liability: ``isPayoffRequired`` is "1" -> payoff statement;
           ``isExclude`` is "1" -> proof of exclusion
        7) per employment: the list mapped to
           (``loanPurposeId``, ``employmentTypeId``)
    * its ``documentTypeId`` is the miscellaneous or lenders type id and its
      ``applicationId`` equals the payload's ``applicationId``.

    Args:
        documents: A list of dictionaries representing documents.
        payload: Various details related to a specific application.

    Returns
    -------
    The same document dictionaries, sorted by ``documentTypeId``.
    """
    # Build the lookup once. Codes are assumed to be hashable (strings or None).
    allowed_codes = set(_common_document_codes(payload))
    allowed_codes.update(_employment_document_codes(payload))

    application_id = payload.get("applicationId")
    application_scoped_type_ids = (MISCELLANEOUS_DOCUMENT_TYPE_ID, LENDERS_DOCUMENT_TYPE_ID)

    for document in documents:
        document["sub_types"] = [
            sub_type
            for sub_type in document.get("sub_types", [])
            if sub_type.get("code") in allowed_codes
            or (
                sub_type.get("documentTypeId") in application_scoped_type_ids
                and sub_type.get("applicationId") == application_id
            )
        ]
    return sorted(documents, key=sort_by_document_type_id)
def sort_by_document_type_id(item):
    """Sort key: return the value stored under ``"documentTypeId"`` in ``item``."""
    document_type_id = item["documentTypeId"]
    return document_type_id
def get_download_file_stream_from_blob_storage(file_path):
    """Download the blob stored at ``file_path`` in the module's container.

    Returns whatever ``readall()`` yields for the download, i.e. the blob's
    full content read in one go.
    """
    download = container_client.get_blob_client(blob=file_path).download_blob()
    return download.readall()
def format_bytes(size):
    """
    Convert a size in bytes to a human-readable string (B, KB, MB, GB, TB).

    The value is divided by 1024 until it is below 1024 after rounding to one
    decimal place, or until the TB unit is reached. Negative sizes are scaled
    by their magnitude and keep their sign.

    Parameters
    ----------
    size (int): The size in bytes.

    Returns
    -------
    str: The size with one decimal place followed by its unit, e.g. "1.5 KB".
    """
    power = 1024
    power_labels = ("B", "KB", "MB", "GB", "TB")
    largest_unit = len(power_labels) - 1
    n = 0
    # Compare the magnitude as it will be displayed: a value such as 1023.99
    # prints as "1024.0", so it must move up to the next unit ("1.0 ...").
    while n < largest_unit and round(abs(size), 1) >= power:
        size /= power
        n += 1
    return f"{size:.1f} {power_labels[n]}"