# --- Library Imports --- import os import uuid from itertools import chain from azure.storage.blob import BlobServiceClient # --- # --- User Imports --- from app.config.env import env # --- # --- Constant --- BASE_FOLDER_NAME = "doc_agent_userIds" STORAGE_ACCOUNT_NAME = env.ACCOUNT_NAME CONNECTION_STRING = env.CONNECTION_STRING CONTAINER_NAME = env.CONTAINER_NAME USER_FOLDER_PREFIX = "userId_" MISCELLANEOUS_DOCUMENT_TYPE_ID = 7 LENDERS_DOCUMENT_TYPE_ID = 9 PURCHASE_RETIRED_DOCUMENT_LIST = [ "DriversLicense", "AwardLetterSSN", "Recent2Years1099Forms", "Recent2MonthBankStatements", "PurchaseAgreements", ] PURCHASE_EMPLOYED_DOCUMENT_LIST = [ "DriversLicense", "Recent30DaysPaystubs", "Recent2YearsW2Forms", "Recent2MonthBankStatements", "PurchaseAgreements", ] PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST = [ "DriversLicense", "Recent2YearsPersonalTaxReturns", "Recent2YearsBusinessTaxReturns", "Recent2MonthBankStatements", "PurchaseAgreements", ] REFINANCE_RETIRED_DOCUMENT_LIST = [ "DriversLicense", "AwardLetterSSN", "Recent2Years1099Forms", "Recent2MonthBankStatements", "RecentMortgageStatementsRefinance", ] REFINANCE_EMPLOYED_DOCUMENT_LIST = [ "DriversLicense", "Recent30DaysPaystubs", "Recent2YearsW2Forms", "Recent2MonthBankStatements", "RecentMortgageStatementsRefinance", ] REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST = [ "DriversLicense", "Recent2YearsPersonalTaxReturns", "Recent2YearsBusinessTaxReturns", "Recent2MonthBankStatements", "RecentMortgageStatementsRefinance", ] OWNERSHIP_OWNED_DOCUMENT_LIST = [ "PrimaryResidenceMortgageStatement", "HomeownersInsurancePolicy", "PrimaryPropertyTaxStatement", "HomeownersAssociation", "RecentMortgageStatements", ] OTHER_INCOME = ["W2OtherIncome"] OT_BONUS_INCOME = ["2YearsPaystubHistory"] ASSET_DOCUMENT_LIST = ["Recent2MonthBankStatements"] PAYOFF_DOCUMENT_LIST = ["PayoffAmountStatement"] EXCLUDE_DOCUMENT_LIST = ["ProofOfExclusion"] RESIDENT_DOCUMENT_LIST = ["PermanentResidentCardGreenCard"] ESCROWED_DOCUMENT_LIST = ["PrimaryResidenceMortgageStatement"] NON_ESCROWED_DOCUMENT_LIST = ["HomeownersInsurancePolicy", "ProofofPropertyTax", "ProofofPropertyInsurance"] NON_PRA_DOCUMENT_LIST = ["H1B"] # --- # Initiate connection to blob storage blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING) container_client = blob_service_client.get_container_client(container=CONTAINER_NAME) async def write_file(file): # Ensure the temp directory exists os.makedirs("temp", exist_ok=True) # Construct the file path inside the temp directory file_path = os.path.join("temp", file.filename) contents = await file.read() # Write the file contents to the temp directory with open(file_path, "wb") as f: f.write(contents) return file_path async def create_folders(container_client, folder_name, application_id): # Construct the base folder path base_folder_path = f"{BASE_FOLDER_NAME}/{folder_name}" # Define the list of subfolders to create subfolders = [ "processed", "raw", f"applicationId_{application_id}", os.path.join(f"applicationId_{application_id}", "identityDocuments"), os.path.join(f"applicationId_{application_id}", "incomeDocuments"), os.path.join(f"applicationId_{application_id}", "assetsDocuments"), os.path.join(f"applicationId_{application_id}", "employmentVerification"), os.path.join(f"applicationId_{application_id}", "creditDocuments"), os.path.join(f"applicationId_{application_id}", "propertyDocuments"), os.path.join(f"applicationId_{application_id}", "miscellaneousDocuments"), ] # Iterate through each subfolder for sub_folder_name in subfolders: # Construct the full blob name and populate it with a .dummy file blob_name = os.path.join(base_folder_path, sub_folder_name, ".dummy") try: # Attempt to get blob properties blob_client = container_client.get_blob_client(blob=blob_name) blob_properties = blob_client.get_blob_properties() # If the blob exists, skip folder creation if blob_properties: continue except Exception: # If there's any exception, ignore and proceed to folder creation print("proceed to folder creation") # Create the folder by uploading a dummy blob container_client.upload_blob(name=blob_name, data=b"", overwrite=True) async def upload_file(user_id, payload, file): # Create user-specific folders if they don't exist user_folder_name = USER_FOLDER_PREFIX + str(user_id) await create_folders(container_client, user_folder_name, payload["application_id"]) # Specify File Path file_path = f"{BASE_FOLDER_NAME}/{user_folder_name}/raw/{file.filename}" # Get blob client for uploading file to raw folder blob_client = container_client.get_blob_client(blob=file_path) # Read file data file.file.seek(0) data = file.file.read() # Upload file to Azure blob storage blob_client.upload_blob(data, overwrite=True) # Generate blob URL return f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net/{CONTAINER_NAME}/{file_path}" def generate_unique_code(text): """ The function removes white spaces from a string, adds an underscore, and appends a UUID to create a unique code. """ # Remove white spaces text_without_spaces = text.replace(" ", "") # Combine text with underscore and UUID unique_code = f"{text_without_spaces}_" + str(uuid.uuid4()) return unique_code def filter_documents(documents, payload): """ Filters documents based on loan application details, and allowed document codes within sub_types. Modifies the original documents' sub_types list to contain only allowed sub-types. Args: documents: A list of dictionaries representing documents. payload: Various details related to a specific application. Returns ------- A list of dictionaries containing the original documents (modified with filtered sub_types). """ """ Common Documents - Based on below conditions, add respective documents to the filter list 1) Ownership is Owned 2) Consumer is a US citizen 3) Escrowed Payment is True 4) If any of the income is present/filled - ot, bonus or other 5) If none of these fields are filled - lender_credit, realtor_credit and other_credit 6) If payoff is required in liabilities 7) If exclude is selected as yes in liabilities """ common_documents = [] if payload["ownershipId"] == 1: common_documents.append(OWNERSHIP_OWNED_DOCUMENT_LIST) if payload["citizenshipStatusId"] == 1: common_documents.append(RESIDENT_DOCUMENT_LIST) elif payload["citizenshipStatusId"] == 3: common_documents.append(NON_PRA_DOCUMENT_LIST) for reo in payload["reos"]: if reo["escrowedPayment"] == "1" and payload["ownershipId"] != 2: common_documents.append(ESCROWED_DOCUMENT_LIST) elif reo["escrowedPayment"] == "0": common_documents.append(NON_ESCROWED_DOCUMENT_LIST) for employment in payload["employments"]: if employment["otherIncome"]: common_documents.append(OTHER_INCOME) if employment["bonusIncome"] or employment["OTIncome"]: common_documents.append(OT_BONUS_INCOME) for asset in payload["assets"]: if asset["lenderCredit"] != 1 and asset["realtorCredit"] != 1 and asset["otherCredit"] != 1: common_documents.append(ASSET_DOCUMENT_LIST) for liability in payload["liabilities"]: if liability["isPayoffRequired"] == "1": common_documents.append(PAYOFF_DOCUMENT_LIST) if liability["isExclude"] == "1": common_documents.append(EXCLUDE_DOCUMENT_LIST) # Define allowed document codes for each combination of loan purpose and employment type allowed_codes = { (1, 1): PURCHASE_RETIRED_DOCUMENT_LIST, (1, 2): PURCHASE_EMPLOYED_DOCUMENT_LIST, (1, 3): PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST, (2, 1): REFINANCE_RETIRED_DOCUMENT_LIST, (2, 2): REFINANCE_EMPLOYED_DOCUMENT_LIST, (2, 3): REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST, } allowed_documents = [] # Loop through each employment type in the payload for employment in payload["employments"]: employment_type_id = employment["employmentTypeId"] # Add the relevant document codes to the final allowed codes documents_to_append = allowed_codes.get((payload["loanPurposeId"], employment_type_id)) if documents_to_append: allowed_documents.extend(documents_to_append) # Flatten the common_documents list of lists flattened_common_documents = list(chain.from_iterable(common_documents)) # Combine allowed_documents and flattened_common_documents allowed_documents.extend(flattened_common_documents) # Filter the documents based on the final allowed codes for document in documents: document["sub_types"] = [ sub_type for sub_type in document.get("sub_types", []) if ( sub_type.get("code") in allowed_documents or sub_type.get("code") in common_documents or ( sub_type.get("documentTypeId") == MISCELLANEOUS_DOCUMENT_TYPE_ID and sub_type.get("applicationId") == payload.get("applicationId") ) or ( sub_type.get("documentTypeId") == LENDERS_DOCUMENT_TYPE_ID and sub_type.get("applicationId") == payload.get("applicationId") ) ) ] return sorted(documents, key=sort_by_document_type_id) def sort_by_document_type_id(item): return item["documentTypeId"] def get_download_file_stream_from_blob_storage(file_path): # Get blob client blob_client = container_client.get_blob_client(blob=file_path) # Fetch the file content as a stream from Azure Blob Storage return blob_client.download_blob().readall() def format_bytes(size): """ Convert a size in bytes to a human-readable string format (KB, MB, GB). Parameters ---------- size (int): The size in bytes. Returns ------- str: The human-readable string format of the size. """ # Define the size units power = 1024 n = 0 power_labels = {0: "B", 1: "KB", 2: "MB", 3: "GB", 4: "TB"} # Calculate the appropriate unit while size >= power and n < 4: size /= power n += 1 return f"{size:.1f} {power_labels[n]}"