File size: 11,152 Bytes
ef1ad9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# --- Library Imports ---
import os
import uuid
from itertools import chain
from azure.storage.blob import BlobServiceClient
# ---
# --- User Imports ---
from app.config.env import env
# ---
# --- Constants ---
# Blob-storage settings taken from the application's environment configuration.
STORAGE_ACCOUNT_NAME = env.ACCOUNT_NAME
CONNECTION_STRING = env.CONNECTION_STRING
CONTAINER_NAME = env.CONTAINER_NAME

# Blob path layout: "<BASE_FOLDER_NAME>/<USER_FOLDER_PREFIX><user id>/...".
BASE_FOLDER_NAME = "doc_agent_userIds"
USER_FOLDER_PREFIX = "userId_"

# documentTypeId values whose sub-types filter_documents keeps when they
# belong to the application being processed.
MISCELLANEOUS_DOCUMENT_TYPE_ID = 7
LENDERS_DOCUMENT_TYPE_ID = 9
# Document codes required for a purchase loan, one list per employment type.
PURCHASE_RETIRED_DOCUMENT_LIST = [
    "DriversLicense",
    "AwardLetterSSN",
    "Recent2Years1099Forms",
    "Recent2MonthBankStatements",
    "PurchaseAgreements",
]
PURCHASE_EMPLOYED_DOCUMENT_LIST = [
    "DriversLicense",
    "Recent30DaysPaystubs",
    "Recent2YearsW2Forms",
    "Recent2MonthBankStatements",
    "PurchaseAgreements",
]
PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST = [
    "DriversLicense",
    "Recent2YearsPersonalTaxReturns",
    "Recent2YearsBusinessTaxReturns",
    "Recent2MonthBankStatements",
    "PurchaseAgreements",
]

# Document codes required for a refinance loan, one list per employment type.
REFINANCE_RETIRED_DOCUMENT_LIST = [
    "DriversLicense",
    "AwardLetterSSN",
    "Recent2Years1099Forms",
    "Recent2MonthBankStatements",
    "RecentMortgageStatementsRefinance",
]
REFINANCE_EMPLOYED_DOCUMENT_LIST = [
    "DriversLicense",
    "Recent30DaysPaystubs",
    "Recent2YearsW2Forms",
    "Recent2MonthBankStatements",
    "RecentMortgageStatementsRefinance",
]
REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST = [
    "DriversLicense",
    "Recent2YearsPersonalTaxReturns",
    "Recent2YearsBusinessTaxReturns",
    "Recent2MonthBankStatements",
    "RecentMortgageStatementsRefinance",
]

# Document codes added by filter_documents when a specific answer in the
# application payload triggers them.
OWNERSHIP_OWNED_DOCUMENT_LIST = [
    "PrimaryResidenceMortgageStatement",
    "HomeownersInsurancePolicy",
    "PrimaryPropertyTaxStatement",
    "HomeownersAssociation",
    "RecentMortgageStatements",
]
OTHER_INCOME = ["W2OtherIncome"]
OT_BONUS_INCOME = ["2YearsPaystubHistory"]
ASSET_DOCUMENT_LIST = ["Recent2MonthBankStatements"]
PAYOFF_DOCUMENT_LIST = ["PayoffAmountStatement"]
EXCLUDE_DOCUMENT_LIST = ["ProofOfExclusion"]
RESIDENT_DOCUMENT_LIST = ["PermanentResidentCardGreenCard"]
ESCROWED_DOCUMENT_LIST = ["PrimaryResidenceMortgageStatement"]
NON_ESCROWED_DOCUMENT_LIST = [
    "HomeownersInsurancePolicy",
    "ProofofPropertyTax",
    "ProofofPropertyInsurance",
]
NON_PRA_DOCUMENT_LIST = ["H1B"]
# ---
# Module-level Azure Blob Storage clients, built once when this module is
# imported from the configured connection string and container name.
blob_service_client = BlobServiceClient.from_connection_string(conn_str=CONNECTION_STRING)
container_client = blob_service_client.get_container_client(CONTAINER_NAME)
async def write_file(file):
    """
    Save an uploaded file into the local ``temp`` directory.

    Only the final path component of ``file.filename`` is used, so a
    client-supplied name such as ``../../x`` or ``/abs/x`` cannot make the
    file land outside ``temp``.

    Parameters
    ----------
    file: Upload object exposing a ``filename`` attribute and an awaitable
        ``read()`` that returns the file's bytes.

    Returns
    -------
    str: Path of the written file (``temp/<name>``), relative to the current
        working directory.

    Raises
    ------
    ValueError: If the filename is missing or reduces to ``""``, ``"."`` or
        ``".."`` once directory components are removed.
    """
    temp_dir = "temp"
    # Normalise Windows separators first so "..\\x" is reduced to "x" as well.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload filename: {file.filename!r}")
    # Ensure the temp directory exists
    os.makedirs(temp_dir, exist_ok=True)
    file_path = os.path.join(temp_dir, safe_name)
    contents = await file.read()
    # Write the file contents to the temp directory
    with open(file_path, "wb") as f:
        f.write(contents)
    return file_path
async def create_folders(container_client, folder_name, application_id):
    """
    Make sure the folder layout for a user and application exists in the container.

    Each folder is represented by an empty ``.dummy`` blob stored under
    ``<BASE_FOLDER_NAME>/<folder_name>/<subfolder>/``. A folder whose
    ``.dummy`` blob can already be read is left untouched.

    Blob names are always joined with ``/``. ``os.path.join`` would produce
    backslashes on Windows and therefore different blob names per platform.

    Parameters
    ----------
    container_client: Container client providing ``get_blob_client`` and
        ``upload_blob``.
    folder_name: Name of the user folder below ``BASE_FOLDER_NAME``.
    application_id: Identifier used to build the ``applicationId_<id>`` folder.
    """
    # Construct the base folder path
    base_folder_path = f"{BASE_FOLDER_NAME}/{folder_name}"
    application_folder = f"applicationId_{application_id}"
    document_categories = (
        "identityDocuments",
        "incomeDocuments",
        "assetsDocuments",
        "employmentVerification",
        "creditDocuments",
        "propertyDocuments",
        "miscellaneousDocuments",
    )
    # Define the list of subfolders to create
    subfolders = [
        "processed",
        "raw",
        application_folder,
        *(f"{application_folder}/{category}" for category in document_categories),
    ]
    for sub_folder_name in subfolders:
        # Placeholder blob that makes the virtual folder exist
        blob_name = f"{base_folder_path}/{sub_folder_name}/.dummy"
        try:
            blob_client = container_client.get_blob_client(blob=blob_name)
            # If the blob's properties can be fetched it exists: skip creation
            if blob_client.get_blob_properties():
                continue
        except Exception:
            # Best effort: any failure of the lookup is treated as
            # "folder missing" and the upload below is attempted.
            print("proceed to folder creation")
        # Create the folder by uploading a dummy blob
        container_client.upload_blob(name=blob_name, data=b"", overwrite=True)
async def upload_file(user_id, payload, file):
    """
    Upload a file into the user's ``raw`` folder and return its blob URL.

    The user's folder structure is created first (via ``create_folders``)
    using ``payload["application_id"]``. An existing blob with the same name
    is overwritten.

    Parameters
    ----------
    user_id: Identifier appended to ``USER_FOLDER_PREFIX`` to name the user folder.
    payload: Mapping that must contain the key ``"application_id"``.
    file: Upload object exposing ``filename`` and a seekable ``file`` stream.

    Returns
    -------
    str: URL of the uploaded blob, built from the storage account name,
        container name and blob path.
    """
    user_folder = f"{USER_FOLDER_PREFIX}{user_id}"
    # Make sure the user's folders exist before writing into them
    await create_folders(container_client, user_folder, payload["application_id"])

    # Destination of the upload inside the container
    raw_blob_path = f"{BASE_FOLDER_NAME}/{user_folder}/raw/{file.filename}"
    target_blob = container_client.get_blob_client(blob=raw_blob_path)

    # Rewind the stream so the whole content is read
    stream = file.file
    stream.seek(0)
    target_blob.upload_blob(stream.read(), overwrite=True)

    account_url = f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net"
    return f"{account_url}/{CONTAINER_NAME}/{raw_blob_path}"
def generate_unique_code(text):
    """
    Build a unique code of the form ``<text without spaces>_<uuid4>``.

    Only space characters (``" "``) are removed from ``text``; other
    whitespace such as tabs or newlines is kept.
    """
    # Drop every space character
    compact_text = "".join(text.split(" "))
    # Append an underscore followed by a random UUID
    return f"{compact_text}_{uuid.uuid4()}"
def _conditional_document_codes(payload):
    """Collect the document codes triggered by individual answers in the payload."""
    codes = []
    # 1) Ownership id 1 adds the owned-property documents
    if payload["ownershipId"] == 1:
        codes.extend(OWNERSHIP_OWNED_DOCUMENT_LIST)
    # 2) Citizenship status 1 adds the resident documents, status 3 the non-PRA ones
    if payload["citizenshipStatusId"] == 1:
        codes.extend(RESIDENT_DOCUMENT_LIST)
    elif payload["citizenshipStatusId"] == 3:
        codes.extend(NON_PRA_DOCUMENT_LIST)
    # 3) Escrowed / non-escrowed payment per real-estate-owned entry
    for reo in payload["reos"]:
        if reo["escrowedPayment"] == "1" and payload["ownershipId"] != 2:
            codes.extend(ESCROWED_DOCUMENT_LIST)
        elif reo["escrowedPayment"] == "0":
            codes.extend(NON_ESCROWED_DOCUMENT_LIST)
    # 4) Any other / bonus / OT income on an employment
    for employment in payload["employments"]:
        if employment["otherIncome"]:
            codes.extend(OTHER_INCOME)
        if employment["bonusIncome"] or employment["OTIncome"]:
            codes.extend(OT_BONUS_INCOME)
    # 5) Assets where none of lender, realtor or other credit equals 1
    for asset in payload["assets"]:
        if asset["lenderCredit"] != 1 and asset["realtorCredit"] != 1 and asset["otherCredit"] != 1:
            codes.extend(ASSET_DOCUMENT_LIST)
    # 6) Payoff required and 7) exclude selected, per liability
    for liability in payload["liabilities"]:
        if liability["isPayoffRequired"] == "1":
            codes.extend(PAYOFF_DOCUMENT_LIST)
        if liability["isExclude"] == "1":
            codes.extend(EXCLUDE_DOCUMENT_LIST)
    return codes


def filter_documents(documents, payload):
    """
    Filter each document's sub_types down to those relevant to the application.

    A sub-type is kept when its ``code`` is one of the allowed codes, or when
    its ``documentTypeId`` is the miscellaneous or lenders type and its
    ``applicationId`` equals ``payload.get("applicationId")``.

    Allowed codes are the union of:
      * the list selected by ``(loanPurposeId, employmentTypeId)`` for every
        entry in ``payload["employments"]`` (unknown combinations add nothing);
      * the codes triggered by ownership, citizenship status, escrowed
        payment, other/bonus/OT income, asset credits, payoff and exclusion
        answers (see ``_conditional_document_codes``).

    The ``sub_types`` list of every input document is replaced in place.

    Args:
    documents: A list of dictionaries representing documents; each needs a
        ``documentTypeId`` key for sorting.
    payload: Various details related to a specific application. Must contain
        ``ownershipId``, ``citizenshipStatusId``, ``reos``, ``employments``,
        ``assets`` and ``liabilities``; ``loanPurposeId`` is read once per
        employment.
    Returns
    -------
    A new list with the same (modified) document dictionaries, sorted by
    ``documentTypeId``.

    Raises
    ------
    KeyError: If a required key is missing from ``payload`` or its entries.
    """
    # Already a flat list of codes. The previous extra test against the
    # unflattened list of lists could never match a string code.
    common_codes = _conditional_document_codes(payload)

    # Allowed document codes for each (loan purpose, employment type) pair
    codes_by_purpose_and_employment = {
        (1, 1): PURCHASE_RETIRED_DOCUMENT_LIST,
        (1, 2): PURCHASE_EMPLOYED_DOCUMENT_LIST,
        (1, 3): PURCHASE_SELF_EMPLOYED_DOCUMENT_LIST,
        (2, 1): REFINANCE_RETIRED_DOCUMENT_LIST,
        (2, 2): REFINANCE_EMPLOYED_DOCUMENT_LIST,
        (2, 3): REFINANCE_SELF_EMPLOYED_DOCUMENT_LIST,
    }
    # A set gives constant-time membership tests; codes are assumed hashable
    # (they are strings in every list above).
    allowed_codes = set(common_codes)
    for employment in payload["employments"]:
        key = (payload["loanPurposeId"], employment["employmentTypeId"])
        allowed_codes.update(codes_by_purpose_and_employment.get(key) or ())

    application_id = payload.get("applicationId")
    per_application_type_ids = (MISCELLANEOUS_DOCUMENT_TYPE_ID, LENDERS_DOCUMENT_TYPE_ID)

    # Filter the documents based on the final allowed codes
    for document in documents:
        document["sub_types"] = [
            sub_type
            for sub_type in document.get("sub_types", [])
            if sub_type.get("code") in allowed_codes
            or (
                sub_type.get("documentTypeId") in per_application_type_ids
                and sub_type.get("applicationId") == application_id
            )
        ]
    return sorted(documents, key=sort_by_document_type_id)
def sort_by_document_type_id(item):
    """Sort key: return the value stored under ``"documentTypeId"`` in ``item``."""
    document_type_id = item["documentTypeId"]
    return document_type_id
def get_download_file_stream_from_blob_storage(file_path):
    """
    Download a blob from the module-level container and return its content.

    Despite the name, the whole blob is read with ``readall()`` before
    returning, so the result is the complete content rather than a lazy stream.

    Parameters
    ----------
    file_path: Name (path) of the blob inside the container.
    """
    # Open a download for the requested blob and read it to the end
    downloader = container_client.get_blob_client(blob=file_path).download_blob()
    return downloader.readall()
def format_bytes(size):
    """
    Render a byte count as a human-readable string with one decimal place.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, e.g. ``1536`` -> ``"1.5 KB"``.

    Parameters
    ----------
    size (int): The size in bytes.
    Returns
    -------
    str: The size followed by one of B, KB, MB, GB or TB.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    step = 1024
    unit_index = 0
    # Move up one unit per division; stop at the last unit even if still large
    while unit_index < len(units) - 1 and size >= step:
        size /= step
        unit_index += 1
    return f"{size:.1f} {units[unit_index]}"
|