Spaces:
Sleeping
Sleeping
import random | |
import json | |
import re | |
import asyncio | |
import time | |
# TODO: if no matching document is found, suggest some alternatives to the user.
# " - Remove currency symbol if present, convert currency to AED if user mentioned currency symbol other than AED.\n\n" | |
def extract_json_from_response(response):
    """
    Extract the first JSON object embedded in ``response``.

    The text is scanned for ``{`` characters and the JSON decoder is asked
    to parse one value starting at each of them in turn. Letting the decoder
    find the end of the object (instead of counting braces by hand) means
    braces that appear inside string values do not confuse the extraction,
    and text after the object is ignored.

    Args:
        response: Raw text that may contain a JSON object surrounded by
            other content.

    Returns:
        The first JSON object that parses, as a dict. An empty dict is
        returned when ``response`` is not a string, contains no ``{``, or
        no candidate parses.
    """
    if not isinstance(response, str):
        return {}

    decoder = json.JSONDecoder()
    last_error = None
    start_index = response.find('{')
    while start_index != -1:
        try:
            # raw_decode parses exactly one JSON value beginning at
            # start_index and tolerates trailing non-JSON text.
            candidate, _ = decoder.raw_decode(response, start_index)
        except json.JSONDecodeError as e:
            last_error = e
        else:
            return candidate
        # This "{" did not start valid JSON; try the next one.
        start_index = response.find('{', start_index + 1)

    if last_error is not None:
        print("Error parsing candidate JSON:", last_error)
    return {}
def sendTokenViaSocket(state, manager_socket, message):
    """
    Send ``message`` to a socket connection one space-separated token at a time.

    ``state`` must provide a truthy ``"connection_id"`` and ``"loop"``;
    otherwise nothing is sent. Each token (with a trailing space appended)
    is passed to ``manager_socket.send_message`` and scheduled on ``loop``
    with ``asyncio.run_coroutine_threadsafe``. The returned futures are not
    awaited or inspected.

    NOTE(review): the original indentation was lost; the 0.05 s pause is
    assumed to run once per token — confirm against the upstream file.
    """
    target_connection = state.get("connection_id")
    event_loop = state.get("loop")
    if not (target_connection and event_loop):
        return

    for word in message.split(" "):
        pending_send = manager_socket.send_message(target_connection, word + " ")
        asyncio.run_coroutine_threadsafe(pending_send, event_loop)
        # Blocks the calling thread briefly between tokens.
        time.sleep(0.05)
def rule_based_extract(query):
    """
    A lightweight extraction using regular expressions.

    Detects a cost threshold (e.g. "under 43k", "below 1.5m",
    "less than 2 million") and a property type keyword.

    Args:
        query: Free-text user query.

    Returns:
        A dict that may contain:
          - ``'totalCosts'``: the threshold as an int, after applying the
            thousand/million/billion suffix.
          - ``'propertyType'``: the matched type ("<n>bhk", "villa",
            "apartment" or "studio"), lower-cased.
        Keys are omitted when nothing is detected.
    """
    multipliers = {
        'k': 1_000, 'thousand': 1_000,
        'm': 1_000_000, 'mn': 1_000_000, 'mil': 1_000_000, 'million': 1_000_000,
        'b': 1_000_000_000, 'bn': 1_000_000_000, 'billion': 1_000_000_000,
    }
    result = {}
    q_lower = query.lower()

    # The trailing \b stops the suffix from being the first letter of an
    # unrelated word: "under 3bhk" must not be read as 3 billion, nor
    # "under 100meters" as 100 million. Whitespace before the suffix is only
    # accepted for the spelled-out words, so "500 m from the metro" is not
    # mistaken for a price.
    cost_match = re.search(
        r'\b(?:under|below|less\s+than)\s*(\d+(?:\.\d+)?)'
        r'(?:\s*(thousand|million|billion)|(mil|mn|bn|k|m|b))\b',
        q_lower,
    )
    if cost_match:
        suffix = cost_match.group(2) or cost_match.group(3)
        # round() rather than int(): float products such as 1.005 * 1000
        # land just below the intended integer and would be truncated.
        result['totalCosts'] = round(float(cost_match.group(1)) * multipliers[suffix])

    # Detect property type if mentioned
    prop_type_match = re.search(r'\b(\d+bhk|villa|apartment|studio)\b', q_lower)
    if prop_type_match:
        result['propertyType'] = prop_type_match.group(1)
    return result
def apply_filters_partial(docs, filters):
    """
    Rank documents by how many of the given filters they satisfy.

    Matching rules per filter key:
      - Keys missing from a document are skipped (no score).
      - ``"totalCosts"``: scores when the document's cost, converted to
        float, is less than or equal to the filter value. Values that cannot
        be converted to a number are skipped.
      - String document values: scores on a case-insensitive substring
        match. Non-string filter values are compared via ``str(value)``.
      - Anything else: scores on equality.

    Args:
        docs: Iterable of dict-like documents.
        filters: Mapping of field name to the desired value.

    Returns:
        The documents that satisfied at least one filter, ordered by
        descending score. Ties keep their original relative order.
    """
    scored_docs = []
    for doc in docs:
        score = 0
        for key, value in filters.items():
            if key not in doc:
                continue
            doc_value = doc[key]
            if key == "totalCosts":
                # For cost thresholds, compare numerically.
                try:
                    within_budget = float(doc_value) <= float(value)
                except (TypeError, ValueError, OverflowError):
                    continue
                if within_budget:
                    score += 1
            elif isinstance(doc_value, str):
                # str() so numeric filter values (e.g. 1200) can still be
                # matched against text fields instead of raising.
                if value is not None and str(value).lower() in doc_value.lower():
                    score += 1
            elif doc_value == value:
                score += 1
        if score > 0:
            scored_docs.append((score, doc))
    # list.sort is stable, so equal scores keep input order.
    scored_docs.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scored_docs]
def format_property_data(properties: list) -> str:
    """
    Convert property JSON data into a structured string for LLM.

    Every property is rendered as a "Property <n>:" header followed by one
    "- Field: value" line per attribute; missing attributes are shown as
    "N/A". Numeric costs are formatted with thousands separators.

    Args:
        properties: List of property dicts.

    Returns:
        The rendered properties joined by newlines, or an empty string for
        an empty list.
    """
    formatted = []
    for idx, prop in enumerate(properties, 1):
        # The cost text is computed up front. Embedding the conditional
        # expression between the adjacent f-strings made it swallow the
        # surrounding lines, so each property lost most of its fields.
        cost = prop.get('totalCosts')
        if isinstance(cost, (int, float)):
            cost_str = f"{cost:,}"
        elif cost is None:
            cost_str = 'N/A'
        else:
            cost_str = str(cost)

        amenities = prop.get('amenities')
        if not amenities:
            amenities_str = 'N/A'
        elif isinstance(amenities, str):
            # Joining a bare string would split it into characters.
            amenities_str = amenities
        else:
            amenities_str = ', '.join(map(str, amenities))

        formatted.append(
            f"Property {idx}:\n"
            f"- Property Type: {prop.get('propertyType', 'N/A')}\n"
            f"- Total Cost: AED {cost_str}\n"
            f"- Size: {prop.get('propertySize', 'N/A')} sqft\n"
            f"- Property Address: {prop.get('propertyAddress', 'N/A')}\n"
            f"- Surrounding Area: {prop.get('surroundingArea', 'N/A')}\n"
            f"- Project Name: {prop.get('projectName', 'N/A')}\n"
            f"- Ownership: {prop.get('ownershipType', 'N/A')}\n"
            f"- Rental Yield: {prop.get('expectedRentalYield', 'N/A')}%\n"
            f"- Amenities: {amenities_str}\n"
            f"- Legal Details: {prop.get('legal', 'N/A')}\n"
        )
    return "\n".join(formatted)
def structured_property_data(state):
    """
    Build a marker-delimited listing of ``state["current_properties"]``.

    The output is a list of strings: a ``PROPERTY_LIST_START||<count>``
    header, one ``ITEM_START||<uniqueId>||<index>`` ... ``ITEM_END`` chunk
    per property, and a final ``PROPERTY_LIST_END`` marker.

    Args:
        state: Mapping holding the properties under ``"current_properties"``.
            A missing or ``None`` entry is treated as an empty list.

    Returns:
        List of string chunks in the order described above.
    """
    properties = state.get("current_properties") or []
    structured_data = []

    # Add list start with count
    structured_data.append(f"PROPERTY_LIST_START||{len(properties)}\n\n")

    # Add each property item
    for idx, prop in enumerate(properties, 1):
        # Format cost with commas if numeric
        cost = prop.get("totalCosts", "N/A")
        cost_str = f"AED {cost:,}" if isinstance(cost, (int, float)) else cost

        # Format amenities
        amenities = ', '.join(map(str, prop.get('amenities', []))) if prop.get('amenities') else 'N/A'

        # Build property item. Single quotes inside the f-string keep this
        # valid on Python versions before 3.12, which reject reusing the
        # enclosing quote character.
        item = [
            f"ITEM_START||{prop.get('uniqueId')}||{idx}",
            f"Type: {prop.get('propertyType', 'N/A')}",
            f"Cost: {cost_str}",
            f"Size: {prop.get('propertySize', 'N/A')}",
            f"Amenities: {amenities}",
            f"Rental Yield: {prop.get('expectedRentalYield', 'N/A')}",
            f"Ownership: {prop.get('ownershipType', 'N/A')}",
            "ITEM_END\n"
        ]
        structured_data.append("\n".join(item) + "\n")

    # Add list end marker
    structured_data.append("PROPERTY_LIST_END")
    return structured_data
# Flat vocabulary of real-estate terms, grouped by theme.
# NOTE(review): how this list is consumed is not visible here. Some entries
# appear more than once (e.g. "loft", "studio", "EPC", "green building") and
# some are upper-case acronyms; both are kept as-is so lookups and counts
# behave exactly as before.
estateKeywords = [
    # Property types
    "apartment", "condo", "condominium", "townhouse",
    "villa", "duplex", "penthouse", "studio",
    "loft", "bungalow", "cottage", "mansion", "house",
    "residence", "residential", "ranch", "estate",
    "farmhouse", "row house", "micro-apartment", "annex",
    "flat", "high-rise", "low-rise", "mid-rise",
    "complex", "housing", "subdivision", "manor",
    "castle", "chalet", "detached", "semi-detached",
    "terraced", "multi-family", "loft-style",
    "penthouse suite", "garden apartment", "luxury apartment",
    "2bhk", "1bhk", "3bhk", "4bhk", "5bhk", "6bhk", "7bhk",

    # Transactions and financing
    "buy", "sell", "purchase", "rent", "lease",
    "mortgage", "financing", "investment", "appraisal",
    "valuation", "listing", "offer", "down payment",
    "closing costs", "commission", "escrow",
    "interest rate", "loan", "refinance", "pre-approval",
    "subsidy", "foreclosure", "buyer",
    "seller", "renter", "lender", "broker",
    "realtor", "agent", "property tax", "assessment",
    "price", "cost", "expense",

    # Legal and regulatory
    "contract", "agreement", "title", "deed", "ownership",
    "legal", "zoning", "regulation", "lien",
    "disclosure", "covenant", "restriction", "mortgage deed",
    "notary", "fiduciary", "amortization",
    "leasehold", "freehold", "easement", "encumbrance",
    "compliance", "bylaw", "permit", "license",
    "inspection", "certification", "survey", "boundary",
    "deed restriction", "eminent domain",
    "expropriation", "title insurance", "closing statement",
    "settlement statement", "property assessment",
    "tax deduction", "legal fees",

    # Building services and amenities
    "maintenance", "security", "concierge", "cleaning", "HVAC",
    "elevator", "parking", "garage", "pool",
    "gym", "clubhouse", "garden", "landscaping", "utility",
    "service charge", "facility", "building management",
    "doorman", "reception", "lobby", "front desk",
    "maintenance fee", "cleaner", "janitorial", "waste management",
    "recycling", "water supply", "electricity", "gas",
    "internet", "cable", "satellite", "fire alarm",
    "sprinkler", "CCTV", "access control", "smart home",
    "automation", "security system", "alarm system",

    # Property features and specifications
    "size", "area", "square feet", "sq ft", "square meter",
    "sqm", "layout", "floor plan", "bedrooms", "beds",
    "bathrooms", "baths", "kitchen", "balcony", "view",
    "furnished", "unfurnished", "modern", "renovated",
    "new", "old", "under construction", "pre-construction",
    "storage", "fireplace", "insulation", "windows",
    "doors", "tile", "hardwood", "carpet", "luxury",
    "energy efficient", "solar panels", "waterproof",
    "air-conditioned", "heating", "cooling", "soundproof",
    "smart features", "double glazing", "open plan",
    "loft", "studio", "number of floors", "flooring",
    "ceiling height", "curb appeal", "landscaped", "patio",
    "deck", "terrace", "roof", "basement", "attic",
    "renovation", "refurbishment", "architectural", "design",
    "blueprint", "structural integrity", "energy rating",
    "EPC", "green building", "LEED certification",

    # Location and infrastructure
    "location", "neighborhood", "district", "community",
    "proximity", "access", "landmark", "street",
    "boulevard", "region", "central", "suburban", "urban",
    "rural", "metro", "vicinity", "road", "avenue",
    "block", "postcode", "zipcode", "local", "zone", "map",
    "transit", "bus", "subway", "highway",
    "railway", "airport", "shopping center", "mall",
    "public transport", "commute", "walkability", "bike path",
    "pedestrian", "infrastructure", "urban planning",
    "master plan", "road access", "public amenities",
    "school", "hospital", "park", "recreation",
    "community center", "shopping", "restaurant", "cafe", "dining",
    "entertainment", "cultural center", "museum",
    "cinema", "theater", "library",

    # Additional keywords
    "pet-friendly", "smoke-free", "homeowners association", "HOA",
    "amenities", "market trends", "rental yield",
    "occupancy", "resale", "investment potential", "appreciation",
    "listing price", "market value", "open house",
    "virtual tour", "3D tour", "drone footage", "photography",
    "staging", "showing", "signage", "sales office",
    "walk score", "neighborhood watch", "property management",
    "utilities", "land", "lot", "acreage", "fenced",
    "gated", "seaview", "mountain view", "city view",
    "waterfront", "lakefront", "beachfront", "vacation rental",
    "holiday home", "timeshare", "co-op", "shared ownership",
    "land bank", "infill", "revitalization",
    "urban renewal", "gentrification", "brownfield", "greenfield",
    "tax increment financing", "TIF",
    "economic zone", "special economic zone",
    "business improvement district", "BID", "asset management",
    "capital improvement", "utility corridor", "utility easement",
    "land lease", "lease option", "seller financing",
    "buyer financing", "interest", "escrow account",
    "comparative market analysis", "CMA", "brokerage", "MLS",
    "multiple listing service", "digital listing", "virtual staging",
    "marketing", "advertising", "sales strategy",
    "client", "customer", "inquiry", "valuation report",
    "property survey", "geodetic", "topographical", "parcel",
    "lot size", "gross floor area", "GFA", "buildable area",
    "usable area", "constructible area", "occupancy certificate",
    "completion certificate", "energy performance certificate", "EPC",
    "retrofitting", "upgrading", "furniture",
    "fixtures", "equipment", "FF&E", "soft costs", "hard costs",
    "build cost", "construction cost", "land cost",
    "tax assessment", "expropriation", "eminent domain",
    "title search", "title insurance", "closing statement",
    "settlement statement", "financial statement", "profitability",
    "operating expense", "CAPEX", "OPEX", "debt service",
    "capitalization rate", "effective gross income",
    "net operating income", "NOI", "cash-on-cash return", "discount rate",
    "internal rate of return", "IRR", "term sheet", "memorandum",
    "offering memorandum", "investment memorandum",
    "property brochure", "marketing materials", "customer inquiry",
    "buyer inquiry", "seller inquiry", "agent commission",
    "valuation model", "property portfolio", "realty",
    "real estate market", "property market", "property trends",
    "rental market", "commercial real estate",
    "residential real estate", "real estate investment trust", "REIT",
    "vacancy rate", "absorption rate", "lease renewal",
    "option to renew", "property turnover", "asset", "liability",
    "equity", "net worth", "investment property", "tax benefit",
    "depreciation", "capital gain", "capital loss",
    "market analysis", "risk assessment", "due diligence",
    "investment analysis", "financial analysis", "cash flow",
    "profit margin", "return on investment", "ROI", "exit strategy",
    "hold period", "leasing commission", "broker fee",
    "real estate agent fee", "property listing", "sales contract",
    "rent roll", "occupancy rate", "turnover", "tenant",
    "landlord", "lease agreement", "sublease", "rental agreement",
    "utility bill", "property management fee",
    "service charge fee", "annual fee", "maintenance budget",
    "repair cost", "operating cost", "management expense",
    "vacancy", "absorption", "market rental rate", "submarket",
    "investment strategy", "property acquisition",
    "development", "speculative development",
    "planned unit development", "PUD", "real estate development",
    "site development", "land development",
    "construction management", "contractor", "builder",
    "real estate consultant", "property consultant", "market research",
    "economic indicator", "demographics",
    "population density", "employment rate", "income level",
    "consumer confidence", "building code", "sustainability",
    "green building", "LEED", "BREEAM", "smart city", "innovation",
    "technology", "internet of things", "IoT",
    "big data", "data analytics", "virtual reality", "VR",
    "augmented reality", "AR", "3D modeling", "drone survey",
    "aerial photography", "satellite imagery",
    "market forecast", "property forecast",
]