Spaces:
Sleeping
Sleeping
Commit
·
6ce998e
1
Parent(s):
9ce7793
Add application file
Browse files- Dockerfile +14 -0
- app.py +34 -0
- faiss.index +0 -0
- metadata.pkl +3 -0
- requirements.txt +0 -0
- tools.py +217 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Base image: the official Python 3.9 image.
FROM python:3.9

# Create a non-root account (UID 1000) and run every later step as that user.
RUN useradd -m -u 1000 user
USER user
# Put the user's local bin directory first on PATH; presumably this is where
# pip places console scripts such as uvicorn for a non-root install.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only requirements.txt before installing, so the dependency install
# happens before the rest of the source tree is copied in.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the remaining application files, owned by the non-root user.
COPY --chown=user . /app
# Serve the `app` object from app.py with uvicorn on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
3 |
+
from langchain.llms import HuggingFacePipeline
|
4 |
+
import torch
|
5 |
+
|
6 |
+
app = FastAPI()
|
7 |
+
|
8 |
+
# --- LLM Initialization using Hugging Face ---
# The tokenizer, model and pipeline are built once at import time and shared
# by every request handled by this module.
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Cap only the generated continuation. `max_length` also counts the
    # prompt tokens, so a long query would leave little or no room for an
    # answer.
    max_new_tokens=256,
    # `temperature` is only honoured when sampling is enabled; without
    # `do_sample=True` generation is greedy and the value is ignored.
    do_sample=True,
    temperature=0.3,
)
llm = HuggingFacePipeline(pipeline=generator)
|
24 |
+
|
25 |
+
# Example endpoint using the new llm
|
26 |
+
@app.post("/query")
def post_query(query: str):
    """Answer ``query`` with the module-level LLM.

    Declared as a plain ``def`` rather than ``async def``: the LLM call below
    is synchronous and would otherwise block the event loop for every other
    request while the model generates. FastAPI runs non-async handlers in a
    worker thread instead.

    Args:
        query: The user's question.

    Returns:
        A dict with a single ``"response"`` key holding the LLM output.
    """
    # Create a simple prompt structure
    prompt = f"Answer the following query:\n\n{query}\n"
    # Get the response from the LLM
    response = llm(prompt)
    return {"response": response}
|
33 |
+
|
34 |
+
# (Keep your WebSocket endpoint and other code mostly unchanged)
|
faiss.index
ADDED
Binary file (15.4 kB). View file
|
|
metadata.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fae26b569be47c1dfae3aff3cd9ec583aaf3c7c1529e05d568278ab461fd64cf
|
3 |
+
size 15500
|
requirements.txt
ADDED
Binary file (1.16 kB). View file
|
|
tools.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
|
5 |
+
# if no document found suggest some ...
|
6 |
+
# " - Remove currency symbol if present, convert currency to AED if user mentioned currency symbol other than AED.\n\n"
|
7 |
+
def extract_json_from_response(response):
    """
    Extract the first JSON object embedded in ``response``.

    Parsing starts at the first ``{`` and is delegated to
    ``json.JSONDecoder.raw_decode``, which stops at the end of the object and
    ignores any trailing text. Unlike plain brace counting, this handles
    braces that appear inside JSON string values (e.g. ``{"a": "}"}``).

    Args:
        response: Text that may contain a JSON object, typically LLM output.

    Returns:
        The decoded dict, or ``{}`` when ``response`` is not a string,
        contains no ``{``, or the text from the first ``{`` is not valid JSON.
    """
    if not isinstance(response, str):
        return {}

    start_index = response.find('{')
    if start_index == -1:
        return {}

    try:
        parsed, _end = json.JSONDecoder().raw_decode(response, start_index)
    except json.JSONDecodeError as e:
        print("Error parsing candidate JSON:", e)
        return {}

    # Decoding began at '{', so this is a dict; the check keeps the
    # "always returns a dict" contract explicit.
    return parsed if isinstance(parsed, dict) else {}
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
def rule_based_extract(query):
    """
    A lightweight extraction using regular expressions.

    Detects a cost ceiling such as "under 43k" / "below 1.5m" and a property
    type keyword.

    Args:
        query: Free-text user query.

    Returns:
        A dict that may contain ``totalCosts`` (int, the ceiling expanded by
        its k/m/b suffix) and/or ``propertyType`` (the matched lowercase
        keyword). Empty when nothing is recognised.
    """
    result = {}
    q_lower = query.lower()

    # Look for cost threshold phrases such as "under 43k".
    # The trailing \b keeps "less than 3bhk" from being read as 3 billion,
    # and the leading \b keeps words such as "thunder" from matching "under".
    multipliers = {'k': 1000, 'm': 1000000, 'b': 1000000000}
    cost_match = re.search(
        r'\b(?:under|below|less than)\s*(\d+(?:\.\d+)?)([kmb])\b', q_lower
    )
    if cost_match:
        amount = float(cost_match.group(1))
        # round() rather than int(): float products such as 1.001 * 1000 can
        # land just below the exact value and would otherwise be truncated.
        result['totalCosts'] = round(amount * multipliers[cost_match.group(2)])

    # Detect property type if mentioned
    prop_type_match = re.search(r'\b(\d+bhk|villa|apartment|studio)\b', q_lower)
    if prop_type_match:
        result['propertyType'] = prop_type_match.group(1)

    return result
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
def apply_filters_partial(docs, filters):
    """
    Rank documents by how many of the given filters they satisfy.

    Matching rules per filter key:
      * ``totalCosts``: matches when the document's cost is numerically less
        than or equal to the filter value; values that cannot be converted
        to a number never match.
      * string document values: case-insensitive substring match of the
        filter value (non-string filter values are compared via ``str()``).
      * anything else: plain equality.

    Args:
        docs: Iterable of dict-like documents.
        filters: Mapping of field name to wanted value.

    Returns:
        Documents that satisfy at least one filter, best match first.
        Documents with equal scores keep their input order.
    """
    if not docs or not filters:
        return []

    scored_docs = []
    for doc in docs:
        score = 0
        for key, value in filters.items():
            if key not in doc:
                continue

            doc_value = doc[key]

            if key == "totalCosts":
                # For cost thresholds, compare numerically.
                try:
                    if float(doc_value) <= float(value):
                        score += 1
                except (TypeError, ValueError, OverflowError):
                    continue
            elif isinstance(doc_value, str):
                # str() so that a numeric filter value (e.g. 2) can still be
                # matched against text instead of raising AttributeError.
                if value is not None and str(value).lower() in doc_value.lower():
                    score += 1
            elif doc_value == value:
                score += 1

        if score > 0:
            scored_docs.append((score, doc))

    # list.sort is stable, so ties keep their original relative order.
    scored_docs.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _score, doc in scored_docs]
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
def format_property_data(properties: list) -> str:
    """Convert property JSON data into a structured string for LLM.

    Each property becomes a "Property N:" header followed by one "- Field:
    value" line per attribute; missing attributes are shown as ``N/A``.

    Args:
        properties: List of property dicts.

    Returns:
        The per-property sections joined by a newline, or ``""`` for an
        empty list.
    """
    formatted = []

    for idx, prop in enumerate(properties, 1):
        # Build the cost text first. Embedding the conditional directly among
        # the adjacent f-string literals made it swallow them, so every
        # property lost part of its section.
        total_cost = prop.get('totalCosts')
        if total_cost is None:
            cost_text = 'N/A'
        elif isinstance(total_cost, (int, float)) and not isinstance(total_cost, bool):
            cost_text = f"{total_cost:,}"
        else:
            cost_text = str(total_cost)

        amenities = prop.get('amenities')
        if not amenities:
            amenities_text = 'N/A'
        elif isinstance(amenities, str):
            # Joining a plain string would put ", " between its characters.
            amenities_text = amenities
        else:
            amenities_text = ', '.join(str(item) for item in amenities)

        formatted.append(
            f"Property {idx}:\n"
            f"- Property Type: {prop.get('propertyType', 'N/A')}\n"
            f"- Total Cost: AED {cost_text}\n"
            f"- Size: {prop.get('propertySize', 'N/A')} sqft\n"
            f"- Property Address: {prop.get('propertyAddress', 'N/A')}\n"
            f"- Surrounding Area: {prop.get('surroundingArea', 'N/A')}\n"
            f"- Project Name: {prop.get('projectName', 'N/A')}\n"
            f"- Ownership: {prop.get('ownershipType', 'N/A')}\n"
            f"- Rental Yield: {prop.get('expectedRentalYield', 'N/A')}%\n"
            f"- Amenities: {amenities_text}\n"
            f"- Legal Details: {prop.get('legal', 'N/A')}\n"
        )

    return "\n".join(formatted)
|
124 |
+
|
125 |
+
|
126 |
+
|
127 |
+
|
128 |
+
|
129 |
+
# Real-estate vocabulary, grouped by theme. The source list repeats some terms
# across groups (e.g. "loft", "studio", "EPC"); dict.fromkeys drops the
# repeats while keeping first-seen order, so each keyword appears once.
estateKeywords = list(dict.fromkeys([
    # Property Types
    "apartment", "condo", "condominium", "townhouse", "villa", "duplex", "penthouse", "studio",
    "loft", "bungalow", "cottage", "mansion", "house", "residence", "residential", "ranch", "estate",
    "farmhouse", "row house", "micro-apartment", "annex", "flat", "high-rise", "low-rise", "mid-rise",
    "complex", "housing", "subdivision", "manor", "castle", "chalet", "detached", "semi-detached",
    "terraced", "multi-family", "loft-style", "penthouse suite", "garden apartment", "luxury apartment",
    "2bhk", "1bhk", "3bhk", "4bhk", "5bhk", "6bhk", "7bhk",

    # Transaction & Financing Terms
    "buy", "sell", "purchase", "rent", "lease", "mortgage", "financing", "investment", "appraisal",
    "valuation", "listing", "offer", "down payment", "closing costs", "commission", "escrow",
    "interest rate", "loan", "refinance", "pre-approval", "subsidy", "foreclosure", "buyer",
    "seller", "renter", "lender", "broker", "realtor", "agent", "property tax", "assessment",
    "price", "cost", "expense",

    # Legal & Regulatory
    "contract", "agreement", "title", "deed", "ownership", "legal", "zoning", "regulation", "lien",
    "disclosure", "covenant", "restriction", "mortgage deed", "notary", "fiduciary", "amortization",
    "leasehold", "freehold", "easement", "encumbrance", "compliance", "bylaw", "permit", "license",
    "inspection", "certification", "survey", "boundary", "deed restriction", "eminent domain",
    "expropriation", "title insurance", "closing statement", "settlement statement", "property assessment",
    "tax deduction", "legal fees",

    # Building Services & Amenities
    "maintenance", "security", "concierge", "cleaning", "HVAC", "elevator", "parking", "garage", "pool",
    "gym", "clubhouse", "garden", "landscaping", "utility", "service charge", "facility", "building management",
    "doorman", "reception", "lobby", "front desk", "maintenance fee", "cleaner", "janitorial", "waste management",
    "recycling", "water supply", "electricity", "gas", "internet", "cable", "satellite", "fire alarm",
    "sprinkler", "CCTV", "access control", "smart home", "automation", "security system", "alarm system",

    # Property Features & Specifications
    "size", "area", "square feet", "sq ft", "square meter", "sqm", "layout", "floor plan", "bedrooms", "beds",
    "bathrooms", "baths", "kitchen", "balcony", "view", "furnished", "unfurnished", "modern", "renovated",
    "new", "old", "under construction", "pre-construction", "storage", "fireplace", "insulation", "windows",
    "doors", "tile", "hardwood", "carpet", "luxury", "energy efficient", "solar panels", "waterproof",
    "air-conditioned", "heating", "cooling", "soundproof", "smart features", "double glazing", "open plan",
    "loft", "studio", "number of floors", "flooring", "ceiling height", "curb appeal", "landscaped", "patio",
    "deck", "terrace", "roof", "basement", "attic", "renovation", "refurbishment", "architectural", "design",
    "blueprint", "structural integrity", "energy rating", "EPC", "green building", "LEED certification",

    # Location & Infrastructure
    "location", "neighborhood", "district", "community", "proximity", "access", "landmark", "street",
    "boulevard", "region", "central", "suburban", "urban", "rural", "metro", "vicinity", "road", "avenue",
    "block", "postcode", "zipcode", "local", "zone", "map", "transit", "bus", "subway", "highway",
    "railway", "airport", "shopping center", "mall", "public transport", "commute", "walkability", "bike path",
    "pedestrian", "infrastructure", "urban planning", "master plan", "road access", "public amenities",
    "school", "hospital", "park", "recreation", "community center", "shopping", "restaurant", "cafe", "dining",
    "entertainment", "cultural center", "museum", "cinema", "theater", "library",

    # Additional Keywords
    "pet-friendly", "smoke-free", "homeowners association", "HOA", "amenities", "market trends", "rental yield",
    "occupancy", "resale", "investment potential", "appreciation", "listing price", "market value", "open house",
    "virtual tour", "3D tour", "drone footage", "photography", "staging", "showing", "signage", "sales office",
    "walk score", "neighborhood watch", "property management", "utilities", "land", "lot", "acreage", "fenced",
    "gated", "seaview", "mountain view", "city view", "waterfront", "lakefront", "beachfront", "vacation rental",
    "holiday home", "timeshare", "co-op", "shared ownership", "land bank", "infill", "revitalization",
    "urban renewal", "gentrification", "brownfield", "greenfield", "tax increment financing", "TIF",
    "economic zone", "special economic zone", "business improvement district", "BID", "asset management",
    "capital improvement", "utility corridor", "utility easement", "land lease", "lease option", "seller financing",
    "buyer financing", "interest", "escrow account", "comparative market analysis", "CMA", "brokerage", "MLS",
    "multiple listing service", "digital listing", "virtual staging", "marketing", "advertising", "sales strategy",
    "client", "customer", "inquiry", "valuation report", "property survey", "geodetic", "topographical", "parcel",
    "lot size", "gross floor area", "GFA", "buildable area", "usable area", "constructible area", "occupancy certificate",
    "completion certificate", "energy performance certificate", "EPC", "retrofitting", "upgrading", "furniture",
    "fixtures", "equipment", "FF&E", "soft costs", "hard costs", "build cost", "construction cost", "land cost",
    "tax assessment", "expropriation", "eminent domain", "title search", "title insurance", "closing statement",
    "settlement statement", "financial statement", "profitability", "operating expense", "CAPEX", "OPEX", "debt service",
    "capitalization rate", "effective gross income", "net operating income", "NOI", "cash-on-cash return", "discount rate",
    "internal rate of return", "IRR", "term sheet", "memorandum", "offering memorandum", "investment memorandum",
    "property brochure", "marketing materials", "customer inquiry", "buyer inquiry", "seller inquiry", "agent commission",
    "valuation model", "property portfolio", "realty", "real estate market", "property market", "property trends",
    "rental market", "commercial real estate", "residential real estate", "real estate investment trust", "REIT",
    "vacancy rate", "absorption rate", "lease renewal", "option to renew", "property turnover", "asset", "liability",
    "equity", "net worth", "investment property", "tax benefit", "depreciation", "capital gain", "capital loss",
    "market analysis", "risk assessment", "due diligence", "investment analysis", "financial analysis", "cash flow",
    "profit margin", "return on investment", "ROI", "exit strategy", "hold period", "leasing commission", "broker fee",
    "real estate agent fee", "property listing", "sales contract", "rent roll", "occupancy rate", "turnover", "tenant",
    "landlord", "lease agreement", "sublease", "rental agreement", "utility bill", "property management fee",
    "service charge fee", "annual fee", "maintenance budget", "repair cost", "operating cost", "management expense",
    "vacancy", "absorption", "market rental rate", "submarket", "investment strategy", "property acquisition",
    "development", "speculative development", "planned unit development", "PUD", "real estate development",
    "site development", "land development", "construction management", "contractor", "builder",
    "real estate consultant", "property consultant", "market research", "economic indicator", "demographics",
    "population density", "employment rate", "income level", "consumer confidence", "building code", "sustainability",
    "green building", "LEED", "BREEAM", "smart city", "innovation", "technology", "internet of things", "IoT",
    "big data", "data analytics", "virtual reality", "VR", "augmented reality", "AR", "3D modeling", "drone survey",
    "aerial photography", "satellite imagery", "market forecast", "property forecast",
]))
|