pathakDev10 committed on
Commit
6ce998e
·
1 Parent(s): 9ce7793

Add application file

Browse files
Files changed (6) hide show
  1. Dockerfile +14 -0
  2. app.py +34 -0
  3. faiss.index +0 -0
  4. metadata.pkl +3 -0
  5. requirements.txt +0 -0
  6. tools.py +217 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Base image: official CPython 3.9.
FROM python:3.9

# Create an unprivileged account (UID 1000) and switch to it, so the
# remaining build steps and the container process do not run as root.
RUN useradd -m -u 1000 user
USER user
# Put that account's ~/.local/bin first on PATH (presumably so console
# scripts installed by the non-root pip run below, e.g. uvicorn, resolve).
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy only the requirements file and install dependencies before copying
# the rest of the sources, so the install step is a separate image layer.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application sources, owned by the unprivileged user.
COPY --chown=user . /app
# Serve the `app` object from app.py with uvicorn on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ from langchain.llms import HuggingFacePipeline
4
+ import torch
5
+
6
app = FastAPI()

# --- LLM Initialization using Hugging Face ---
# The tokenizer and model are loaded once at import time so every request
# shares the same pipeline instance.
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16
)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Budget the *generated* tokens only. `max_length` also counts the prompt
    # tokens, so a long query would leave little or no room for the answer.
    max_new_tokens=256,
    temperature=0.3,
)
# LangChain wrapper around the transformers pipeline; used by the endpoints.
llm = HuggingFacePipeline(pipeline=generator)
24
+
25
+ # Example endpoint using the new llm
26
@app.post("/query")
def post_query(query: str):
    """Answer a free-text query with the module-level LLM.

    Declared as a plain ``def`` on purpose: ``llm(prompt)`` is a blocking,
    compute-heavy call, and FastAPI runs non-async path functions in its
    threadpool, so the event loop stays free to serve other requests while
    the model generates.

    Args:
        query: The user's question.

    Returns:
        A dict of the form ``{"response": <model output>}``.
    """
    # Create a simple prompt structure
    prompt = f"Answer the following query:\n\n{query}\n"
    # Get the response from the LLM
    response = llm(prompt)
    return {"response": response}
33
+
34
+ # (Keep your WebSocket endpoint and other code mostly unchanged)
faiss.index ADDED
Binary file (15.4 kB). View file
 
metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae26b569be47c1dfae3aff3cd9ec583aaf3c7c1529e05d568278ab461fd64cf
3
+ size 15500
requirements.txt ADDED
Binary file (1.16 kB). View file
 
tools.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import json
3
+ import re
4
+
5
+ # if no document found suggest some ...
6
+ # " - Remove currency symbol if present, convert currency to AED if user mentioned currency symbol other than AED.\n\n"
7
def extract_json_from_response(response):
    """
    Extract the first decodable JSON object embedded in a text response.

    The text is scanned for ``{`` characters and each position is handed to
    ``json.JSONDecoder.raw_decode``, which parses exactly one JSON value and
    ignores whatever follows it. Unlike plain brace counting, this is not
    confused by braces inside string values (e.g. ``{"note": "a } b"}``), and
    a stray ``{`` in the surrounding prose does not hide a valid object that
    appears later in the text.

    Args:
        response: Raw text, typically model output. Non-string input is
            treated as containing no JSON.

    Returns:
        The decoded JSON object as a dict, or ``{}`` when none is found.
    """
    if not isinstance(response, str):
        return {}

    decoder = json.JSONDecoder()
    last_error = None
    start_index = response.find('{')
    while start_index != -1:
        try:
            candidate, _ = decoder.raw_decode(response, start_index)
        except json.JSONDecodeError as e:
            # Remember why this position failed and try the next '{'.
            last_error = e
        else:
            # Decoding started on '{', so a successful parse is a JSON object.
            return candidate
        start_index = response.find('{', start_index + 1)

    if last_error is not None:
        print("Error parsing candidate JSON:", last_error)
    return {}
32
+
33
+
34
+
35
def rule_based_extract(query):
    """
    A lightweight extraction using regular expressions.

    Detects two things in the lower-cased query:

    * a cost ceiling such as "under 43k", "below 1.5m" or "less than 2b",
      stored under ``totalCosts`` as an integer;
    * a property type (``<n>bhk``, ``villa``, ``apartment`` or ``studio``),
      stored under ``propertyType``.

    The magnitude suffix must end the token, so "under 2bhk" is read as a
    property type and not as a cost of two billion.

    Args:
        query: Free-text user query.

    Returns:
        A dict holding whichever of ``totalCosts`` / ``propertyType`` were
        detected; empty when nothing matched.
    """
    result = {}
    q_lower = query.lower()

    # Look for cost threshold phrases such as "under 43k"
    multipliers = {'k': 1000, 'm': 1000000, 'b': 1000000000}
    cost_match = re.search(
        r'\b(?:under|below|less\s+than)\s*(\d+(?:\.\d+)?)([kmb])\b',
        q_lower,
    )
    if cost_match:
        amount = float(cost_match.group(1))
        # round() rather than int(): 1.005 * 1000 is 1004.999... in binary
        # floating point and plain truncation would lose a unit.
        result['totalCosts'] = round(amount * multipliers[cost_match.group(2)])

    # Detect property type if mentioned
    prop_type_match = re.search(r'\b(\d+bhk|villa|apartment|studio)\b', q_lower)
    if prop_type_match:
        result['propertyType'] = prop_type_match.group(1)

    return result
63
+
64
+
65
+
66
+
67
+
68
+
69
def _filter_matches(key, doc_value, value):
    """Return True when one document field satisfies one filter entry."""
    # For cost thresholds, compare numerically: the document passes when its
    # cost does not exceed the requested ceiling.
    if key == "totalCosts":
        try:
            return float(doc_value) <= float(value)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric cost on either side: treat as "no match".
            return False

    if isinstance(doc_value, str):
        if value is None:
            return False
        # Case-insensitive substring match; str() keeps non-string filter
        # values (e.g. an int) from raising AttributeError on .lower().
        return str(value).lower() in doc_value.lower()

    return doc_value == value


def apply_filters_partial(docs, filters):
    """
    Rank documents by how many of the given filters they satisfy.

    Each filter whose key is present in a document and whose value matches
    contributes one point. ``totalCosts`` is treated as an upper bound,
    string fields use a case-insensitive substring match, and every other
    field uses equality. Documents that satisfy no filter are dropped.

    Args:
        docs: Iterable of dict-like documents.
        filters: Mapping of field name to the wanted value.

    Returns:
        The matching documents, best score first; documents with equal
        scores keep their input order.
    """
    scored_docs = []

    for doc in docs:
        score = sum(
            1
            for key, value in filters.items()
            if key in doc and _filter_matches(key, doc[key], value)
        )
        if score > 0:
            scored_docs.append((score, doc))

    # list.sort is stable, so ties keep their original relative order.
    scored_docs.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scored_docs]
101
+
102
+
103
+
104
def format_property_data(properties: list) -> str:
    """Convert property JSON data into a structured string for LLM.

    Each property becomes a "Property <n>:" header followed by one
    "- Label: value" line per field; missing fields are shown as "N/A".
    Numeric costs are rendered with thousands separators.

    The cost is formatted before the entry is assembled. Writing the
    conditional expression inline between adjacent f-string literals makes
    it swallow the neighbouring lines, because implicit string concatenation
    binds tighter than ``if``/``else``.

    Args:
        properties: List of property dicts.

    Returns:
        The formatted entries joined by a newline; an empty string for an
        empty list.
    """
    formatted = []

    for idx, prop in enumerate(properties, 1):
        total_cost = prop.get('totalCosts')
        if total_cost is None:
            total_cost = 'N/A'
        elif isinstance(total_cost, (int, float)) and not isinstance(total_cost, bool):
            total_cost = f"{total_cost:,}"

        amenities = prop.get('amenities')
        if not amenities:
            amenities_text = 'N/A'
        elif isinstance(amenities, str):
            # A plain string would otherwise be joined character by character.
            amenities_text = amenities
        else:
            amenities_text = ', '.join(str(item) for item in amenities)

        formatted.append(
            f"Property {idx}:\n"
            f"- Property Type: {prop.get('propertyType', 'N/A')}\n"
            f"- Total Cost: AED {total_cost}\n"
            f"- Size: {prop.get('propertySize', 'N/A')} sqft\n"
            f"- Property Address: {prop.get('propertyAddress', 'N/A')}\n"
            f"- Surrounding Area: {prop.get('surroundingArea', 'N/A')}\n"
            f"- Project Name: {prop.get('projectName', 'N/A')}\n"
            f"- Ownership: {prop.get('ownershipType', 'N/A')}\n"
            f"- Rental Yield: {prop.get('expectedRentalYield', 'N/A')}%\n"
            f"- Amenities: {amenities_text}\n"
            f"- Legal Details: {prop.get('legal', 'N/A')}\n"
        )

    return "\n".join(formatted)
124
+
125
+
126
+
127
+
128
+
129
# Real-estate vocabulary, grouped by theme.
# How the list is consumed is not visible in this file -- presumably it is
# used to decide whether a user query is about real estate; confirm against
# the callers.
# Several terms appear under more than one theme ("loft", "studio", "EPC",
# "green building", ...); dict.fromkeys() drops the repeats while keeping
# first-occurrence order, so the exported list holds each keyword once.
# NOTE(review): some entries are upper/mixed case ("HVAC", "CCTV", "ROI");
# a matcher that lower-cases the query would never hit them -- confirm.
estateKeywords = list(dict.fromkeys([
    # Property Types
    "apartment", "condo", "condominium", "townhouse", "villa", "duplex", "penthouse", "studio",
    "loft", "bungalow", "cottage", "mansion", "house", "residence", "residential", "ranch", "estate",
    "farmhouse", "row house", "micro-apartment", "annex", "flat", "high-rise", "low-rise", "mid-rise",
    "complex", "housing", "subdivision", "manor", "castle", "chalet", "detached", "semi-detached",
    "terraced", "multi-family", "loft-style", "penthouse suite", "garden apartment", "luxury apartment",
    "2bhk", "1bhk", "3bhk", "4bhk", "5bhk", "6bhk", "7bhk",

    # Transaction & Financing Terms
    "buy", "sell", "purchase", "rent", "lease", "mortgage", "financing", "investment", "appraisal",
    "valuation", "listing", "offer", "down payment", "closing costs", "commission", "escrow",
    "interest rate", "loan", "refinance", "pre-approval", "subsidy", "foreclosure", "buyer",
    "seller", "renter", "lender", "broker", "realtor", "agent", "property tax", "assessment",
    "price", "cost", "expense",

    # Legal & Regulatory
    "contract", "agreement", "title", "deed", "ownership", "legal", "zoning", "regulation", "lien",
    "disclosure", "covenant", "restriction", "mortgage deed", "notary", "fiduciary", "amortization",
    "leasehold", "freehold", "easement", "encumbrance", "compliance", "bylaw", "permit", "license",
    "inspection", "certification", "survey", "boundary", "deed restriction", "eminent domain",
    "expropriation", "title insurance", "closing statement", "settlement statement", "property assessment",
    "tax deduction", "legal fees",

    # Building Services & Amenities
    "maintenance", "security", "concierge", "cleaning", "HVAC", "elevator", "parking", "garage", "pool",
    "gym", "clubhouse", "garden", "landscaping", "utility", "service charge", "facility", "building management",
    "doorman", "reception", "lobby", "front desk", "maintenance fee", "cleaner", "janitorial", "waste management",
    "recycling", "water supply", "electricity", "gas", "internet", "cable", "satellite", "fire alarm",
    "sprinkler", "CCTV", "access control", "smart home", "automation", "security system", "alarm system",

    # Property Features & Specifications
    "size", "area", "square feet", "sq ft", "square meter", "sqm", "layout", "floor plan", "bedrooms", "beds",
    "bathrooms", "baths", "kitchen", "balcony", "view", "furnished", "unfurnished", "modern", "renovated",
    "new", "old", "under construction", "pre-construction", "storage", "fireplace", "insulation", "windows",
    "doors", "tile", "hardwood", "carpet", "luxury", "energy efficient", "solar panels", "waterproof",
    "air-conditioned", "heating", "cooling", "soundproof", "smart features", "double glazing", "open plan",
    "loft", "studio", "number of floors", "flooring", "ceiling height", "curb appeal", "landscaped", "patio",
    "deck", "terrace", "roof", "basement", "attic", "renovation", "refurbishment", "architectural", "design",
    "blueprint", "structural integrity", "energy rating", "EPC", "green building", "LEED certification",

    # Location & Infrastructure
    "location", "neighborhood", "district", "community", "proximity", "access", "landmark", "street",
    "boulevard", "region", "central", "suburban", "urban", "rural", "metro", "vicinity", "road", "avenue",
    "block", "postcode", "zipcode", "local", "zone", "map", "transit", "bus", "subway", "highway",
    "railway", "airport", "shopping center", "mall", "public transport", "commute", "walkability", "bike path",
    "pedestrian", "infrastructure", "urban planning", "master plan", "road access", "public amenities",
    "school", "hospital", "park", "recreation", "community center", "shopping", "restaurant", "cafe", "dining",
    "entertainment", "cultural center", "museum", "cinema", "theater", "library",

    # Additional Keywords
    "pet-friendly", "smoke-free", "homeowners association", "HOA", "amenities", "market trends", "rental yield",
    "occupancy", "resale", "investment potential", "appreciation", "listing price", "market value", "open house",
    "virtual tour", "3D tour", "drone footage", "photography", "staging", "showing", "signage", "sales office",
    "walk score", "neighborhood watch", "property management", "utilities", "land", "lot", "acreage", "fenced",
    "gated", "seaview", "mountain view", "city view", "waterfront", "lakefront", "beachfront", "vacation rental",
    "holiday home", "timeshare", "co-op", "shared ownership", "land bank", "infill", "revitalization",
    "urban renewal", "gentrification", "brownfield", "greenfield", "tax increment financing", "TIF",
    "economic zone", "special economic zone", "business improvement district", "BID", "asset management",
    "capital improvement", "utility corridor", "utility easement", "land lease", "lease option", "seller financing",
    "buyer financing", "interest", "escrow account", "comparative market analysis", "CMA", "brokerage", "MLS",
    "multiple listing service", "digital listing", "virtual staging", "marketing", "advertising", "sales strategy",
    "client", "customer", "inquiry", "valuation report", "property survey", "geodetic", "topographical", "parcel",
    "lot size", "gross floor area", "GFA", "buildable area", "usable area", "constructible area", "occupancy certificate",
    "completion certificate", "energy performance certificate", "EPC", "retrofitting", "upgrading", "furniture",
    "fixtures", "equipment", "FF&E", "soft costs", "hard costs", "build cost", "construction cost", "land cost",
    "tax assessment", "expropriation", "eminent domain", "title search", "title insurance", "closing statement",
    "settlement statement", "financial statement", "profitability", "operating expense", "CAPEX", "OPEX", "debt service",
    "capitalization rate", "effective gross income", "net operating income", "NOI", "cash-on-cash return", "discount rate",
    "internal rate of return", "IRR", "term sheet", "memorandum", "offering memorandum", "investment memorandum",
    "property brochure", "marketing materials", "customer inquiry", "buyer inquiry", "seller inquiry", "agent commission",
    "valuation model", "property portfolio", "realty", "real estate market", "property market", "property trends",
    "rental market", "commercial real estate", "residential real estate", "real estate investment trust", "REIT",
    "vacancy rate", "absorption rate", "lease renewal", "option to renew", "property turnover", "asset", "liability",
    "equity", "net worth", "investment property", "tax benefit", "depreciation", "capital gain", "capital loss",
    "market analysis", "risk assessment", "due diligence", "investment analysis", "financial analysis", "cash flow",
    "profit margin", "return on investment", "ROI", "exit strategy", "hold period", "leasing commission", "broker fee",
    "real estate agent fee", "property listing", "sales contract", "rent roll", "occupancy rate", "turnover", "tenant",
    "landlord", "lease agreement", "sublease", "rental agreement", "utility bill", "property management fee",
    "service charge fee", "annual fee", "maintenance budget", "repair cost", "operating cost", "management expense",
    "vacancy", "absorption", "market rental rate", "submarket", "investment strategy", "property acquisition",
    "development", "speculative development", "planned unit development", "PUD", "real estate development",
    "site development", "land development", "construction management", "contractor", "builder",
    "real estate consultant", "property consultant", "market research", "economic indicator", "demographics",
    "population density", "employment rate", "income level", "consumer confidence", "building code", "sustainability",
    "green building", "LEED", "BREEAM", "smart city", "innovation", "technology", "internet of things", "IoT",
    "big data", "data analytics", "virtual reality", "VR", "augmented reality", "AR", "3D modeling", "drone survey",
    "aerial photography", "satellite imagery", "market forecast", "property forecast",
]))