Spaces:
Sleeping
Sleeping
Gordon Li
committed on
Commit
·
abcd2bb
1
Parent(s):
53d8ba3
Code refactoring
Browse files- HKUSTBNBConstant.py +219 -0
- AirbnbMapVisualiser.py → HKUSTBNBVisualiser.py +66 -305
- TrafficSpot.py → TDTrafficSpot.py +90 -107
- app.py +55 -145
HKUSTBNBConstant.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# HKUSTBNBConstant.py
|
2 |
+
|
3 |
+
GET_ALL_NEIGHBORHOODS = """
|
4 |
+
SELECT DISTINCT NEIGHBOURHOOD
|
5 |
+
FROM airbnb_master_data
|
6 |
+
WHERE NEIGHBOURHOOD IS NOT NULL
|
7 |
+
ORDER BY NEIGHBOURHOOD
|
8 |
+
"""
|
9 |
+
|
10 |
+
GET_NEIGHBORHOOD_LISTINGS = """
|
11 |
+
SELECT m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
|
12 |
+
m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
|
13 |
+
COUNT(r.LISTING_ID) as NUMBER_OF_REVIEWS, m.REVIEWS_PER_MONTH,
|
14 |
+
m.MINIMUM_NIGHTS, m.AVAILABILITY_365
|
15 |
+
FROM airbnb_master_data m
|
16 |
+
LEFT JOIN airbnb_reviews_data r ON m.ID = r.LISTING_ID
|
17 |
+
WHERE m.LATITUDE IS NOT NULL
|
18 |
+
AND m.LONGITUDE IS NOT NULL
|
19 |
+
AND m.NEIGHBOURHOOD = :neighborhood
|
20 |
+
GROUP BY m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
|
21 |
+
m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
|
22 |
+
m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
|
23 |
+
ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
|
24 |
+
FETCH FIRST :limit ROWS ONLY
|
25 |
+
"""
|
26 |
+
|
27 |
+
# Fetch the 10 most recent reviews for one listing, truncating long comments
# to 200 characters followed by '...'.
#
# Bind variables:
#   :listing_id - ID of the listing whose reviews are requested.
#
# The row limit is applied with FETCH FIRST *after* ORDER BY. Filtering with
# "ROWNUM <= 10" in the WHERE clause would pick 10 arbitrary rows before the
# sort runs, so the result would not be the latest reviews.
GET_LISTING_REVIEWS = """
    SELECT REVIEW_DATE, REVIEWER_NAME,
           CASE
               WHEN LENGTH(COMMENTS) > 200
               THEN SUBSTR(COMMENTS, 1, 200) || '...'
               ELSE COMMENTS
           END as COMMENTS
    FROM AIRBNB_REVIEWS_DATA
    WHERE LISTING_ID = :listing_id
    ORDER BY REVIEW_DATE DESC
    FETCH FIRST 10 ROWS ONLY
"""
|
39 |
+
|
40 |
+
# Fetch the full comment text of the 10 most recent non-empty reviews for one
# listing; used as input for semantic search scoring.
#
# Bind variables:
#   :listing_id - ID of the listing whose reviews are requested.
#
# The row limit is applied with FETCH FIRST *after* ORDER BY. Filtering with
# "ROWNUM <= 10" in the WHERE clause would pick 10 arbitrary rows before the
# sort runs, so the result would not be the latest reviews.
GET_LISTING_REVIEWS_FOR_SEARCH = """
    SELECT COMMENTS
    FROM AIRBNB_REVIEWS_DATA
    WHERE LISTING_ID = :listing_id
      AND COMMENTS IS NOT NULL
    ORDER BY REVIEW_DATE DESC
    FETCH FIRST 10 ROWS ONLY
"""
|
48 |
+
|
49 |
+
GET_TRAFFIC_CAMERA_LOCATIONS = """
|
50 |
+
SELECT KEY, LATITUDE, LONGITUDE
|
51 |
+
FROM TD_TRAFFIC_CAMERA_LOCATION
|
52 |
+
WHERE KEY IN ({placeholders})
|
53 |
+
AND LATITUDE IS NOT NULL
|
54 |
+
AND LONGITUDE IS NOT NULL
|
55 |
+
"""
|
56 |
+
|
57 |
+
DISCOUNT_INFO_TEMPLATE = """
|
58 |
+
<div style='background-color: #e8f5e9; padding: 8px; margin: 10px 0; border-radius: 4px; border-left: 4px solid #4caf50;'>
|
59 |
+
<p style='margin: 2px 0; font-weight: bold; color: #2e7d32;'>{discount_percentage}% ENV PROTECTION DISCOUNT!</p>
|
60 |
+
<p style='margin: 2px 0; font-size: 0.85em;'>Avg. {avg_vehicle_count:.1f} vehicles across {observation_count} observations</p>
|
61 |
+
</div>
|
62 |
+
"""
|
63 |
+
|
64 |
+
TRAFFIC_SPOT_INFO_TEMPLATE = """
|
65 |
+
<div class='traffic-spot-info' style='margin: 10px 0; padding: 8px; background-color: #f0f8ff; border-radius: 4px; border-left: 4px solid #4285f4;'>
|
66 |
+
<p style='margin: 5px 0;'>
|
67 |
+
<strong>Nearest Traffic Spot:</strong> {spot_key}
|
68 |
+
<br/>
|
69 |
+
<strong>Distance:</strong> {distance_str}
|
70 |
+
</p>
|
71 |
+
</div>
|
72 |
+
"""
|
73 |
+
|
74 |
+
RELEVANCE_INFO_TEMPLATE = """
|
75 |
+
<div class='relevance-info' style='margin: 10px 0; padding: 8px; background-color: #f8f9fa; border-radius: 4px;'>
|
76 |
+
<p style='margin: 5px 0;'>
|
77 |
+
<strong>Match Score:</strong> {relevance_percentage:.0f}%
|
78 |
+
<br/>
|
79 |
+
<strong>Relevance:</strong> {relevance_features}
|
80 |
+
<br/>
|
81 |
+
<strong>Match Type:</strong> {matching_features}
|
82 |
+
</p>
|
83 |
+
</div>
|
84 |
+
"""
|
85 |
+
|
86 |
+
POPUP_CONTENT_TEMPLATE = """
|
87 |
+
<div style='min-width: 280px; max-width: 320px; padding: 15px;'>
|
88 |
+
<h4 style='margin: 0 0 10px 0; color: #2c3e50;'>{listing_name}</h4>
|
89 |
+
<p style='margin: 5px 0;'><strong>Host:</strong> {host_name}</p>
|
90 |
+
<p style='margin: 5px 0;'><strong>Room Type:</strong> {room_type}</p>
|
91 |
+
<p style='margin: 5px 0;'>{price_display}</p>
|
92 |
+
<p style='margin: 5px 0;'><strong>Reviews:</strong> {review_count:.0f}</p>
|
93 |
+
{discount_info}
|
94 |
+
{traffic_spot_info}
|
95 |
+
{relevance_info}
|
96 |
+
</div>
|
97 |
+
"""
|
98 |
+
|
99 |
+
MAP_SCRIPT = """
|
100 |
+
<script>
|
101 |
+
function showTrafficSpot(lat, lng) {
|
102 |
+
var map = document.querySelector('.folium-map')._leaflet_map;
|
103 |
+
map.setView([lat, lng], 18);
|
104 |
+
map.eachLayer(function(layer) {
|
105 |
+
if (layer instanceof L.Marker) {
|
106 |
+
var latLng = layer.getLatLng();
|
107 |
+
if (Math.abs(latLng.lat - lat) < 0.0001 && Math.abs(latLng.lng - lng) < 0.0001) {
|
108 |
+
layer.openPopup();
|
109 |
+
}
|
110 |
+
}
|
111 |
+
});
|
112 |
+
}
|
113 |
+
</script>
|
114 |
+
"""
|
115 |
+
|
116 |
+
# HTML Templates for Streamlit UI
|
117 |
+
SIDEBAR_HEADER = '<p class="sidebar-header">HKUST BNB+<BR/></p>'
|
118 |
+
|
119 |
+
SIDEBAR_DIVIDER = '<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">'
|
120 |
+
|
121 |
+
TRAFFIC_EXPLANATION = """
|
122 |
+
### How HKUST BNB+ Achieved (E)SG: we use Traffic Spot data from the Department of Transport and perform traffic analysis, then provide discounts according
|
123 |
+
to the average traffic on the previous days.
|
124 |
+
|
125 |
+
We use real-time traffic data to offer you the best possible rates:
|
126 |
+
|
127 |
+
* **Blue Camera Icons**: Areas with very low traffic (less than 2 vehicles detected)
|
128 |
+
* Enjoy a peaceful stay with **20% DISCOUNT** on these properties!
|
129 |
+
|
130 |
+
* **Orange Camera Icons**: Areas with moderate traffic (2-5 vehicles detected)
|
131 |
+
* Get a **10% DISCOUNT** on these properties!
|
132 |
+
|
133 |
+
* **Purple Camera Icons**: Areas with heavier traffic (more than 5 vehicles)
|
134 |
+
* Standard rates apply for these properties
|
135 |
+
|
136 |
+
Look for the blue connecting lines on the map to see which traffic spot affects each property!
|
137 |
+
|
138 |
+
Remark: Currently only a few traffic spots are available; more will be provided in the future.
|
139 |
+
"""
|
140 |
+
|
141 |
+
SEARCH_EXPLANATION = """
|
142 |
+
### How HKUST BNB+ Achieved E(S)G: we use keywords to provide semantic relevance analysis that matches the needs of HKUST students
|
143 |
+
|
144 |
+
Our advanced search technology goes beyond simple keyword matching to understand the meaning behind your search terms:
|
145 |
+
|
146 |
+
When you search for terms like "quiet," "convenient," or "spacious," our system:
|
147 |
+
1. Analyzes both listing titles and actual guest reviews
|
148 |
+
2. Understands the context and meaning (not just matching exact words)
|
149 |
+
3. Ranks listings based on overall relevance to your search
|
150 |
+
|
151 |
+
**Search Match Types:**
|
152 |
+
* **"Strong match in title and reviews"** - Perfect matches in both property description and guest experiences
|
153 |
+
* **"Strong match in listing title"** - Property description matches your needs very well
|
154 |
+
* **"Strong match in reviews"** - Guest experiences align perfectly with what you're looking for
|
155 |
+
* **"Better match in listing title/reviews"** - One source is more relevant than the other
|
156 |
+
* **"Moderate semantic match"** - Some relevance but not a perfect match
|
157 |
+
|
158 |
+
This helps you find properties that truly match what you're looking for, even if they don't use the exact words in your search!
|
159 |
+
"""
|
160 |
+
|
161 |
+
REVIEW_CARD_TEMPLATE = """
|
162 |
+
<div class="review-card">
|
163 |
+
<div class="review-header">
|
164 |
+
{reviewer_name} - {review_date}
|
165 |
+
</div>
|
166 |
+
<div class="review-content">
|
167 |
+
{highlighted_comments}
|
168 |
+
</div>
|
169 |
+
</div>
|
170 |
+
"""
|
171 |
+
|
172 |
+
LISTINGS_COUNT_INFO = "<p style='text-align:center; color:#4285f4;'>Showing {listings_limit} listings in {neighborhood}</p>"
|
173 |
+
|
174 |
+
LISTING_CARD_TEMPLATE = """
|
175 |
+
<div class="listing-card" style="background-color: {background_color}">
|
176 |
+
<h4 class="listing-title">{listing_name}</h4>
|
177 |
+
{price_display}
|
178 |
+
<p class="listing-info"> Room Type: {room_type}</p>
|
179 |
+
<p class="listing-info"> Reviews: {review_count:.0f}</p>
|
180 |
+
{relevance_info}
|
181 |
+
</div>
|
182 |
+
"""
|
183 |
+
|
184 |
+
PRICE_DISPLAY_WITH_DISCOUNT = """<p class="listing-info"> Price : <span class="original-price">${original_price:.0f}</span> <span class="discounted-price">${discounted_price:.0f}</span> {discount_tag}</p>"""
|
185 |
+
|
186 |
+
PRICE_DISPLAY_NORMAL = """<p class="listing-info"> Price : ${price:.0f}</p>"""
|
187 |
+
|
188 |
+
RELEVANCE_INFO_LISTING = """<p class="listing-info"> Relevance: {relevance_percentage:.0f}% </p>"""
|
189 |
+
|
190 |
+
TRAFFIC_DISCOUNT_DISPLAY = """
|
191 |
+
<div style='background-color: #e8f5e9; padding: 5px; margin: 5px 0; border-radius: 4px; border-left: 3px solid #4caf50;'>
|
192 |
+
<p style='margin: 2px 0; color: #2e7d32;'><strong>{discount_info}</strong></p>
|
193 |
+
<p style='margin: 2px 0; font-size: 0.9em;'>Avg. {avg_vehicle_count:.1f} vehicles across {observation_count} observations</p>
|
194 |
+
</div>
|
195 |
+
"""
|
196 |
+
|
197 |
+
TRAFFIC_POPUP_BASE = """
|
198 |
+
<div style='min-width: 150px; padding: 10px;'>
|
199 |
+
<p style='margin: 5px 0;'><strong>Location ID:</strong> {location_id}</p>
|
200 |
+
{discount_display}
|
201 |
+
"""
|
202 |
+
|
203 |
+
TRAFFIC_RECORDS_HEADER = "<h4>Recent Records (showing {recent_count} of {total_count} total):</h4>"
|
204 |
+
|
205 |
+
TRAFFIC_RECORD_ENTRY = """
|
206 |
+
<div style='border-top: 1px solid #ccc; padding: 5px 0;'>
|
207 |
+
<p style='margin: 2px 0;'><strong>Time:</strong> {capture_time}</p>
|
208 |
+
<p style='margin: 2px 0;'><strong>Vehicles:</strong> {vehicle_count}</p>
|
209 |
+
{image_html}
|
210 |
+
</div>
|
211 |
+
"""
|
212 |
+
|
213 |
+
TRAFFIC_IMAGE_HTML = """
|
214 |
+
<img src='data:image/jpeg;base64,{base64_encoded}'
|
215 |
+
style='max-width: 100px; max-height: 100px; margin: 5px 0;'
|
216 |
+
alt='Processed Image'>
|
217 |
+
"""
|
218 |
+
|
219 |
+
TRAFFIC_NO_RECORDS = "<p>No records available</p>"
|
AirbnbMapVisualiser.py → HKUSTBNBVisualiser.py
RENAMED
@@ -6,11 +6,21 @@ from sentence_transformers import SentenceTransformer, util
|
|
6 |
from geopy.distance import geodesic
|
7 |
import logging
|
8 |
|
9 |
-
|
10 |
-
from
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def __init__(self):
|
15 |
self.connection_params = {
|
16 |
'user': 'slliac',
|
@@ -26,30 +36,19 @@ class AirbnbMapVisualiser:
|
|
26 |
increment=1,
|
27 |
getmode=oracledb.SPOOL_ATTRVAL_WAIT
|
28 |
)
|
29 |
-
|
30 |
-
# Initialize TrafficSpotManager with minimal data
|
31 |
self.traffic_manager = TrafficSpotManager(self.connection_params)
|
32 |
logging.info(f"Traffic spots initialized, {len(self.traffic_manager.traffic_spots)} spots loaded")
|
33 |
-
|
34 |
-
# Initialize sentence transformer model
|
35 |
try:
|
36 |
-
# Using a sentence transformer model specifically optimized for semantic search
|
37 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
38 |
self.model = SentenceTransformer(model_name)
|
39 |
print(f"Loaded Sentence Transformer model: {model_name}")
|
40 |
except Exception as e:
|
41 |
print(f"Error loading model: {str(e)}")
|
42 |
self.model = None
|
43 |
-
|
44 |
try:
|
45 |
self.neighborhoods = self.get_all_neighborhoods()
|
46 |
self.cached_listings = {}
|
47 |
-
|
48 |
-
self.cached_listings = {}
|
49 |
-
# Pre-cache Southern neighborhood with default limit of 10
|
50 |
-
self.cached_listings["Southern"] = {}
|
51 |
-
self.cached_listings["Southern"][10] = self.get_neighborhood_listings("Southern", 10)
|
52 |
-
self.cached_embeddings = {} # Cache for listing embeddings
|
53 |
except Exception as e:
|
54 |
print(f"Initialization error: {str(e)}")
|
55 |
self.neighborhoods = []
|
@@ -57,26 +56,18 @@ class AirbnbMapVisualiser:
|
|
57 |
self.cached_embeddings = {}
|
58 |
|
59 |
def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
|
60 |
-
"""Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
|
61 |
nearest_spot = None
|
62 |
min_distance = float('inf')
|
63 |
-
|
64 |
-
# Check each traffic spot
|
65 |
for spot in self.traffic_manager.traffic_spots:
|
66 |
if not spot.is_valid():
|
67 |
continue
|
68 |
-
|
69 |
-
# Calculate distance in kilometers
|
70 |
distance = geodesic(
|
71 |
(airbnb_lat, airbnb_lng),
|
72 |
(spot.latitude, spot.longitude)
|
73 |
).kilometers
|
74 |
-
|
75 |
-
# Update nearest if this spot is closer and within max distance
|
76 |
if distance < min_distance and distance <= max_distance_km:
|
77 |
min_distance = distance
|
78 |
nearest_spot = spot
|
79 |
-
|
80 |
if nearest_spot:
|
81 |
return nearest_spot, min_distance
|
82 |
else:
|
@@ -88,12 +79,7 @@ class AirbnbMapVisualiser:
|
|
88 |
cursor = connection.cursor()
|
89 |
cursor.prefetchrows = 50
|
90 |
cursor.arraysize = 50
|
91 |
-
cursor.execute(
|
92 |
-
SELECT DISTINCT NEIGHBOURHOOD
|
93 |
-
FROM airbnb_master_data
|
94 |
-
WHERE NEIGHBOURHOOD IS NOT NULL
|
95 |
-
ORDER BY NEIGHBOURHOOD
|
96 |
-
""")
|
97 |
neighborhoods = [row[0] for row in cursor.fetchall()]
|
98 |
return neighborhoods
|
99 |
except Exception as e:
|
@@ -103,25 +89,12 @@ class AirbnbMapVisualiser:
|
|
103 |
self.pool.release(connection)
|
104 |
|
105 |
def get_neighborhood_listings(self, neighborhood, limit=10):
|
106 |
-
"""
|
107 |
-
Get listings for a neighborhood with a specified limit.
|
108 |
-
|
109 |
-
Args:
|
110 |
-
neighborhood: The neighborhood to get listings for
|
111 |
-
limit: Maximum number of listings to return (10, 20, 30, 40, or 50)
|
112 |
-
|
113 |
-
Returns:
|
114 |
-
List of listings data
|
115 |
-
"""
|
116 |
-
# Ensure limit is one of the allowed values
|
117 |
if limit not in [10, 20, 30, 40, 50]:
|
118 |
-
limit = 10
|
119 |
|
120 |
-
# Check if we already have this neighborhood and limit cached
|
121 |
if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
|
122 |
return self.cached_listings[neighborhood][limit]
|
123 |
|
124 |
-
# Initialize neighborhood in cache if needed
|
125 |
if neighborhood not in self.cached_listings:
|
126 |
self.cached_listings[neighborhood] = {}
|
127 |
|
@@ -130,22 +103,11 @@ class AirbnbMapVisualiser:
|
|
130 |
cursor = connection.cursor()
|
131 |
cursor.prefetchrows = 50
|
132 |
cursor.arraysize = 50
|
133 |
-
cursor.execute(
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
FROM airbnb_master_data m
|
139 |
-
LEFT JOIN airbnb_reviews_data r ON m.ID = r.LISTING_ID
|
140 |
-
WHERE m.LATITUDE IS NOT NULL
|
141 |
-
AND m.LONGITUDE IS NOT NULL
|
142 |
-
AND m.NEIGHBOURHOOD = :neighborhood
|
143 |
-
GROUP BY m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
|
144 |
-
m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
|
145 |
-
m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
|
146 |
-
ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
|
147 |
-
FETCH FIRST :limit ROWS ONLY
|
148 |
-
""", neighborhood=neighborhood, limit=limit)
|
149 |
|
150 |
listings = cursor.fetchall()
|
151 |
self.cached_listings[neighborhood][limit] = listings
|
@@ -160,18 +122,10 @@ class AirbnbMapVisualiser:
|
|
160 |
connection = self.pool.acquire()
|
161 |
try:
|
162 |
cursor = connection.cursor()
|
163 |
-
cursor.execute(
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
THEN SUBSTR(COMMENTS, 1, 200) || '...'
|
168 |
-
ELSE COMMENTS
|
169 |
-
END as COMMENTS
|
170 |
-
FROM AIRBNB_REVIEWS_DATA
|
171 |
-
WHERE LISTING_ID = :listing_id
|
172 |
-
AND ROWNUM <= 10
|
173 |
-
ORDER BY REVIEW_DATE DESC
|
174 |
-
""", listing_id=int(listing_id))
|
175 |
|
176 |
reviews = cursor.fetchall()
|
177 |
formatted_reviews = []
|
@@ -192,26 +146,17 @@ class AirbnbMapVisualiser:
|
|
192 |
self.pool.release(connection)
|
193 |
|
194 |
def get_listing_reviews_for_search(self, listing_id):
|
195 |
-
"""Get reviews for search analysis and handle LOB objects correctly"""
|
196 |
connection = self.pool.acquire()
|
197 |
try:
|
198 |
cursor = connection.cursor()
|
199 |
-
cursor.execute(
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
AND COMMENTS IS NOT NULL
|
204 |
-
AND ROWNUM <= 10
|
205 |
-
ORDER BY REVIEW_DATE DESC
|
206 |
-
""", listing_id=int(listing_id))
|
207 |
-
|
208 |
reviews = cursor.fetchall()
|
209 |
-
|
210 |
-
# Properly convert LOB objects to strings
|
211 |
formatted_reviews = []
|
212 |
for review in reviews:
|
213 |
if review[0] is not None:
|
214 |
-
# Check if it's a LOB object and read it
|
215 |
if hasattr(review[0], 'read'):
|
216 |
formatted_reviews.append(review[0].read())
|
217 |
else:
|
@@ -225,35 +170,10 @@ class AirbnbMapVisualiser:
|
|
225 |
finally:
|
226 |
self.pool.release(connection)
|
227 |
|
228 |
-
def get_title_review_embeddings(self, title, reviews):
|
229 |
-
"""Get separate embeddings for title and reviews using Sentence Transformer"""
|
230 |
-
if self.model is None:
|
231 |
-
return None, None
|
232 |
-
|
233 |
-
try:
|
234 |
-
# Encode the title
|
235 |
-
title_embedding = self.model.encode(title, convert_to_tensor=True)
|
236 |
-
|
237 |
-
# Encode reviews if available, otherwise return None
|
238 |
-
review_embedding = None
|
239 |
-
if reviews and len(reviews) > 0:
|
240 |
-
# Concatenate reviews into a single text to get embedding
|
241 |
-
review_text = " ".join(reviews[:5]) # Limit to first 5 reviews
|
242 |
-
review_embedding = self.model.encode(review_text, convert_to_tensor=True)
|
243 |
-
|
244 |
-
return title_embedding, review_embedding
|
245 |
-
|
246 |
-
except Exception as e:
|
247 |
-
print(f"Error getting embeddings: {str(e)}")
|
248 |
-
return None, None
|
249 |
-
|
250 |
def compute_similarity(self, query_embedding, target_embedding):
|
251 |
-
"""Compute cosine similarity between two embeddings"""
|
252 |
if query_embedding is None or target_embedding is None:
|
253 |
return 0.0
|
254 |
-
|
255 |
try:
|
256 |
-
# Use the util function from sentence_transformers for cosine similarity
|
257 |
similarity = util.pytorch_cos_sim(query_embedding, target_embedding).item()
|
258 |
return similarity
|
259 |
except Exception as e:
|
@@ -261,36 +181,24 @@ class AirbnbMapVisualiser:
|
|
261 |
return 0.0
|
262 |
|
263 |
def compute_search_scores(self, df, search_query):
|
264 |
-
"""Compute search scores comparing query with title and reviews separately"""
|
265 |
if not search_query or self.model is None:
|
266 |
return [0.0] * len(df)
|
267 |
-
|
268 |
try:
|
269 |
-
# Encode the search query
|
270 |
query_key = f"query_{search_query}"
|
271 |
if query_key not in self.cached_embeddings:
|
272 |
self.cached_embeddings[query_key] = self.model.encode(search_query, convert_to_tensor=True)
|
273 |
query_embedding = self.cached_embeddings[query_key]
|
274 |
-
|
275 |
-
# Calculate similarity for each listing
|
276 |
scores = []
|
277 |
-
|
278 |
for idx, row in df.iterrows():
|
279 |
-
# Get title and reviews
|
280 |
title = str(row['name'])
|
281 |
reviews = self.get_listing_reviews_for_search(row['id'])
|
282 |
-
|
283 |
-
# Get or compute embeddings
|
284 |
title_key = f"title_{row['id']}"
|
285 |
review_key = f"review_{row['id']}"
|
286 |
-
|
287 |
if title_key not in self.cached_embeddings:
|
288 |
title_embedding = self.model.encode(title, convert_to_tensor=True)
|
289 |
self.cached_embeddings[title_key] = title_embedding
|
290 |
else:
|
291 |
title_embedding = self.cached_embeddings[title_key]
|
292 |
-
|
293 |
-
# Only compute review embedding if we have reviews
|
294 |
review_embedding = None
|
295 |
if reviews and len(reviews) > 0:
|
296 |
if review_key not in self.cached_embeddings:
|
@@ -299,23 +207,12 @@ class AirbnbMapVisualiser:
|
|
299 |
self.cached_embeddings[review_key] = review_embedding
|
300 |
else:
|
301 |
review_embedding = self.cached_embeddings[review_key]
|
302 |
-
|
303 |
-
# Compute similarities
|
304 |
title_similarity = self.compute_similarity(query_embedding, title_embedding)
|
305 |
review_similarity = 0.0
|
306 |
if review_embedding is not None:
|
307 |
review_similarity = self.compute_similarity(query_embedding, review_embedding)
|
308 |
-
|
309 |
-
# Calculate final score - emphasis on reviews if available
|
310 |
-
if review_embedding is not None:
|
311 |
-
# Weight reviews more heavily if there are reviews
|
312 |
-
final_score = title_similarity * 0.4 + review_similarity * 0.6
|
313 |
-
else:
|
314 |
-
# Use only title similarity if no reviews
|
315 |
-
final_score = title_similarity
|
316 |
-
|
317 |
scores.append(final_score)
|
318 |
-
|
319 |
return scores
|
320 |
|
321 |
except Exception as e:
|
@@ -323,93 +220,18 @@ class AirbnbMapVisualiser:
|
|
323 |
return [0.0] * len(df)
|
324 |
|
325 |
def sort_by_relevance(self, df, search_query):
|
326 |
-
"""Sort listings by relevance using sentence transformer comparison"""
|
327 |
if not search_query:
|
328 |
return df
|
329 |
-
|
330 |
-
# Compute semantic similarity scores
|
331 |
scores = self.compute_search_scores(df, search_query)
|
332 |
df['relevance_score'] = scores
|
333 |
df['relevance_percentage'] = df['relevance_score'] * 100
|
334 |
-
|
335 |
-
# Add relevance description
|
336 |
-
def get_relevance_description(score):
|
337 |
-
if score >= 80:
|
338 |
-
return "Perfect match"
|
339 |
-
elif score >= 60:
|
340 |
-
return "Excellent match"
|
341 |
-
elif score >= 40:
|
342 |
-
return "Good match"
|
343 |
-
elif score >= 20:
|
344 |
-
return "Partial match"
|
345 |
-
else:
|
346 |
-
return "Low relevance"
|
347 |
-
|
348 |
-
df['relevance_features'] = df['relevance_percentage'].apply(get_relevance_description)
|
349 |
-
|
350 |
-
# Add match information about which part matched better
|
351 |
-
def get_match_source(row):
|
352 |
-
# Get title and reviews
|
353 |
-
title = str(row['name'])
|
354 |
-
reviews = self.get_listing_reviews_for_search(row['id'])
|
355 |
-
|
356 |
-
# Recompute individual similarities to determine match source
|
357 |
-
title_similarity = 0.0
|
358 |
-
review_similarity = 0.0
|
359 |
-
|
360 |
-
if self.model is not None:
|
361 |
-
query_embedding = self.model.encode(search_query, convert_to_tensor=True)
|
362 |
-
title_embedding = self.model.encode(title, convert_to_tensor=True)
|
363 |
-
title_similarity = self.compute_similarity(query_embedding, title_embedding)
|
364 |
-
|
365 |
-
if reviews and len(reviews) > 0:
|
366 |
-
review_text = " ".join(reviews[:5])
|
367 |
-
review_embedding = self.model.encode(review_text, convert_to_tensor=True)
|
368 |
-
review_similarity = self.compute_similarity(query_embedding, review_embedding)
|
369 |
-
|
370 |
-
# Determine which source matched better
|
371 |
-
if title_similarity > 0.2 and review_similarity > 0:
|
372 |
-
return "Strong match in title and reviews"
|
373 |
-
elif title_similarity > 0.2 and review_similarity > 0.2:
|
374 |
-
return "Strong match in title and strong match in reviews"
|
375 |
-
elif title_similarity > 0.2:
|
376 |
-
return "Strong match in listing title"
|
377 |
-
elif review_similarity > 0.2:
|
378 |
-
return "Strong match in reviews"
|
379 |
-
|
380 |
-
# Only calculate match source if score is above threshold
|
381 |
-
df['matching_features'] = df.apply(
|
382 |
-
lambda row: get_match_source(row) if row['relevance_score'] > 0.2 else "Low semantic match",
|
383 |
-
axis=1
|
384 |
-
)
|
385 |
-
|
386 |
-
# Sort by relevance score
|
387 |
return df.sort_values('relevance_score', ascending=False)
|
388 |
|
389 |
def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
|
390 |
selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
|
391 |
-
"""
|
392 |
-
Create a map and dataframe of listings for a neighborhood
|
393 |
-
|
394 |
-
Args:
|
395 |
-
neighborhood: Neighborhood to get listings for
|
396 |
-
show_traffic: Whether to show traffic spots
|
397 |
-
center_lat: Center latitude for the map
|
398 |
-
center_lng: Center longitude for the map
|
399 |
-
selected_id: ID of the selected listing
|
400 |
-
search_query: Search query for filtering listings
|
401 |
-
current_page: Current page of pagination
|
402 |
-
items_per_page: Number of items per page
|
403 |
-
listings_limit: Maximum number of listings to display (10, 20, 30, 40, or 50)
|
404 |
-
|
405 |
-
Returns:
|
406 |
-
Tuple of (map, dataframe)
|
407 |
-
"""
|
408 |
-
# Ensure listings_limit is valid
|
409 |
if listings_limit not in [10, 20, 30, 40, 50]:
|
410 |
listings_limit = 10
|
411 |
|
412 |
-
# Get the listings with the specified limit
|
413 |
listings = self.get_neighborhood_listings(neighborhood, listings_limit)
|
414 |
|
415 |
if not listings:
|
@@ -428,7 +250,6 @@ class AirbnbMapVisualiser:
|
|
428 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
429 |
|
430 |
if search_query:
|
431 |
-
# Use the sentence transformer semantic search
|
432 |
df = self.sort_by_relevance(df, search_query)
|
433 |
|
434 |
if df.empty:
|
@@ -444,79 +265,49 @@ class AirbnbMapVisualiser:
|
|
444 |
tiles='OpenStreetMap'
|
445 |
)
|
446 |
|
447 |
-
# Calculate pagination indices
|
448 |
-
total_items = len(df)
|
449 |
-
start_idx = (current_page - 1) * items_per_page
|
450 |
-
end_idx = min(start_idx + items_per_page, total_items)
|
451 |
-
|
452 |
-
# Get the current page's listings
|
453 |
-
current_page_df = df.iloc[start_idx:end_idx]
|
454 |
-
|
455 |
-
# Create a list to store all traffic spots we need to display
|
456 |
all_traffic_spots_to_display = set()
|
457 |
-
|
458 |
-
# Find nearest traffic spots for ALL listings
|
459 |
all_nearest_traffic_spots = {}
|
460 |
-
|
461 |
-
# First find all nearest traffic spots
|
462 |
for idx, row in df.iterrows():
|
463 |
nearest_spot, distance = self.find_nearest_traffic_spot(row['latitude'], row['longitude'])
|
464 |
if nearest_spot:
|
465 |
all_nearest_traffic_spots[row['id']] = (nearest_spot, distance)
|
466 |
all_traffic_spots_to_display.add(nearest_spot.key)
|
467 |
|
468 |
-
# Create a feature group for connection lines
|
469 |
lines_group = folium.FeatureGroup(name="Connection Lines")
|
470 |
m.add_child(lines_group)
|
471 |
|
472 |
-
# Display all traffic spots
|
473 |
if show_traffic and all_traffic_spots_to_display:
|
474 |
self.traffic_manager.add_spots_to_map(m, all_traffic_spots_to_display)
|
475 |
|
476 |
-
# Add all Airbnb markers and connection lines
|
477 |
for idx, row in df.iterrows():
|
478 |
marker_id = f"marker_{row['id']}"
|
479 |
-
reviews = self.get_listing_reviews(row['id'])
|
480 |
-
review_button_key = f"review_btn_{row['id']}"
|
481 |
-
|
482 |
-
# Get traffic spot info if available for this listing
|
483 |
traffic_spot_info = ""
|
484 |
discount_info = ""
|
485 |
discounted_price = row['price']
|
486 |
|
487 |
-
# Check if this listing has a nearest traffic spot
|
488 |
if row['id'] in all_nearest_traffic_spots:
|
489 |
nearest_spot, distance = all_nearest_traffic_spots[row['id']]
|
490 |
-
|
491 |
-
# Get discount rate and apply to price
|
492 |
discount_rate = nearest_spot.get_discount_rate()
|
|
|
493 |
if discount_rate > 0:
|
494 |
discounted_price = row['price'] * (1 - discount_rate)
|
495 |
discount_percentage = int(discount_rate * 100)
|
496 |
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
"""
|
505 |
|
506 |
-
# Format distance for display (convert to meters if less than 1km)
|
507 |
distance_str = f"{distance:.2f} km" if distance >= 0.1 else f"{distance * 1000:.0f} meters"
|
508 |
|
509 |
-
traffic_spot_info =
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
<strong>Distance:</strong> {distance_str}
|
515 |
-
</p>
|
516 |
-
</div>
|
517 |
-
"""
|
518 |
-
|
519 |
-
# Add connection lines for ALL listings with nearby traffic spots
|
520 |
folium.PolyLine(
|
521 |
locations=[
|
522 |
[row['latitude'], row['longitude']],
|
@@ -531,35 +322,28 @@ class AirbnbMapVisualiser:
|
|
531 |
|
532 |
relevance_info = ""
|
533 |
if search_query and 'relevance_percentage' in row and 'relevance_features' in row:
|
534 |
-
relevance_info =
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
<br/>
|
541 |
-
<strong>Match Type:</strong> {row['matching_features']}
|
542 |
-
</p>
|
543 |
-
</div>
|
544 |
-
"""
|
545 |
-
|
546 |
-
# Show price with strikethrough if discounted
|
547 |
price_display = f"<strong>Price:</strong> ${row['price']:.0f}"
|
548 |
if discount_info:
|
549 |
-
price_display = f"<strong>Price:</strong>
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
|
564 |
marker_color = 'green' if selected_id == row['id'] else 'red'
|
565 |
marker = folium.Marker(
|
@@ -572,30 +356,7 @@ class AirbnbMapVisualiser:
|
|
572 |
if selected_id is not None and row['id'] == selected_id:
|
573 |
marker._name = marker_id
|
574 |
|
575 |
-
|
576 |
-
folium.Element("""
|
577 |
-
<script>
|
578 |
-
function showTrafficSpot(lat, lng) {
|
579 |
-
// Get the map object
|
580 |
-
var map = document.querySelector('.folium-map')._leaflet_map;
|
581 |
-
|
582 |
-
// Pan to the traffic spot and zoom in
|
583 |
-
map.setView([lat, lng], 18);
|
584 |
-
|
585 |
-
// Find and open the popup for the traffic spot marker
|
586 |
-
map.eachLayer(function(layer) {
|
587 |
-
if (layer instanceof L.Marker) {
|
588 |
-
var latLng = layer.getLatLng();
|
589 |
-
if (Math.abs(latLng.lat - lat) < 0.0001 && Math.abs(latLng.lng - lng) < 0.0001) {
|
590 |
-
layer.openPopup();
|
591 |
-
}
|
592 |
-
}
|
593 |
-
});
|
594 |
-
}
|
595 |
-
</script>
|
596 |
-
""").add_to(m)
|
597 |
-
|
598 |
-
# Add layer control to toggle connection lines
|
599 |
folium.LayerControl().add_to(m)
|
600 |
|
601 |
return m, df
|
|
|
6 |
from geopy.distance import geodesic
|
7 |
import logging
|
8 |
|
9 |
+
from TDTrafficSpot import TrafficSpotManager
|
10 |
+
from HKUSTBNBConstant import (
|
11 |
+
GET_ALL_NEIGHBORHOODS,
|
12 |
+
GET_NEIGHBORHOOD_LISTINGS,
|
13 |
+
GET_LISTING_REVIEWS,
|
14 |
+
GET_LISTING_REVIEWS_FOR_SEARCH,
|
15 |
+
DISCOUNT_INFO_TEMPLATE,
|
16 |
+
TRAFFIC_SPOT_INFO_TEMPLATE,
|
17 |
+
RELEVANCE_INFO_TEMPLATE,
|
18 |
+
POPUP_CONTENT_TEMPLATE,
|
19 |
+
MAP_SCRIPT
|
20 |
+
)
|
21 |
+
|
22 |
+
|
23 |
+
class HKUSTBNBVisualiser:
|
24 |
def __init__(self):
|
25 |
self.connection_params = {
|
26 |
'user': 'slliac',
|
|
|
36 |
increment=1,
|
37 |
getmode=oracledb.SPOOL_ATTRVAL_WAIT
|
38 |
)
|
|
|
|
|
39 |
self.traffic_manager = TrafficSpotManager(self.connection_params)
|
40 |
logging.info(f"Traffic spots initialized, {len(self.traffic_manager.traffic_spots)} spots loaded")
|
|
|
|
|
41 |
try:
|
|
|
42 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
43 |
self.model = SentenceTransformer(model_name)
|
44 |
print(f"Loaded Sentence Transformer model: {model_name}")
|
45 |
except Exception as e:
|
46 |
print(f"Error loading model: {str(e)}")
|
47 |
self.model = None
|
|
|
48 |
try:
|
49 |
self.neighborhoods = self.get_all_neighborhoods()
|
50 |
self.cached_listings = {}
|
51 |
+
self.cached_embeddings = {}
|
|
|
|
|
|
|
|
|
|
|
52 |
except Exception as e:
|
53 |
print(f"Initialization error: {str(e)}")
|
54 |
self.neighborhoods = []
|
|
|
56 |
self.cached_embeddings = {}
|
57 |
|
58 |
def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
|
|
|
59 |
nearest_spot = None
|
60 |
min_distance = float('inf')
|
|
|
|
|
61 |
for spot in self.traffic_manager.traffic_spots:
|
62 |
if not spot.is_valid():
|
63 |
continue
|
|
|
|
|
64 |
distance = geodesic(
|
65 |
(airbnb_lat, airbnb_lng),
|
66 |
(spot.latitude, spot.longitude)
|
67 |
).kilometers
|
|
|
|
|
68 |
if distance < min_distance and distance <= max_distance_km:
|
69 |
min_distance = distance
|
70 |
nearest_spot = spot
|
|
|
71 |
if nearest_spot:
|
72 |
return nearest_spot, min_distance
|
73 |
else:
|
|
|
79 |
cursor = connection.cursor()
|
80 |
cursor.prefetchrows = 50
|
81 |
cursor.arraysize = 50
|
82 |
+
cursor.execute(GET_ALL_NEIGHBORHOODS)
|
|
|
|
|
|
|
|
|
|
|
83 |
neighborhoods = [row[0] for row in cursor.fetchall()]
|
84 |
return neighborhoods
|
85 |
except Exception as e:
|
|
|
89 |
self.pool.release(connection)
|
90 |
|
91 |
def get_neighborhood_listings(self, neighborhood, limit=10):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
if limit not in [10, 20, 30, 40, 50]:
|
93 |
+
limit = 10
|
94 |
|
|
|
95 |
if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
|
96 |
return self.cached_listings[neighborhood][limit]
|
97 |
|
|
|
98 |
if neighborhood not in self.cached_listings:
|
99 |
self.cached_listings[neighborhood] = {}
|
100 |
|
|
|
103 |
cursor = connection.cursor()
|
104 |
cursor.prefetchrows = 50
|
105 |
cursor.arraysize = 50
|
106 |
+
cursor.execute(
|
107 |
+
GET_NEIGHBORHOOD_LISTINGS,
|
108 |
+
neighborhood=neighborhood,
|
109 |
+
limit=limit
|
110 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
listings = cursor.fetchall()
|
113 |
self.cached_listings[neighborhood][limit] = listings
|
|
|
122 |
connection = self.pool.acquire()
|
123 |
try:
|
124 |
cursor = connection.cursor()
|
125 |
+
cursor.execute(
|
126 |
+
GET_LISTING_REVIEWS,
|
127 |
+
listing_id=int(listing_id)
|
128 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
reviews = cursor.fetchall()
|
131 |
formatted_reviews = []
|
|
|
146 |
self.pool.release(connection)
|
147 |
|
148 |
def get_listing_reviews_for_search(self, listing_id):
|
|
|
149 |
connection = self.pool.acquire()
|
150 |
try:
|
151 |
cursor = connection.cursor()
|
152 |
+
cursor.execute(
|
153 |
+
GET_LISTING_REVIEWS_FOR_SEARCH,
|
154 |
+
listing_id=int(listing_id)
|
155 |
+
)
|
|
|
|
|
|
|
|
|
|
|
156 |
reviews = cursor.fetchall()
|
|
|
|
|
157 |
formatted_reviews = []
|
158 |
for review in reviews:
|
159 |
if review[0] is not None:
|
|
|
160 |
if hasattr(review[0], 'read'):
|
161 |
formatted_reviews.append(review[0].read())
|
162 |
else:
|
|
|
170 |
finally:
|
171 |
self.pool.release(connection)
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
def compute_similarity(self, query_embedding, target_embedding):
|
|
|
174 |
if query_embedding is None or target_embedding is None:
|
175 |
return 0.0
|
|
|
176 |
try:
|
|
|
177 |
similarity = util.pytorch_cos_sim(query_embedding, target_embedding).item()
|
178 |
return similarity
|
179 |
except Exception as e:
|
|
|
181 |
return 0.0
|
182 |
|
183 |
def compute_search_scores(self, df, search_query):
|
|
|
184 |
if not search_query or self.model is None:
|
185 |
return [0.0] * len(df)
|
|
|
186 |
try:
|
|
|
187 |
query_key = f"query_{search_query}"
|
188 |
if query_key not in self.cached_embeddings:
|
189 |
self.cached_embeddings[query_key] = self.model.encode(search_query, convert_to_tensor=True)
|
190 |
query_embedding = self.cached_embeddings[query_key]
|
|
|
|
|
191 |
scores = []
|
|
|
192 |
for idx, row in df.iterrows():
|
|
|
193 |
title = str(row['name'])
|
194 |
reviews = self.get_listing_reviews_for_search(row['id'])
|
|
|
|
|
195 |
title_key = f"title_{row['id']}"
|
196 |
review_key = f"review_{row['id']}"
|
|
|
197 |
if title_key not in self.cached_embeddings:
|
198 |
title_embedding = self.model.encode(title, convert_to_tensor=True)
|
199 |
self.cached_embeddings[title_key] = title_embedding
|
200 |
else:
|
201 |
title_embedding = self.cached_embeddings[title_key]
|
|
|
|
|
202 |
review_embedding = None
|
203 |
if reviews and len(reviews) > 0:
|
204 |
if review_key not in self.cached_embeddings:
|
|
|
207 |
self.cached_embeddings[review_key] = review_embedding
|
208 |
else:
|
209 |
review_embedding = self.cached_embeddings[review_key]
|
|
|
|
|
210 |
title_similarity = self.compute_similarity(query_embedding, title_embedding)
|
211 |
review_similarity = 0.0
|
212 |
if review_embedding is not None:
|
213 |
review_similarity = self.compute_similarity(query_embedding, review_embedding)
|
214 |
+
final_score = title_similarity * 0.7 + review_similarity * 0.3 if review_embedding is not None else title_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
scores.append(final_score)
|
|
|
216 |
return scores
|
217 |
|
218 |
except Exception as e:
|
|
|
220 |
return [0.0] * len(df)
|
221 |
|
222 |
def sort_by_relevance(self, df, search_query):
|
|
|
223 |
if not search_query:
|
224 |
return df
|
|
|
|
|
225 |
scores = self.compute_search_scores(df, search_query)
|
226 |
df['relevance_score'] = scores
|
227 |
df['relevance_percentage'] = df['relevance_score'] * 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
return df.sort_values('relevance_score', ascending=False)
|
229 |
|
230 |
def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
|
231 |
selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
if listings_limit not in [10, 20, 30, 40, 50]:
|
233 |
listings_limit = 10
|
234 |
|
|
|
235 |
listings = self.get_neighborhood_listings(neighborhood, listings_limit)
|
236 |
|
237 |
if not listings:
|
|
|
250 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
251 |
|
252 |
if search_query:
|
|
|
253 |
df = self.sort_by_relevance(df, search_query)
|
254 |
|
255 |
if df.empty:
|
|
|
265 |
tiles='OpenStreetMap'
|
266 |
)
|
267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
all_traffic_spots_to_display = set()
|
|
|
|
|
269 |
all_nearest_traffic_spots = {}
|
|
|
|
|
270 |
for idx, row in df.iterrows():
|
271 |
nearest_spot, distance = self.find_nearest_traffic_spot(row['latitude'], row['longitude'])
|
272 |
if nearest_spot:
|
273 |
all_nearest_traffic_spots[row['id']] = (nearest_spot, distance)
|
274 |
all_traffic_spots_to_display.add(nearest_spot.key)
|
275 |
|
|
|
276 |
lines_group = folium.FeatureGroup(name="Connection Lines")
|
277 |
m.add_child(lines_group)
|
278 |
|
|
|
279 |
if show_traffic and all_traffic_spots_to_display:
|
280 |
self.traffic_manager.add_spots_to_map(m, all_traffic_spots_to_display)
|
281 |
|
|
|
282 |
for idx, row in df.iterrows():
|
283 |
marker_id = f"marker_{row['id']}"
|
|
|
|
|
|
|
|
|
284 |
traffic_spot_info = ""
|
285 |
discount_info = ""
|
286 |
discounted_price = row['price']
|
287 |
|
|
|
288 |
if row['id'] in all_nearest_traffic_spots:
|
289 |
nearest_spot, distance = all_nearest_traffic_spots[row['id']]
|
|
|
|
|
290 |
discount_rate = nearest_spot.get_discount_rate()
|
291 |
+
|
292 |
if discount_rate > 0:
|
293 |
discounted_price = row['price'] * (1 - discount_rate)
|
294 |
discount_percentage = int(discount_rate * 100)
|
295 |
|
296 |
+
discount_info = DISCOUNT_INFO_TEMPLATE.format(
|
297 |
+
discount_percentage=discount_percentage,
|
298 |
+
original_price=row['price'],
|
299 |
+
discounted_price=discounted_price,
|
300 |
+
avg_vehicle_count=nearest_spot.avg_vehicle_count,
|
301 |
+
observation_count=len(nearest_spot.dataset_rows)
|
302 |
+
)
|
|
|
303 |
|
|
|
304 |
distance_str = f"{distance:.2f} km" if distance >= 0.1 else f"{distance * 1000:.0f} meters"
|
305 |
|
306 |
+
traffic_spot_info = TRAFFIC_SPOT_INFO_TEMPLATE.format(
|
307 |
+
spot_key=escape(str(nearest_spot.key)),
|
308 |
+
distance_str=distance_str
|
309 |
+
)
|
310 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
folium.PolyLine(
|
312 |
locations=[
|
313 |
[row['latitude'], row['longitude']],
|
|
|
322 |
|
323 |
relevance_info = ""
|
324 |
if search_query and 'relevance_percentage' in row and 'relevance_features' in row:
|
325 |
+
relevance_info = RELEVANCE_INFO_TEMPLATE.format(
|
326 |
+
relevance_percentage=row['relevance_percentage'],
|
327 |
+
relevance_features=row['relevance_features'],
|
328 |
+
matching_features=row['matching_features']
|
329 |
+
)
|
330 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
price_display = f"<strong>Price:</strong> ${row['price']:.0f}"
|
332 |
if discount_info:
|
333 |
+
price_display = (f"<strong>Price:</strong> "
|
334 |
+
f"<span style='text-decoration: line-through;'>${row['price']:.0f}</span> "
|
335 |
+
f"<span style='color: #2e7d32; font-weight: bold;'>${discounted_price:.0f}</span>")
|
336 |
+
|
337 |
+
popup_content = POPUP_CONTENT_TEMPLATE.format(
|
338 |
+
listing_name=escape(str(row['name'])),
|
339 |
+
host_name=escape(str(row['host_name'])),
|
340 |
+
room_type=escape(str(row['room_type'])),
|
341 |
+
price_display=price_display,
|
342 |
+
review_count=row['number_of_reviews'],
|
343 |
+
discount_info=discount_info,
|
344 |
+
traffic_spot_info=traffic_spot_info,
|
345 |
+
relevance_info=relevance_info
|
346 |
+
)
|
347 |
|
348 |
marker_color = 'green' if selected_id == row['id'] else 'red'
|
349 |
marker = folium.Marker(
|
|
|
356 |
if selected_id is not None and row['id'] == selected_id:
|
357 |
marker._name = marker_id
|
358 |
|
359 |
+
folium.Element(MAP_SCRIPT).add_to(m)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
folium.LayerControl().add_to(m)
|
361 |
|
362 |
return m, df
|
TrafficSpot.py → TDTrafficSpot.py
RENAMED
@@ -5,46 +5,56 @@ import base64
|
|
5 |
import numpy as np
|
6 |
from html import escape
|
7 |
from datasets import load_dataset
|
8 |
-
from
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def __init__(self, key, latitude, longitude, dataset_rows=None):
|
13 |
self.key = key
|
14 |
self.latitude = float(latitude) if latitude is not None else None
|
15 |
self.longitude = float(longitude) if longitude is not None else None
|
16 |
-
self.dataset_rows = dataset_rows or []
|
17 |
self.avg_vehicle_count = self.calculate_avg_vehicle_count()
|
|
|
18 |
|
19 |
def is_valid(self):
|
20 |
return self.latitude is not None and self.longitude is not None
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def calculate_avg_vehicle_count(self):
|
23 |
-
"""Calculate average vehicle count from the recent data"""
|
24 |
if not self.dataset_rows:
|
25 |
return 0
|
26 |
|
27 |
-
# Extract vehicle counts from dataset rows
|
28 |
vehicle_counts = [row.get('vehicle_count', 0) for row in self.dataset_rows if 'vehicle_count' in row]
|
29 |
|
30 |
-
# If no valid counts are found, return 0
|
31 |
if not vehicle_counts:
|
32 |
return 0
|
33 |
|
34 |
-
# Calculate and return the average
|
35 |
return np.mean(vehicle_counts)
|
36 |
|
37 |
def get_discount_rate(self):
|
38 |
-
"""Calculate discount rate based on average vehicle count"""
|
39 |
if self.avg_vehicle_count < 2:
|
40 |
-
return 0.20
|
41 |
elif self.avg_vehicle_count < 5:
|
42 |
-
return 0.10
|
43 |
else:
|
44 |
-
return 0.0
|
45 |
|
46 |
def get_discount_info(self):
|
47 |
-
"""Get discount information as a formatted string"""
|
48 |
discount_rate = self.get_discount_rate()
|
49 |
|
50 |
if discount_rate <= 0:
|
@@ -57,55 +67,50 @@ class TrafficSpot:
|
|
57 |
discount_display = ""
|
58 |
|
59 |
if "discount" in discount_info.lower() and "no" not in discount_info.lower():
|
60 |
-
discount_display =
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
if
|
74 |
-
html +=
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
77 |
image_data = row.get('processed_image')
|
78 |
image_html = ""
|
79 |
if image_data:
|
80 |
try:
|
81 |
-
# Encode binary data to base64
|
82 |
base64_encoded = base64.b64encode(image_data).decode('utf-8')
|
83 |
-
|
84 |
-
image_html = f"""
|
85 |
-
<img src='data:image/jpeg;base64,{base64_encoded}'
|
86 |
-
style='max-width: 100px; max-height: 100px; margin: 5px 0;'
|
87 |
-
alt='Processed Image'>
|
88 |
-
"""
|
89 |
except Exception as e:
|
90 |
logging.error(f"Error encoding image for {self.key}: {str(e)}")
|
91 |
image_html = "<p>Image load failed</p>"
|
92 |
|
93 |
-
html +=
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
</div>
|
99 |
-
"""
|
100 |
else:
|
101 |
-
html +=
|
102 |
|
103 |
html += "</div>"
|
104 |
return html
|
105 |
|
106 |
def add_to_map(self, folium_map):
|
107 |
if self.is_valid():
|
108 |
-
|
109 |
if self.avg_vehicle_count < 2:
|
110 |
color = 'blue' # Low traffic - 20% discount
|
111 |
elif self.avg_vehicle_count < 5:
|
@@ -124,64 +129,63 @@ class TrafficSpotManager:
|
|
124 |
def __init__(self, connection_params):
|
125 |
self.connection_params = connection_params
|
126 |
self.traffic_spots = []
|
127 |
-
self.spot_dict = {}
|
128 |
-
# Only load limited spots when initialized
|
129 |
self.load_limited_traffic_spots()
|
130 |
|
131 |
def load_limited_traffic_spots(self, limit=10):
|
132 |
-
"""Load only a very limited set of traffic spots initially"""
|
133 |
try:
|
134 |
dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
|
135 |
-
dataset_list =
|
136 |
-
dataset_list.sort(key=lambda x: x['capture_time'], reverse=True)
|
137 |
|
138 |
-
|
139 |
-
dataset_dict = {}
|
140 |
-
unique_count = 0
|
141 |
for row in dataset_list:
|
142 |
loc_id = row['location_id']
|
143 |
-
if
|
144 |
-
|
|
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
149 |
|
150 |
-
|
151 |
-
|
152 |
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
with oracledb.connect(**self.connection_params) as conn:
|
157 |
cursor = conn.cursor()
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
AND LATITUDE IS NOT NULL
|
163 |
-
AND LONGITUDE IS NOT NULL
|
164 |
-
""".format(','.join([':' + str(i + 1) for i in range(len(location_ids))]))
|
165 |
|
166 |
cursor.execute(query, location_ids)
|
167 |
spots = cursor.fetchall()
|
168 |
|
169 |
self.traffic_spots = [
|
170 |
-
|
171 |
spot[0],
|
172 |
spot[1],
|
173 |
spot[2],
|
174 |
-
|
175 |
)
|
176 |
for spot in spots
|
177 |
]
|
178 |
|
179 |
-
# Build lookup dictionary
|
180 |
for spot in self.traffic_spots:
|
181 |
self.spot_dict[spot.key] = spot
|
182 |
|
183 |
-
|
184 |
-
logging.info(f"Loaded {len(self.traffic_spots)} limited traffic spots")
|
185 |
|
186 |
except Exception as e:
|
187 |
logging.error(f"Error loading traffic spots: {str(e)}")
|
@@ -189,8 +193,6 @@ class TrafficSpotManager:
|
|
189 |
self.spot_dict = {}
|
190 |
|
191 |
def load_specific_traffic_spots(self, keys):
|
192 |
-
"""Load specific traffic spots by their keys"""
|
193 |
-
# Filter out keys we already have
|
194 |
needed_keys = [key for key in keys if key not in self.spot_dict]
|
195 |
|
196 |
if not needed_keys:
|
@@ -198,77 +200,58 @@ class TrafficSpotManager:
|
|
198 |
|
199 |
try:
|
200 |
dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
|
201 |
-
dataset_list =
|
202 |
-
dataset_list.sort(key=lambda x: x['capture_time'], reverse=True)
|
203 |
|
204 |
-
|
205 |
for row in dataset_list:
|
206 |
loc_id = row['location_id']
|
207 |
if loc_id in needed_keys:
|
208 |
-
if loc_id not in
|
209 |
-
|
210 |
-
|
211 |
-
dataset_dict[loc_id].append(row)
|
212 |
|
213 |
-
|
214 |
-
if needed_keys:
|
215 |
with oracledb.connect(**self.connection_params) as conn:
|
216 |
cursor = conn.cursor()
|
217 |
|
218 |
-
# Prepare placeholders for the IN clause
|
219 |
placeholders = ','.join([':' + str(i + 1) for i in range(len(needed_keys))])
|
220 |
|
221 |
-
query =
|
222 |
-
SELECT KEY, LATITUDE, LONGITUDE
|
223 |
-
FROM TD_TRAFFIC_CAMERA_LOCATION
|
224 |
-
WHERE KEY IN ({placeholders})
|
225 |
-
AND LATITUDE IS NOT NULL
|
226 |
-
AND LONGITUDE IS NOT NULL
|
227 |
-
"""
|
228 |
|
229 |
cursor.execute(query, tuple(needed_keys))
|
230 |
spots = cursor.fetchall()
|
231 |
|
232 |
new_spots = [
|
233 |
-
|
234 |
spot[0],
|
235 |
spot[1],
|
236 |
spot[2],
|
237 |
-
|
238 |
)
|
239 |
for spot in spots
|
240 |
]
|
241 |
|
242 |
-
# Add to our collections
|
243 |
for spot in new_spots:
|
244 |
self.spot_dict[spot.key] = spot
|
245 |
self.traffic_spots.append(spot)
|
246 |
|
247 |
-
|
248 |
-
logging.info(f"Loaded {len(new_spots)} additional traffic spots")
|
249 |
|
250 |
except Exception as e:
|
251 |
logging.error(f"Error loading specific traffic spots: {str(e)}")
|
252 |
|
253 |
def add_spots_to_map(self, folium_map, spot_keys=None):
|
254 |
-
"""Add only specific spots to map"""
|
255 |
if spot_keys is None:
|
256 |
-
# If no keys specified, add all loaded spots
|
257 |
for spot in self.traffic_spots:
|
258 |
spot.add_to_map(folium_map)
|
259 |
else:
|
260 |
-
# Add only the specified spots
|
261 |
for key in spot_keys:
|
262 |
if key in self.spot_dict:
|
263 |
self.spot_dict[key].add_to_map(folium_map)
|
264 |
|
265 |
def get_spot_by_key(self, key):
|
266 |
-
"""Get a traffic spot by its key, loading it if necessary"""
|
267 |
if key in self.spot_dict:
|
268 |
return self.spot_dict[key]
|
269 |
|
270 |
-
# Try to load it if we don't have it
|
271 |
self.load_specific_traffic_spots([key])
|
272 |
-
|
273 |
-
# Return if found, None otherwise
|
274 |
return self.spot_dict.get(key)
|
|
|
5 |
import numpy as np
|
6 |
from html import escape
|
7 |
from datasets import load_dataset
|
8 |
+
from HKUSTBNBConstant import (
|
9 |
+
GET_TRAFFIC_CAMERA_LOCATIONS,
|
10 |
+
TRAFFIC_DISCOUNT_DISPLAY,
|
11 |
+
TRAFFIC_POPUP_BASE,
|
12 |
+
TRAFFIC_RECORDS_HEADER,
|
13 |
+
TRAFFIC_RECORD_ENTRY,
|
14 |
+
TRAFFIC_IMAGE_HTML,
|
15 |
+
TRAFFIC_NO_RECORDS
|
16 |
+
)
|
17 |
+
|
18 |
+
|
19 |
+
class TDTrafficSpot:
|
20 |
def __init__(self, key, latitude, longitude, dataset_rows=None):
|
21 |
self.key = key
|
22 |
self.latitude = float(latitude) if latitude is not None else None
|
23 |
self.longitude = float(longitude) if longitude is not None else None
|
24 |
+
self.dataset_rows = dataset_rows or []
|
25 |
self.avg_vehicle_count = self.calculate_avg_vehicle_count()
|
26 |
+
self.recent_display_rows = self.get_recent_display_rows()
|
27 |
|
28 |
def is_valid(self):
|
29 |
return self.latitude is not None and self.longitude is not None
|
30 |
|
31 |
+
def get_recent_display_rows(self, max_display=2):
|
32 |
+
if not self.dataset_rows:
|
33 |
+
return []
|
34 |
+
|
35 |
+
sorted_rows = sorted(self.dataset_rows, key=lambda x: x['capture_time'], reverse=True)
|
36 |
+
return sorted_rows[:max_display]
|
37 |
+
|
38 |
def calculate_avg_vehicle_count(self):
|
|
|
39 |
if not self.dataset_rows:
|
40 |
return 0
|
41 |
|
|
|
42 |
vehicle_counts = [row.get('vehicle_count', 0) for row in self.dataset_rows if 'vehicle_count' in row]
|
43 |
|
|
|
44 |
if not vehicle_counts:
|
45 |
return 0
|
46 |
|
|
|
47 |
return np.mean(vehicle_counts)
|
48 |
|
49 |
def get_discount_rate(self):
|
|
|
50 |
if self.avg_vehicle_count < 2:
|
51 |
+
return 0.20
|
52 |
elif self.avg_vehicle_count < 5:
|
53 |
+
return 0.10
|
54 |
else:
|
55 |
+
return 0.0
|
56 |
|
57 |
def get_discount_info(self):
|
|
|
58 |
discount_rate = self.get_discount_rate()
|
59 |
|
60 |
if discount_rate <= 0:
|
|
|
67 |
discount_display = ""
|
68 |
|
69 |
if "discount" in discount_info.lower() and "no" not in discount_info.lower():
|
70 |
+
discount_display = TRAFFIC_DISCOUNT_DISPLAY.format(
|
71 |
+
discount_info=discount_info,
|
72 |
+
avg_vehicle_count=self.avg_vehicle_count,
|
73 |
+
observation_count=len(self.dataset_rows)
|
74 |
+
)
|
75 |
+
|
76 |
+
html = TRAFFIC_POPUP_BASE.format(
|
77 |
+
location_id=escape(str(self.key)),
|
78 |
+
discount_display=discount_display
|
79 |
+
)
|
80 |
+
|
81 |
+
recent_rows = self.recent_display_rows
|
82 |
+
|
83 |
+
if recent_rows:
|
84 |
+
html += TRAFFIC_RECORDS_HEADER.format(
|
85 |
+
recent_count=len(recent_rows),
|
86 |
+
total_count=len(self.dataset_rows)
|
87 |
+
)
|
88 |
+
|
89 |
+
for row in recent_rows:
|
90 |
image_data = row.get('processed_image')
|
91 |
image_html = ""
|
92 |
if image_data:
|
93 |
try:
|
|
|
94 |
base64_encoded = base64.b64encode(image_data).decode('utf-8')
|
95 |
+
image_html = TRAFFIC_IMAGE_HTML.format(base64_encoded=base64_encoded)
|
|
|
|
|
|
|
|
|
|
|
96 |
except Exception as e:
|
97 |
logging.error(f"Error encoding image for {self.key}: {str(e)}")
|
98 |
image_html = "<p>Image load failed</p>"
|
99 |
|
100 |
+
html += TRAFFIC_RECORD_ENTRY.format(
|
101 |
+
capture_time=escape(str(row['capture_time'])),
|
102 |
+
vehicle_count=escape(str(row['vehicle_count'])),
|
103 |
+
image_html=image_html
|
104 |
+
)
|
|
|
|
|
105 |
else:
|
106 |
+
html += TRAFFIC_NO_RECORDS
|
107 |
|
108 |
html += "</div>"
|
109 |
return html
|
110 |
|
111 |
def add_to_map(self, folium_map):
|
112 |
if self.is_valid():
|
113 |
+
|
114 |
if self.avg_vehicle_count < 2:
|
115 |
color = 'blue' # Low traffic - 20% discount
|
116 |
elif self.avg_vehicle_count < 5:
|
|
|
129 |
def __init__(self, connection_params):
|
130 |
self.connection_params = connection_params
|
131 |
self.traffic_spots = []
|
132 |
+
self.spot_dict = {}
|
|
|
133 |
self.load_limited_traffic_spots()
|
134 |
|
135 |
def load_limited_traffic_spots(self, limit=10):
|
|
|
136 |
try:
|
137 |
dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
|
138 |
+
dataset_list = list(dataset)
|
|
|
139 |
|
140 |
+
location_data = {}
|
|
|
|
|
141 |
for row in dataset_list:
|
142 |
loc_id = row['location_id']
|
143 |
+
if loc_id not in location_data:
|
144 |
+
location_data[loc_id] = []
|
145 |
+
location_data[loc_id].append(row)
|
146 |
|
147 |
+
if len(location_data) > limit:
|
148 |
+
recent_activities = {}
|
149 |
+
for loc_id, rows in location_data.items():
|
150 |
+
if rows:
|
151 |
+
most_recent = max(rows, key=lambda x: x['capture_time'])
|
152 |
+
recent_activities[loc_id] = most_recent['capture_time']
|
153 |
|
154 |
+
top_locations = sorted(recent_activities.items(), key=lambda x: x[1], reverse=True)[:limit]
|
155 |
+
selected_locations = [loc_id for loc_id, _ in top_locations]
|
156 |
|
157 |
+
location_data = {loc_id: location_data[loc_id] for loc_id in selected_locations}
|
158 |
+
|
159 |
+
if not location_data:
|
160 |
+
logging.warning("No locations found in dataset")
|
161 |
+
return
|
162 |
+
|
163 |
+
location_ids = tuple(location_data.keys())
|
164 |
|
165 |
with oracledb.connect(**self.connection_params) as conn:
|
166 |
cursor = conn.cursor()
|
167 |
+
|
168 |
+
placeholders = ','.join([':' + str(i + 1) for i in range(len(location_ids))])
|
169 |
+
|
170 |
+
query = GET_TRAFFIC_CAMERA_LOCATIONS.format(placeholders=placeholders)
|
|
|
|
|
|
|
171 |
|
172 |
cursor.execute(query, location_ids)
|
173 |
spots = cursor.fetchall()
|
174 |
|
175 |
self.traffic_spots = [
|
176 |
+
TDTrafficSpot(
|
177 |
spot[0],
|
178 |
spot[1],
|
179 |
spot[2],
|
180 |
+
location_data.get(spot[0], [])
|
181 |
)
|
182 |
for spot in spots
|
183 |
]
|
184 |
|
|
|
185 |
for spot in self.traffic_spots:
|
186 |
self.spot_dict[spot.key] = spot
|
187 |
|
188 |
+
logging.info(f"Loaded {len(self.traffic_spots)} traffic spots with full historical data")
|
|
|
189 |
|
190 |
except Exception as e:
|
191 |
logging.error(f"Error loading traffic spots: {str(e)}")
|
|
|
193 |
self.spot_dict = {}
|
194 |
|
195 |
def load_specific_traffic_spots(self, keys):
|
|
|
|
|
196 |
needed_keys = [key for key in keys if key not in self.spot_dict]
|
197 |
|
198 |
if not needed_keys:
|
|
|
200 |
|
201 |
try:
|
202 |
dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
|
203 |
+
dataset_list = list(dataset)
|
|
|
204 |
|
205 |
+
location_data = {}
|
206 |
for row in dataset_list:
|
207 |
loc_id = row['location_id']
|
208 |
if loc_id in needed_keys:
|
209 |
+
if loc_id not in location_data:
|
210 |
+
location_data[loc_id] = []
|
211 |
+
location_data[loc_id].append(row)
|
|
|
212 |
|
213 |
+
if location_data and needed_keys:
|
|
|
214 |
with oracledb.connect(**self.connection_params) as conn:
|
215 |
cursor = conn.cursor()
|
216 |
|
|
|
217 |
placeholders = ','.join([':' + str(i + 1) for i in range(len(needed_keys))])
|
218 |
|
219 |
+
query = GET_TRAFFIC_CAMERA_LOCATIONS.format(placeholders=placeholders)
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
cursor.execute(query, tuple(needed_keys))
|
222 |
spots = cursor.fetchall()
|
223 |
|
224 |
new_spots = [
|
225 |
+
TDTrafficSpot(
|
226 |
spot[0],
|
227 |
spot[1],
|
228 |
spot[2],
|
229 |
+
location_data.get(spot[0], [])
|
230 |
)
|
231 |
for spot in spots
|
232 |
]
|
233 |
|
|
|
234 |
for spot in new_spots:
|
235 |
self.spot_dict[spot.key] = spot
|
236 |
self.traffic_spots.append(spot)
|
237 |
|
238 |
+
logging.info(f"Loaded {len(new_spots)} additional traffic spots with full historical data")
|
|
|
239 |
|
240 |
except Exception as e:
|
241 |
logging.error(f"Error loading specific traffic spots: {str(e)}")
|
242 |
|
243 |
def add_spots_to_map(self, folium_map, spot_keys=None):
|
|
|
244 |
if spot_keys is None:
|
|
|
245 |
for spot in self.traffic_spots:
|
246 |
spot.add_to_map(folium_map)
|
247 |
else:
|
|
|
248 |
for key in spot_keys:
|
249 |
if key in self.spot_dict:
|
250 |
self.spot_dict[key].add_to_map(folium_map)
|
251 |
|
252 |
def get_spot_by_key(self, key):
|
|
|
253 |
if key in self.spot_dict:
|
254 |
return self.spot_dict[key]
|
255 |
|
|
|
256 |
self.load_specific_traffic_spots([key])
|
|
|
|
|
257 |
return self.spot_dict.get(key)
|
app.py
CHANGED
@@ -4,8 +4,20 @@ import streamlit as st
|
|
4 |
from html import escape
|
5 |
from streamlit_folium import st_folium, folium_static
|
6 |
import math
|
7 |
-
from
|
8 |
from huggingface_hub import login
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
def load_css(css_file):
|
@@ -14,7 +26,6 @@ def load_css(css_file):
|
|
14 |
|
15 |
|
16 |
def highlight_search_terms(text, search_query):
|
17 |
-
"""Highlight search terms in text"""
|
18 |
if not search_query:
|
19 |
return text
|
20 |
|
@@ -23,7 +34,6 @@ def highlight_search_terms(text, search_query):
|
|
23 |
|
24 |
for term in search_terms:
|
25 |
if term.strip():
|
26 |
-
# Case-insensitive replacement with word boundaries
|
27 |
pattern = f'(?i)\\b{term}\\b'
|
28 |
replacement = f'<span class="highlight">{term}</span>'
|
29 |
highlighted_text = re.sub(pattern, replacement, highlighted_text)
|
@@ -44,28 +54,24 @@ def render_review_dialog():
|
|
44 |
try:
|
45 |
review_date, reviewer_name, comments = review
|
46 |
|
47 |
-
# Highlight search terms in comments if search query exists
|
48 |
highlighted_comments = highlight_search_terms(
|
49 |
str(comments),
|
50 |
st.session_state.search_query
|
51 |
)
|
52 |
|
53 |
-
st.markdown(
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
</div>
|
62 |
-
""", unsafe_allow_html=True)
|
63 |
except Exception as e:
|
64 |
st.error(f"Error displaying review: {str(e)}")
|
65 |
else:
|
66 |
st.info("No reviews available for this listing.")
|
67 |
|
68 |
-
|
69 |
def main():
|
70 |
st.set_page_config(
|
71 |
layout="wide",
|
@@ -73,8 +79,6 @@ def main():
|
|
73 |
initial_sidebar_state="expanded"
|
74 |
)
|
75 |
load_css('style.css')
|
76 |
-
|
77 |
-
# Initialize session state
|
78 |
if 'center_lat' not in st.session_state:
|
79 |
st.session_state.center_lat = None
|
80 |
if 'center_lng' not in st.session_state:
|
@@ -102,142 +106,77 @@ def main():
|
|
102 |
if 'show_search_explanation' not in st.session_state:
|
103 |
st.session_state.show_search_explanation = False
|
104 |
if 'listings_limit' not in st.session_state:
|
105 |
-
st.session_state.listings_limit = 10
|
106 |
-
|
107 |
-
# Initialize visualizer with loading message for tokenizer
|
108 |
if 'visualizer' not in st.session_state:
|
109 |
with st.spinner('Loading HKUST BNB+ ...'):
|
110 |
-
st.session_state.visualizer =
|
111 |
st.session_state.tokenizer_loaded = True
|
112 |
-
|
113 |
visualizer = st.session_state.visualizer
|
114 |
-
|
115 |
-
# Check if visualizer is properly initialized
|
116 |
if visualizer is None or not hasattr(visualizer, 'neighborhoods'):
|
117 |
st.error("Error initializing the application. Please refresh the page.")
|
118 |
return
|
119 |
-
|
120 |
-
# Show explanations if requested
|
121 |
if st.session_state.show_traffic_explanation:
|
122 |
with st.expander("📊 Traffic-Based Discount System", expanded=True):
|
123 |
-
st.markdown(
|
124 |
-
### How HKUST BNB+ Acheived (E)SG , use Traffic Spot from Department of Transport and do traffic analysis hence provided discount according
|
125 |
-
to the average traffic on the previous days.
|
126 |
-
|
127 |
-
We use real-time traffic data to offer you the best possible rates:
|
128 |
-
|
129 |
-
* **Blue Camera Icons**: Areas with very low traffic (less than 2 vehicles detected)
|
130 |
-
* Enjoy a peaceful stay with **20% DISCOUNT** on these properties!
|
131 |
-
|
132 |
-
* **Orange Camera Icons**: Areas with moderate traffic (2-5 vehicles detected)
|
133 |
-
* Get a **10% DISCOUNT** on these properties!
|
134 |
-
|
135 |
-
* **Purple Camera Icons**: Areas with heavier traffic (more than 5 vehicles)
|
136 |
-
* Standard rates apply for these properties
|
137 |
-
|
138 |
-
Look for the blue connecting lines on the map to see which traffic spot affects each property!
|
139 |
-
|
140 |
-
Remark : Currently only few traffic spot avaliable, in the future will provide more.
|
141 |
-
""")
|
142 |
if st.button("Close", key="close_traffic_btn"):
|
143 |
st.session_state.show_traffic_explanation = False
|
144 |
st.rerun()
|
145 |
-
|
146 |
if st.session_state.show_search_explanation:
|
147 |
with st.expander("🔍 Smart Search System", expanded=True):
|
148 |
-
st.markdown(
|
149 |
-
### How HKUST BNB+ Acheived E(S)G , use keyword to provided semantic relevance analysis to matches the require need from HKUST Student
|
150 |
-
|
151 |
-
Our advanced search technology goes beyond simple keyword matching to understand the meaning behind your search terms:
|
152 |
-
|
153 |
-
When you search for terms like "quiet," "convenient," or "spacious," our system:
|
154 |
-
1. Analyzes both listing titles and actual guest reviews
|
155 |
-
2. Understands the context and meaning (not just matching exact words)
|
156 |
-
3. Ranks listings based on overall relevance to your search
|
157 |
-
|
158 |
-
**Search Match Types:**
|
159 |
-
* **"Strong match in title and reviews"** - Perfect matches in both property description and guest experiences
|
160 |
-
* **"Strong match in listing title"** - Property description matches your needs very well
|
161 |
-
* **"Strong match in reviews"** - Guest experiences align perfectly with what you're looking for
|
162 |
-
* **"Better match in listing title/reviews"** - One source is more relevant than the other
|
163 |
-
* **"Moderate semantic match"** - Some relevance but not a perfect match
|
164 |
-
|
165 |
-
This helps you find properties that truly match what you're looking for, even if they don't use the exact words in your search!
|
166 |
-
""")
|
167 |
if st.button("Close", key="close_search_btn"):
|
168 |
st.session_state.show_search_explanation = False
|
169 |
st.rerun()
|
170 |
-
|
171 |
with st.sidebar:
|
172 |
-
st.markdown(
|
173 |
-
'<p class="sidebar-header">HKUST BNB+<BR/></p>',
|
174 |
-
unsafe_allow_html=True)
|
175 |
-
|
176 |
search_query = st.text_input(
|
177 |
"🔍 Search listings",
|
178 |
value=st.session_state.search_query,
|
179 |
placeholder="Try: 'cozy , quiet '"
|
180 |
)
|
181 |
-
|
182 |
if search_query != st.session_state.search_query:
|
183 |
st.session_state.search_query = search_query
|
184 |
st.session_state.current_page = 1
|
185 |
st.session_state.show_review_dialog = False
|
186 |
-
|
187 |
-
st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
|
188 |
-
|
189 |
neighborhood = st.selectbox(
|
190 |
"Select Neighborhood",
|
191 |
options=visualizer.neighborhoods,
|
192 |
index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
|
193 |
)
|
194 |
-
|
195 |
-
# Add dropdown for selecting number of listings to show
|
196 |
listings_limit = st.selectbox(
|
197 |
"Number of listings to show",
|
198 |
options=[10, 20, 30, 40, 50],
|
199 |
-
index=0,
|
200 |
help="Select how many listings to display for this neighborhood"
|
201 |
)
|
202 |
-
|
203 |
-
# Update session state if listings_limit has changed
|
204 |
if listings_limit != st.session_state.listings_limit:
|
205 |
st.session_state.listings_limit = listings_limit
|
206 |
-
# Reset to page 1 when changing the number of listings
|
207 |
st.session_state.current_page = 1
|
208 |
st.session_state.show_review_dialog = False
|
209 |
-
|
210 |
show_traffic = st.checkbox("Show Traffic Cameras", value=True)
|
211 |
-
|
212 |
-
st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
|
213 |
-
|
214 |
-
# Help section in sidebar
|
215 |
st.markdown("### 💡 Help & Information")
|
216 |
-
|
217 |
col1, col2 = st.columns(2)
|
218 |
with col1:
|
219 |
if st.button("Green Discount", key="traffic_info_btn"):
|
220 |
st.session_state.show_traffic_explanation = True
|
221 |
st.rerun()
|
222 |
-
|
223 |
with col2:
|
224 |
if st.button("Semantic Search", key="search_info_btn"):
|
225 |
st.session_state.show_search_explanation = True
|
226 |
st.rerun()
|
227 |
-
|
228 |
if st.button("Reset All", key="reset_btn"):
|
229 |
st.session_state.center_lat = None
|
230 |
st.session_state.center_lng = None
|
231 |
st.session_state.selected_id = None
|
232 |
st.session_state.current_page = 1
|
233 |
st.session_state.search_query = ""
|
234 |
-
st.session_state.listings_limit = 10
|
235 |
st.session_state.show_review_dialog = False
|
236 |
st.session_state.show_traffic_explanation = False
|
237 |
st.session_state.show_search_explanation = False
|
238 |
st.rerun()
|
239 |
-
|
240 |
-
# Create map and get data - pass current page information and listings limit
|
241 |
m, df = visualizer.create_map_and_data(
|
242 |
neighborhood,
|
243 |
show_traffic,
|
@@ -249,8 +188,6 @@ def main():
|
|
249 |
st.session_state.items_per_page,
|
250 |
st.session_state.listings_limit
|
251 |
)
|
252 |
-
|
253 |
-
# Handle neighborhood change
|
254 |
if st.session_state.previous_neighborhood != neighborhood:
|
255 |
st.session_state.current_page = 1
|
256 |
if not df.empty:
|
@@ -260,87 +197,71 @@ def main():
|
|
260 |
st.session_state.previous_neighborhood = neighborhood
|
261 |
st.session_state.show_review_dialog = False
|
262 |
st.rerun()
|
263 |
-
|
264 |
if m is None:
|
265 |
st.error("No data available for the selected neighborhood")
|
266 |
return
|
267 |
-
|
268 |
col1, col2 = st.columns([7, 3])
|
269 |
-
|
270 |
with col1:
|
271 |
st.markdown('<div class="map-container">', unsafe_allow_html=True)
|
272 |
st_folium(m, width=None, height=700)
|
273 |
st.markdown('</div>', unsafe_allow_html=True)
|
274 |
-
|
275 |
with col2:
|
276 |
-
# Display info about how many listings are being shown
|
277 |
st.markdown(
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
281 |
total_items = len(df)
|
282 |
total_pages = math.ceil(total_items / st.session_state.items_per_page)
|
283 |
st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
|
284 |
start_idx = (st.session_state.current_page - 1) * st.session_state.items_per_page
|
285 |
end_idx = min(start_idx + st.session_state.items_per_page, total_items)
|
286 |
-
|
287 |
st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
|
288 |
-
|
289 |
for idx in range(start_idx, end_idx):
|
290 |
row = df.iloc[idx]
|
291 |
background_color = "#E3F2FD" if st.session_state.selected_id == row['id'] else "white"
|
292 |
-
|
293 |
-
# Calculate discount based on nearest traffic spot
|
294 |
discounted_price = row['price']
|
295 |
discount_tag = ""
|
296 |
-
|
297 |
-
# Find nearest traffic spot for this listing
|
298 |
listing_lat = row['latitude']
|
299 |
listing_lng = row['longitude']
|
300 |
-
|
301 |
-
# Use the visualizer's method to find the nearest traffic spot
|
302 |
nearest_spot, distance = visualizer.find_nearest_traffic_spot(listing_lat, listing_lng)
|
303 |
-
|
304 |
-
# Apply discount if there's a nearest spot
|
305 |
if nearest_spot:
|
306 |
discount_rate = nearest_spot.get_discount_rate()
|
307 |
if discount_rate > 0:
|
308 |
discounted_price = row['price'] * (1 - discount_rate)
|
309 |
discount_percentage = int(discount_rate * 100)
|
310 |
discount_tag = f"""<span class="discount-tag">-{discount_percentage}%</span>"""
|
311 |
-
|
312 |
-
# Price display logic
|
313 |
if discount_tag:
|
314 |
-
price_display =
|
|
|
|
|
|
|
|
|
315 |
else:
|
316 |
-
price_display =
|
317 |
-
|
318 |
relevance_info = ""
|
319 |
if st.session_state.search_query and 'relevance_percentage' in row:
|
320 |
-
relevance_info =
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
{relevance_info}</div>
|
333 |
-
""", unsafe_allow_html=True)
|
334 |
-
|
335 |
col_details, col_reviews = st.columns(2)
|
336 |
-
|
337 |
with col_details:
|
338 |
if st.button("View Details", key=f"btn_{row['id']}"):
|
339 |
st.session_state.selected_id = row['id']
|
340 |
st.session_state.center_lat = row['latitude']
|
341 |
st.session_state.center_lng = row['longitude']
|
342 |
st.rerun()
|
343 |
-
|
344 |
with col_reviews:
|
345 |
if st.button("View Reviews", key=f"review_btn_{row['id']}"):
|
346 |
st.session_state.show_review_dialog = True
|
@@ -348,12 +269,8 @@ def main():
|
|
348 |
st.session_state.current_review_listing_name = row['name']
|
349 |
st.session_state.scroll_to_review = True
|
350 |
st.rerun()
|
351 |
-
|
352 |
st.markdown('</div>', unsafe_allow_html=True)
|
353 |
-
|
354 |
-
# Pagination controls
|
355 |
col_prev, col_select, col_next = st.columns([1, 1, 1])
|
356 |
-
|
357 |
with col_select:
|
358 |
page_options = list(range(1, total_pages + 1))
|
359 |
new_page = st.selectbox(
|
@@ -363,7 +280,6 @@ def main():
|
|
363 |
key="page_selector",
|
364 |
label_visibility="collapsed"
|
365 |
)
|
366 |
-
|
367 |
if new_page != st.session_state.current_page:
|
368 |
st.session_state.current_page = new_page
|
369 |
new_start_idx = (new_page - 1) * st.session_state.items_per_page
|
@@ -373,7 +289,6 @@ def main():
|
|
373 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
374 |
st.session_state.show_review_dialog = False
|
375 |
st.rerun()
|
376 |
-
|
377 |
with col_prev:
|
378 |
if st.button("← Previous", disabled=st.session_state.current_page <= 1):
|
379 |
st.session_state.current_page -= 1
|
@@ -384,7 +299,6 @@ def main():
|
|
384 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
385 |
st.session_state.show_review_dialog = False
|
386 |
st.rerun()
|
387 |
-
|
388 |
with col_next:
|
389 |
if st.button("Next →", disabled=st.session_state.current_page >= total_pages):
|
390 |
st.session_state.current_page += 1
|
@@ -395,8 +309,6 @@ def main():
|
|
395 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
396 |
st.session_state.show_review_dialog = False
|
397 |
st.rerun()
|
398 |
-
|
399 |
-
# Show review dialog if active
|
400 |
if st.session_state.show_review_dialog:
|
401 |
render_review_dialog()
|
402 |
|
@@ -405,6 +317,4 @@ if __name__ == "__main__":
|
|
405 |
token = os.environ.get("HF_TOKEN")
|
406 |
if token:
|
407 |
login(token=token)
|
408 |
-
|
409 |
-
else:
|
410 |
-
main()
|
|
|
4 |
from html import escape
|
5 |
from streamlit_folium import st_folium, folium_static
|
6 |
import math
|
7 |
+
from HKUSTBNBVisualiser import HKUSTBNBVisualiser
|
8 |
from huggingface_hub import login
|
9 |
+
from HKUSTBNBConstant import (
|
10 |
+
SIDEBAR_HEADER,
|
11 |
+
SIDEBAR_DIVIDER,
|
12 |
+
TRAFFIC_EXPLANATION,
|
13 |
+
SEARCH_EXPLANATION,
|
14 |
+
REVIEW_CARD_TEMPLATE,
|
15 |
+
LISTINGS_COUNT_INFO,
|
16 |
+
LISTING_CARD_TEMPLATE,
|
17 |
+
PRICE_DISPLAY_WITH_DISCOUNT,
|
18 |
+
PRICE_DISPLAY_NORMAL,
|
19 |
+
RELEVANCE_INFO_LISTING
|
20 |
+
)
|
21 |
|
22 |
|
23 |
def load_css(css_file):
|
|
|
26 |
|
27 |
|
28 |
def highlight_search_terms(text, search_query):
|
|
|
29 |
if not search_query:
|
30 |
return text
|
31 |
|
|
|
34 |
|
35 |
for term in search_terms:
|
36 |
if term.strip():
|
|
|
37 |
pattern = f'(?i)\\b{term}\\b'
|
38 |
replacement = f'<span class="highlight">{term}</span>'
|
39 |
highlighted_text = re.sub(pattern, replacement, highlighted_text)
|
|
|
54 |
try:
|
55 |
review_date, reviewer_name, comments = review
|
56 |
|
|
|
57 |
highlighted_comments = highlight_search_terms(
|
58 |
str(comments),
|
59 |
st.session_state.search_query
|
60 |
)
|
61 |
|
62 |
+
st.markdown(
|
63 |
+
REVIEW_CARD_TEMPLATE.format(
|
64 |
+
reviewer_name=escape(str(reviewer_name)),
|
65 |
+
review_date=escape(str(review_date)),
|
66 |
+
highlighted_comments=highlighted_comments
|
67 |
+
),
|
68 |
+
unsafe_allow_html=True
|
69 |
+
)
|
|
|
|
|
70 |
except Exception as e:
|
71 |
st.error(f"Error displaying review: {str(e)}")
|
72 |
else:
|
73 |
st.info("No reviews available for this listing.")
|
74 |
|
|
|
75 |
def main():
|
76 |
st.set_page_config(
|
77 |
layout="wide",
|
|
|
79 |
initial_sidebar_state="expanded"
|
80 |
)
|
81 |
load_css('style.css')
|
|
|
|
|
82 |
if 'center_lat' not in st.session_state:
|
83 |
st.session_state.center_lat = None
|
84 |
if 'center_lng' not in st.session_state:
|
|
|
106 |
if 'show_search_explanation' not in st.session_state:
|
107 |
st.session_state.show_search_explanation = False
|
108 |
if 'listings_limit' not in st.session_state:
|
109 |
+
st.session_state.listings_limit = 10
|
|
|
|
|
110 |
if 'visualizer' not in st.session_state:
|
111 |
with st.spinner('Loading HKUST BNB+ ...'):
|
112 |
+
st.session_state.visualizer = HKUSTBNBVisualiser()
|
113 |
st.session_state.tokenizer_loaded = True
|
|
|
114 |
visualizer = st.session_state.visualizer
|
|
|
|
|
115 |
if visualizer is None or not hasattr(visualizer, 'neighborhoods'):
|
116 |
st.error("Error initializing the application. Please refresh the page.")
|
117 |
return
|
|
|
|
|
118 |
if st.session_state.show_traffic_explanation:
|
119 |
with st.expander("📊 Traffic-Based Discount System", expanded=True):
|
120 |
+
st.markdown(TRAFFIC_EXPLANATION)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
if st.button("Close", key="close_traffic_btn"):
|
122 |
st.session_state.show_traffic_explanation = False
|
123 |
st.rerun()
|
|
|
124 |
if st.session_state.show_search_explanation:
|
125 |
with st.expander("🔍 Smart Search System", expanded=True):
|
126 |
+
st.markdown(SEARCH_EXPLANATION)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
if st.button("Close", key="close_search_btn"):
|
128 |
st.session_state.show_search_explanation = False
|
129 |
st.rerun()
|
|
|
130 |
with st.sidebar:
|
131 |
+
st.markdown(SIDEBAR_HEADER, unsafe_allow_html=True)
|
|
|
|
|
|
|
132 |
search_query = st.text_input(
|
133 |
"🔍 Search listings",
|
134 |
value=st.session_state.search_query,
|
135 |
placeholder="Try: 'cozy , quiet '"
|
136 |
)
|
|
|
137 |
if search_query != st.session_state.search_query:
|
138 |
st.session_state.search_query = search_query
|
139 |
st.session_state.current_page = 1
|
140 |
st.session_state.show_review_dialog = False
|
141 |
+
st.markdown(SIDEBAR_DIVIDER, unsafe_allow_html=True)
|
|
|
|
|
142 |
neighborhood = st.selectbox(
|
143 |
"Select Neighborhood",
|
144 |
options=visualizer.neighborhoods,
|
145 |
index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
|
146 |
)
|
|
|
|
|
147 |
listings_limit = st.selectbox(
|
148 |
"Number of listings to show",
|
149 |
options=[10, 20, 30, 40, 50],
|
150 |
+
index=0,
|
151 |
help="Select how many listings to display for this neighborhood"
|
152 |
)
|
|
|
|
|
153 |
if listings_limit != st.session_state.listings_limit:
|
154 |
st.session_state.listings_limit = listings_limit
|
|
|
155 |
st.session_state.current_page = 1
|
156 |
st.session_state.show_review_dialog = False
|
|
|
157 |
show_traffic = st.checkbox("Show Traffic Cameras", value=True)
|
158 |
+
st.markdown(SIDEBAR_DIVIDER, unsafe_allow_html=True)
|
|
|
|
|
|
|
159 |
st.markdown("### 💡 Help & Information")
|
|
|
160 |
col1, col2 = st.columns(2)
|
161 |
with col1:
|
162 |
if st.button("Green Discount", key="traffic_info_btn"):
|
163 |
st.session_state.show_traffic_explanation = True
|
164 |
st.rerun()
|
|
|
165 |
with col2:
|
166 |
if st.button("Semantic Search", key="search_info_btn"):
|
167 |
st.session_state.show_search_explanation = True
|
168 |
st.rerun()
|
|
|
169 |
if st.button("Reset All", key="reset_btn"):
|
170 |
st.session_state.center_lat = None
|
171 |
st.session_state.center_lng = None
|
172 |
st.session_state.selected_id = None
|
173 |
st.session_state.current_page = 1
|
174 |
st.session_state.search_query = ""
|
175 |
+
st.session_state.listings_limit = 10
|
176 |
st.session_state.show_review_dialog = False
|
177 |
st.session_state.show_traffic_explanation = False
|
178 |
st.session_state.show_search_explanation = False
|
179 |
st.rerun()
|
|
|
|
|
180 |
m, df = visualizer.create_map_and_data(
|
181 |
neighborhood,
|
182 |
show_traffic,
|
|
|
188 |
st.session_state.items_per_page,
|
189 |
st.session_state.listings_limit
|
190 |
)
|
|
|
|
|
191 |
if st.session_state.previous_neighborhood != neighborhood:
|
192 |
st.session_state.current_page = 1
|
193 |
if not df.empty:
|
|
|
197 |
st.session_state.previous_neighborhood = neighborhood
|
198 |
st.session_state.show_review_dialog = False
|
199 |
st.rerun()
|
|
|
200 |
if m is None:
|
201 |
st.error("No data available for the selected neighborhood")
|
202 |
return
|
|
|
203 |
col1, col2 = st.columns([7, 3])
|
|
|
204 |
with col1:
|
205 |
st.markdown('<div class="map-container">', unsafe_allow_html=True)
|
206 |
st_folium(m, width=None, height=700)
|
207 |
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
208 |
with col2:
|
|
|
209 |
st.markdown(
|
210 |
+
LISTINGS_COUNT_INFO.format(
|
211 |
+
listings_limit=st.session_state.listings_limit,
|
212 |
+
neighborhood=neighborhood
|
213 |
+
),
|
214 |
+
unsafe_allow_html=True
|
215 |
+
)
|
216 |
total_items = len(df)
|
217 |
total_pages = math.ceil(total_items / st.session_state.items_per_page)
|
218 |
st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
|
219 |
start_idx = (st.session_state.current_page - 1) * st.session_state.items_per_page
|
220 |
end_idx = min(start_idx + st.session_state.items_per_page, total_items)
|
|
|
221 |
st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
|
|
|
222 |
for idx in range(start_idx, end_idx):
|
223 |
row = df.iloc[idx]
|
224 |
background_color = "#E3F2FD" if st.session_state.selected_id == row['id'] else "white"
|
|
|
|
|
225 |
discounted_price = row['price']
|
226 |
discount_tag = ""
|
|
|
|
|
227 |
listing_lat = row['latitude']
|
228 |
listing_lng = row['longitude']
|
|
|
|
|
229 |
nearest_spot, distance = visualizer.find_nearest_traffic_spot(listing_lat, listing_lng)
|
|
|
|
|
230 |
if nearest_spot:
|
231 |
discount_rate = nearest_spot.get_discount_rate()
|
232 |
if discount_rate > 0:
|
233 |
discounted_price = row['price'] * (1 - discount_rate)
|
234 |
discount_percentage = int(discount_rate * 100)
|
235 |
discount_tag = f"""<span class="discount-tag">-{discount_percentage}%</span>"""
|
|
|
|
|
236 |
if discount_tag:
|
237 |
+
price_display = PRICE_DISPLAY_WITH_DISCOUNT.format(
|
238 |
+
original_price=row['price'],
|
239 |
+
discounted_price=discounted_price,
|
240 |
+
discount_tag=discount_tag
|
241 |
+
)
|
242 |
else:
|
243 |
+
price_display = PRICE_DISPLAY_NORMAL.format(price=row['price'])
|
|
|
244 |
relevance_info = ""
|
245 |
if st.session_state.search_query and 'relevance_percentage' in row:
|
246 |
+
relevance_info = RELEVANCE_INFO_LISTING.format(relevance_percentage=row['relevance_percentage'])
|
247 |
+
st.markdown(
|
248 |
+
LISTING_CARD_TEMPLATE.format(
|
249 |
+
background_color=background_color,
|
250 |
+
listing_name=escape(str(row['name'])),
|
251 |
+
price_display=price_display,
|
252 |
+
room_type=escape(str(row['room_type'])),
|
253 |
+
review_count=row['number_of_reviews'],
|
254 |
+
relevance_info=relevance_info
|
255 |
+
),
|
256 |
+
unsafe_allow_html=True
|
257 |
+
)
|
|
|
|
|
|
|
258 |
col_details, col_reviews = st.columns(2)
|
|
|
259 |
with col_details:
|
260 |
if st.button("View Details", key=f"btn_{row['id']}"):
|
261 |
st.session_state.selected_id = row['id']
|
262 |
st.session_state.center_lat = row['latitude']
|
263 |
st.session_state.center_lng = row['longitude']
|
264 |
st.rerun()
|
|
|
265 |
with col_reviews:
|
266 |
if st.button("View Reviews", key=f"review_btn_{row['id']}"):
|
267 |
st.session_state.show_review_dialog = True
|
|
|
269 |
st.session_state.current_review_listing_name = row['name']
|
270 |
st.session_state.scroll_to_review = True
|
271 |
st.rerun()
|
|
|
272 |
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
273 |
col_prev, col_select, col_next = st.columns([1, 1, 1])
|
|
|
274 |
with col_select:
|
275 |
page_options = list(range(1, total_pages + 1))
|
276 |
new_page = st.selectbox(
|
|
|
280 |
key="page_selector",
|
281 |
label_visibility="collapsed"
|
282 |
)
|
|
|
283 |
if new_page != st.session_state.current_page:
|
284 |
st.session_state.current_page = new_page
|
285 |
new_start_idx = (new_page - 1) * st.session_state.items_per_page
|
|
|
289 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
290 |
st.session_state.show_review_dialog = False
|
291 |
st.rerun()
|
|
|
292 |
with col_prev:
|
293 |
if st.button("← Previous", disabled=st.session_state.current_page <= 1):
|
294 |
st.session_state.current_page -= 1
|
|
|
299 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
300 |
st.session_state.show_review_dialog = False
|
301 |
st.rerun()
|
|
|
302 |
with col_next:
|
303 |
if st.button("Next →", disabled=st.session_state.current_page >= total_pages):
|
304 |
st.session_state.current_page += 1
|
|
|
309 |
st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
|
310 |
st.session_state.show_review_dialog = False
|
311 |
st.rerun()
|
|
|
|
|
312 |
if st.session_state.show_review_dialog:
|
313 |
render_review_dialog()
|
314 |
|
|
|
317 |
token = os.environ.get("HF_TOKEN")
|
318 |
if token:
|
319 |
login(token=token)
|
320 |
+
main()
|
|
|
|