Gordon Li committed on
Commit
abcd2bb
·
1 Parent(s): 53d8ba3

Code refactoring

Browse files
HKUSTBNBConstant.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HKUSTBNBConstant.py
2
+
3
+ GET_ALL_NEIGHBORHOODS = """
4
+ SELECT DISTINCT NEIGHBOURHOOD
5
+ FROM airbnb_master_data
6
+ WHERE NEIGHBOURHOOD IS NOT NULL
7
+ ORDER BY NEIGHBOURHOOD
8
+ """
9
+
10
+ GET_NEIGHBORHOOD_LISTINGS = """
11
+ SELECT m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
12
+ m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
13
+ COUNT(r.LISTING_ID) as NUMBER_OF_REVIEWS, m.REVIEWS_PER_MONTH,
14
+ m.MINIMUM_NIGHTS, m.AVAILABILITY_365
15
+ FROM airbnb_master_data m
16
+ LEFT JOIN airbnb_reviews_data r ON m.ID = r.LISTING_ID
17
+ WHERE m.LATITUDE IS NOT NULL
18
+ AND m.LONGITUDE IS NOT NULL
19
+ AND m.NEIGHBOURHOOD = :neighborhood
20
+ GROUP BY m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
21
+ m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
22
+ m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
23
+ ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
24
+ FETCH FIRST :limit ROWS ONLY
25
+ """
26
+
27
# Fetch the 10 most recent reviews for a single listing (bind: :listing_id),
# truncating any comment longer than 200 characters and appending '...'.
#
# The row limit is expressed with ORDER BY + FETCH FIRST rather than a
# ROWNUM predicate: Oracle assigns ROWNUM while the WHERE clause is being
# evaluated, i.e. before ORDER BY runs, so `ROWNUM <= 10` would keep 10
# arbitrary rows and only then sort them instead of returning the newest 10.
GET_LISTING_REVIEWS = """
    SELECT REVIEW_DATE, REVIEWER_NAME,
           CASE
               WHEN LENGTH(COMMENTS) > 200
               THEN SUBSTR(COMMENTS, 1, 200) || '...'
               ELSE COMMENTS
           END as COMMENTS
    FROM AIRBNB_REVIEWS_DATA
    WHERE LISTING_ID = :listing_id
    ORDER BY REVIEW_DATE DESC
    FETCH FIRST 10 ROWS ONLY
"""
39
+
40
# Fetch the comment text of the 10 most recent non-empty reviews for a
# single listing (bind: :listing_id); used as input to the semantic search.
#
# ORDER BY + FETCH FIRST replaces the former `ROWNUM <= 10` predicate:
# Oracle evaluates ROWNUM before ORDER BY, so the old form limited to 10
# arbitrary rows first and sorted afterwards, not the 10 newest reviews.
GET_LISTING_REVIEWS_FOR_SEARCH = """
    SELECT COMMENTS
    FROM AIRBNB_REVIEWS_DATA
    WHERE LISTING_ID = :listing_id
    AND COMMENTS IS NOT NULL
    ORDER BY REVIEW_DATE DESC
    FETCH FIRST 10 ROWS ONLY
"""
48
+
49
+ GET_TRAFFIC_CAMERA_LOCATIONS = """
50
+ SELECT KEY, LATITUDE, LONGITUDE
51
+ FROM TD_TRAFFIC_CAMERA_LOCATION
52
+ WHERE KEY IN ({placeholders})
53
+ AND LATITUDE IS NOT NULL
54
+ AND LONGITUDE IS NOT NULL
55
+ """
56
+
57
+ DISCOUNT_INFO_TEMPLATE = """
58
+ <div style='background-color: #e8f5e9; padding: 8px; margin: 10px 0; border-radius: 4px; border-left: 4px solid #4caf50;'>
59
+ <p style='margin: 2px 0; font-weight: bold; color: #2e7d32;'>{discount_percentage}% ENV PROTECTION DISCOUNT!</p>
60
+ <p style='margin: 2px 0; font-size: 0.85em;'>Avg. {avg_vehicle_count:.1f} vehicles across {observation_count} observations</p>
61
+ </div>
62
+ """
63
+
64
+ TRAFFIC_SPOT_INFO_TEMPLATE = """
65
+ <div class='traffic-spot-info' style='margin: 10px 0; padding: 8px; background-color: #f0f8ff; border-radius: 4px; border-left: 4px solid #4285f4;'>
66
+ <p style='margin: 5px 0;'>
67
+ <strong>Nearest Traffic Spot:</strong> {spot_key}
68
+ <br/>
69
+ <strong>Distance:</strong> {distance_str}
70
+ </p>
71
+ </div>
72
+ """
73
+
74
+ RELEVANCE_INFO_TEMPLATE = """
75
+ <div class='relevance-info' style='margin: 10px 0; padding: 8px; background-color: #f8f9fa; border-radius: 4px;'>
76
+ <p style='margin: 5px 0;'>
77
+ <strong>Match Score:</strong> {relevance_percentage:.0f}%
78
+ <br/>
79
+ <strong>Relevance:</strong> {relevance_features}
80
+ <br/>
81
+ <strong>Match Type:</strong> {matching_features}
82
+ </p>
83
+ </div>
84
+ """
85
+
86
+ POPUP_CONTENT_TEMPLATE = """
87
+ <div style='min-width: 280px; max-width: 320px; padding: 15px;'>
88
+ <h4 style='margin: 0 0 10px 0; color: #2c3e50;'>{listing_name}</h4>
89
+ <p style='margin: 5px 0;'><strong>Host:</strong> {host_name}</p>
90
+ <p style='margin: 5px 0;'><strong>Room Type:</strong> {room_type}</p>
91
+ <p style='margin: 5px 0;'>{price_display}</p>
92
+ <p style='margin: 5px 0;'><strong>Reviews:</strong> {review_count:.0f}</p>
93
+ {discount_info}
94
+ {traffic_spot_info}
95
+ {relevance_info}
96
+ </div>
97
+ """
98
+
99
+ MAP_SCRIPT = """
100
+ <script>
101
+ function showTrafficSpot(lat, lng) {
102
+ var map = document.querySelector('.folium-map')._leaflet_map;
103
+ map.setView([lat, lng], 18);
104
+ map.eachLayer(function(layer) {
105
+ if (layer instanceof L.Marker) {
106
+ var latLng = layer.getLatLng();
107
+ if (Math.abs(latLng.lat - lat) < 0.0001 && Math.abs(latLng.lng - lng) < 0.0001) {
108
+ layer.openPopup();
109
+ }
110
+ }
111
+ });
112
+ }
113
+ </script>
114
+ """
115
+
116
+ # HTML Templates for Streamlit UI
117
+ SIDEBAR_HEADER = '<p class="sidebar-header">HKUST BNB+<BR/></p>'
118
+
119
+ SIDEBAR_DIVIDER = '<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">'
120
+
121
+ TRAFFIC_EXPLANATION = """
122
+ ### How HKUST BNB+ Achieved (E)SG: we use Traffic Spots from the Department of Transport and do traffic analysis, and hence provide discounts according
123
+ to the average traffic on the previous days.
124
+
125
+ We use real-time traffic data to offer you the best possible rates:
126
+
127
+ * **Blue Camera Icons**: Areas with very low traffic (less than 2 vehicles detected)
128
+ * Enjoy a peaceful stay with **20% DISCOUNT** on these properties!
129
+
130
+ * **Orange Camera Icons**: Areas with moderate traffic (2-5 vehicles detected)
131
+ * Get a **10% DISCOUNT** on these properties!
132
+
133
+ * **Purple Camera Icons**: Areas with heavier traffic (more than 5 vehicles)
134
+ * Standard rates apply for these properties
135
+
136
+ Look for the blue connecting lines on the map to see which traffic spot affects each property!
137
+
138
+ Remark: Currently only a few traffic spots are available; more will be provided in the future.
139
+ """
140
+
141
+ SEARCH_EXPLANATION = """
142
+ ### How HKUST BNB+ Achieved E(S)G: we use keywords to provide semantic relevance analysis that matches the needs of HKUST students
143
+
144
+ Our advanced search technology goes beyond simple keyword matching to understand the meaning behind your search terms:
145
+
146
+ When you search for terms like "quiet," "convenient," or "spacious," our system:
147
+ 1. Analyzes both listing titles and actual guest reviews
148
+ 2. Understands the context and meaning (not just matching exact words)
149
+ 3. Ranks listings based on overall relevance to your search
150
+
151
+ **Search Match Types:**
152
+ * **"Strong match in title and reviews"** - Perfect matches in both property description and guest experiences
153
+ * **"Strong match in listing title"** - Property description matches your needs very well
154
+ * **"Strong match in reviews"** - Guest experiences align perfectly with what you're looking for
155
+ * **"Better match in listing title/reviews"** - One source is more relevant than the other
156
+ * **"Moderate semantic match"** - Some relevance but not a perfect match
157
+
158
+ This helps you find properties that truly match what you're looking for, even if they don't use the exact words in your search!
159
+ """
160
+
161
+ REVIEW_CARD_TEMPLATE = """
162
+ <div class="review-card">
163
+ <div class="review-header">
164
+ {reviewer_name} - {review_date}
165
+ </div>
166
+ <div class="review-content">
167
+ {highlighted_comments}
168
+ </div>
169
+ </div>
170
+ """
171
+
172
+ LISTINGS_COUNT_INFO = "<p style='text-align:center; color:#4285f4;'>Showing {listings_limit} listings in {neighborhood}</p>"
173
+
174
+ LISTING_CARD_TEMPLATE = """
175
+ <div class="listing-card" style="background-color: {background_color}">
176
+ <h4 class="listing-title">{listing_name}</h4>
177
+ {price_display}
178
+ <p class="listing-info"> Room Type: {room_type}</p>
179
+ <p class="listing-info"> Reviews: {review_count:.0f}</p>
180
+ {relevance_info}
181
+ </div>
182
+ """
183
+
184
+ PRICE_DISPLAY_WITH_DISCOUNT = """<p class="listing-info"> Price : <span class="original-price">${original_price:.0f}</span> <span class="discounted-price">${discounted_price:.0f}</span> {discount_tag}</p>"""
185
+
186
+ PRICE_DISPLAY_NORMAL = """<p class="listing-info"> Price : ${price:.0f}</p>"""
187
+
188
+ RELEVANCE_INFO_LISTING = """<p class="listing-info"> Relevance: {relevance_percentage:.0f}% </p>"""
189
+
190
+ TRAFFIC_DISCOUNT_DISPLAY = """
191
+ <div style='background-color: #e8f5e9; padding: 5px; margin: 5px 0; border-radius: 4px; border-left: 3px solid #4caf50;'>
192
+ <p style='margin: 2px 0; color: #2e7d32;'><strong>{discount_info}</strong></p>
193
+ <p style='margin: 2px 0; font-size: 0.9em;'>Avg. {avg_vehicle_count:.1f} vehicles across {observation_count} observations</p>
194
+ </div>
195
+ """
196
+
197
+ TRAFFIC_POPUP_BASE = """
198
+ <div style='min-width: 150px; padding: 10px;'>
199
+ <p style='margin: 5px 0;'><strong>Location ID:</strong> {location_id}</p>
200
+ {discount_display}
201
+ """
202
+
203
+ TRAFFIC_RECORDS_HEADER = "<h4>Recent Records (showing {recent_count} of {total_count} total):</h4>"
204
+
205
+ TRAFFIC_RECORD_ENTRY = """
206
+ <div style='border-top: 1px solid #ccc; padding: 5px 0;'>
207
+ <p style='margin: 2px 0;'><strong>Time:</strong> {capture_time}</p>
208
+ <p style='margin: 2px 0;'><strong>Vehicles:</strong> {vehicle_count}</p>
209
+ {image_html}
210
+ </div>
211
+ """
212
+
213
+ TRAFFIC_IMAGE_HTML = """
214
+ <img src='data:image/jpeg;base64,{base64_encoded}'
215
+ style='max-width: 100px; max-height: 100px; margin: 5px 0;'
216
+ alt='Processed Image'>
217
+ """
218
+
219
+ TRAFFIC_NO_RECORDS = "<p>No records available</p>"
AirbnbMapVisualiser.py → HKUSTBNBVisualiser.py RENAMED
@@ -6,11 +6,21 @@ from sentence_transformers import SentenceTransformer, util
6
  from geopy.distance import geodesic
7
  import logging
8
 
9
- # Import the TrafficSpotManager from TrafficSpot module
10
- from TrafficSpot import TrafficSpotManager
11
-
12
-
13
- class AirbnbMapVisualiser:
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
  self.connection_params = {
16
  'user': 'slliac',
@@ -26,30 +36,19 @@ class AirbnbMapVisualiser:
26
  increment=1,
27
  getmode=oracledb.SPOOL_ATTRVAL_WAIT
28
  )
29
-
30
- # Initialize TrafficSpotManager with minimal data
31
  self.traffic_manager = TrafficSpotManager(self.connection_params)
32
  logging.info(f"Traffic spots initialized, {len(self.traffic_manager.traffic_spots)} spots loaded")
33
-
34
- # Initialize sentence transformer model
35
  try:
36
- # Using a sentence transformer model specifically optimized for semantic search
37
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
38
  self.model = SentenceTransformer(model_name)
39
  print(f"Loaded Sentence Transformer model: {model_name}")
40
  except Exception as e:
41
  print(f"Error loading model: {str(e)}")
42
  self.model = None
43
-
44
  try:
45
  self.neighborhoods = self.get_all_neighborhoods()
46
  self.cached_listings = {}
47
- # Initialize a nested dictionary to store listings by neighborhood and limit
48
- self.cached_listings = {}
49
- # Pre-cache Southern neighborhood with default limit of 10
50
- self.cached_listings["Southern"] = {}
51
- self.cached_listings["Southern"][10] = self.get_neighborhood_listings("Southern", 10)
52
- self.cached_embeddings = {} # Cache for listing embeddings
53
  except Exception as e:
54
  print(f"Initialization error: {str(e)}")
55
  self.neighborhoods = []
@@ -57,26 +56,18 @@ class AirbnbMapVisualiser:
57
  self.cached_embeddings = {}
58
 
59
  def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
60
- """Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
61
  nearest_spot = None
62
  min_distance = float('inf')
63
-
64
- # Check each traffic spot
65
  for spot in self.traffic_manager.traffic_spots:
66
  if not spot.is_valid():
67
  continue
68
-
69
- # Calculate distance in kilometers
70
  distance = geodesic(
71
  (airbnb_lat, airbnb_lng),
72
  (spot.latitude, spot.longitude)
73
  ).kilometers
74
-
75
- # Update nearest if this spot is closer and within max distance
76
  if distance < min_distance and distance <= max_distance_km:
77
  min_distance = distance
78
  nearest_spot = spot
79
-
80
  if nearest_spot:
81
  return nearest_spot, min_distance
82
  else:
@@ -88,12 +79,7 @@ class AirbnbMapVisualiser:
88
  cursor = connection.cursor()
89
  cursor.prefetchrows = 50
90
  cursor.arraysize = 50
91
- cursor.execute("""
92
- SELECT DISTINCT NEIGHBOURHOOD
93
- FROM airbnb_master_data
94
- WHERE NEIGHBOURHOOD IS NOT NULL
95
- ORDER BY NEIGHBOURHOOD
96
- """)
97
  neighborhoods = [row[0] for row in cursor.fetchall()]
98
  return neighborhoods
99
  except Exception as e:
@@ -103,25 +89,12 @@ class AirbnbMapVisualiser:
103
  self.pool.release(connection)
104
 
105
  def get_neighborhood_listings(self, neighborhood, limit=10):
106
- """
107
- Get listings for a neighborhood with a specified limit.
108
-
109
- Args:
110
- neighborhood: The neighborhood to get listings for
111
- limit: Maximum number of listings to return (10, 20, 30, 40, or 50)
112
-
113
- Returns:
114
- List of listings data
115
- """
116
- # Ensure limit is one of the allowed values
117
  if limit not in [10, 20, 30, 40, 50]:
118
- limit = 10 # Default to 10 if invalid limit provided
119
 
120
- # Check if we already have this neighborhood and limit cached
121
  if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
122
  return self.cached_listings[neighborhood][limit]
123
 
124
- # Initialize neighborhood in cache if needed
125
  if neighborhood not in self.cached_listings:
126
  self.cached_listings[neighborhood] = {}
127
 
@@ -130,22 +103,11 @@ class AirbnbMapVisualiser:
130
  cursor = connection.cursor()
131
  cursor.prefetchrows = 50
132
  cursor.arraysize = 50
133
- cursor.execute("""
134
- SELECT m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
135
- m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
136
- COUNT(r.LISTING_ID) as NUMBER_OF_REVIEWS, m.REVIEWS_PER_MONTH,
137
- m.MINIMUM_NIGHTS, m.AVAILABILITY_365
138
- FROM airbnb_master_data m
139
- LEFT JOIN airbnb_reviews_data r ON m.ID = r.LISTING_ID
140
- WHERE m.LATITUDE IS NOT NULL
141
- AND m.LONGITUDE IS NOT NULL
142
- AND m.NEIGHBOURHOOD = :neighborhood
143
- GROUP BY m.ID, m.NAME, m.HOST_NAME, m.NEIGHBOURHOOD,
144
- m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
145
- m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
146
- ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
147
- FETCH FIRST :limit ROWS ONLY
148
- """, neighborhood=neighborhood, limit=limit)
149
 
150
  listings = cursor.fetchall()
151
  self.cached_listings[neighborhood][limit] = listings
@@ -160,18 +122,10 @@ class AirbnbMapVisualiser:
160
  connection = self.pool.acquire()
161
  try:
162
  cursor = connection.cursor()
163
- cursor.execute("""
164
- SELECT REVIEW_DATE, REVIEWER_NAME,
165
- CASE
166
- WHEN LENGTH(COMMENTS) > 200
167
- THEN SUBSTR(COMMENTS, 1, 200) || '...'
168
- ELSE COMMENTS
169
- END as COMMENTS
170
- FROM AIRBNB_REVIEWS_DATA
171
- WHERE LISTING_ID = :listing_id
172
- AND ROWNUM <= 10
173
- ORDER BY REVIEW_DATE DESC
174
- """, listing_id=int(listing_id))
175
 
176
  reviews = cursor.fetchall()
177
  formatted_reviews = []
@@ -192,26 +146,17 @@ class AirbnbMapVisualiser:
192
  self.pool.release(connection)
193
 
194
  def get_listing_reviews_for_search(self, listing_id):
195
- """Get reviews for search analysis and handle LOB objects correctly"""
196
  connection = self.pool.acquire()
197
  try:
198
  cursor = connection.cursor()
199
- cursor.execute("""
200
- SELECT COMMENTS
201
- FROM AIRBNB_REVIEWS_DATA
202
- WHERE LISTING_ID = :listing_id
203
- AND COMMENTS IS NOT NULL
204
- AND ROWNUM <= 10
205
- ORDER BY REVIEW_DATE DESC
206
- """, listing_id=int(listing_id))
207
-
208
  reviews = cursor.fetchall()
209
-
210
- # Properly convert LOB objects to strings
211
  formatted_reviews = []
212
  for review in reviews:
213
  if review[0] is not None:
214
- # Check if it's a LOB object and read it
215
  if hasattr(review[0], 'read'):
216
  formatted_reviews.append(review[0].read())
217
  else:
@@ -225,35 +170,10 @@ class AirbnbMapVisualiser:
225
  finally:
226
  self.pool.release(connection)
227
 
228
- def get_title_review_embeddings(self, title, reviews):
229
- """Get separate embeddings for title and reviews using Sentence Transformer"""
230
- if self.model is None:
231
- return None, None
232
-
233
- try:
234
- # Encode the title
235
- title_embedding = self.model.encode(title, convert_to_tensor=True)
236
-
237
- # Encode reviews if available, otherwise return None
238
- review_embedding = None
239
- if reviews and len(reviews) > 0:
240
- # Concatenate reviews into a single text to get embedding
241
- review_text = " ".join(reviews[:5]) # Limit to first 5 reviews
242
- review_embedding = self.model.encode(review_text, convert_to_tensor=True)
243
-
244
- return title_embedding, review_embedding
245
-
246
- except Exception as e:
247
- print(f"Error getting embeddings: {str(e)}")
248
- return None, None
249
-
250
  def compute_similarity(self, query_embedding, target_embedding):
251
- """Compute cosine similarity between two embeddings"""
252
  if query_embedding is None or target_embedding is None:
253
  return 0.0
254
-
255
  try:
256
- # Use the util function from sentence_transformers for cosine similarity
257
  similarity = util.pytorch_cos_sim(query_embedding, target_embedding).item()
258
  return similarity
259
  except Exception as e:
@@ -261,36 +181,24 @@ class AirbnbMapVisualiser:
261
  return 0.0
262
 
263
  def compute_search_scores(self, df, search_query):
264
- """Compute search scores comparing query with title and reviews separately"""
265
  if not search_query or self.model is None:
266
  return [0.0] * len(df)
267
-
268
  try:
269
- # Encode the search query
270
  query_key = f"query_{search_query}"
271
  if query_key not in self.cached_embeddings:
272
  self.cached_embeddings[query_key] = self.model.encode(search_query, convert_to_tensor=True)
273
  query_embedding = self.cached_embeddings[query_key]
274
-
275
- # Calculate similarity for each listing
276
  scores = []
277
-
278
  for idx, row in df.iterrows():
279
- # Get title and reviews
280
  title = str(row['name'])
281
  reviews = self.get_listing_reviews_for_search(row['id'])
282
-
283
- # Get or compute embeddings
284
  title_key = f"title_{row['id']}"
285
  review_key = f"review_{row['id']}"
286
-
287
  if title_key not in self.cached_embeddings:
288
  title_embedding = self.model.encode(title, convert_to_tensor=True)
289
  self.cached_embeddings[title_key] = title_embedding
290
  else:
291
  title_embedding = self.cached_embeddings[title_key]
292
-
293
- # Only compute review embedding if we have reviews
294
  review_embedding = None
295
  if reviews and len(reviews) > 0:
296
  if review_key not in self.cached_embeddings:
@@ -299,23 +207,12 @@ class AirbnbMapVisualiser:
299
  self.cached_embeddings[review_key] = review_embedding
300
  else:
301
  review_embedding = self.cached_embeddings[review_key]
302
-
303
- # Compute similarities
304
  title_similarity = self.compute_similarity(query_embedding, title_embedding)
305
  review_similarity = 0.0
306
  if review_embedding is not None:
307
  review_similarity = self.compute_similarity(query_embedding, review_embedding)
308
-
309
- # Calculate final score - emphasis on reviews if available
310
- if review_embedding is not None:
311
- # Weight reviews more heavily if there are reviews
312
- final_score = title_similarity * 0.4 + review_similarity * 0.6
313
- else:
314
- # Use only title similarity if no reviews
315
- final_score = title_similarity
316
-
317
  scores.append(final_score)
318
-
319
  return scores
320
 
321
  except Exception as e:
@@ -323,93 +220,18 @@ class AirbnbMapVisualiser:
323
  return [0.0] * len(df)
324
 
325
  def sort_by_relevance(self, df, search_query):
326
- """Sort listings by relevance using sentence transformer comparison"""
327
  if not search_query:
328
  return df
329
-
330
- # Compute semantic similarity scores
331
  scores = self.compute_search_scores(df, search_query)
332
  df['relevance_score'] = scores
333
  df['relevance_percentage'] = df['relevance_score'] * 100
334
-
335
- # Add relevance description
336
- def get_relevance_description(score):
337
- if score >= 80:
338
- return "Perfect match"
339
- elif score >= 60:
340
- return "Excellent match"
341
- elif score >= 40:
342
- return "Good match"
343
- elif score >= 20:
344
- return "Partial match"
345
- else:
346
- return "Low relevance"
347
-
348
- df['relevance_features'] = df['relevance_percentage'].apply(get_relevance_description)
349
-
350
- # Add match information about which part matched better
351
- def get_match_source(row):
352
- # Get title and reviews
353
- title = str(row['name'])
354
- reviews = self.get_listing_reviews_for_search(row['id'])
355
-
356
- # Recompute individual similarities to determine match source
357
- title_similarity = 0.0
358
- review_similarity = 0.0
359
-
360
- if self.model is not None:
361
- query_embedding = self.model.encode(search_query, convert_to_tensor=True)
362
- title_embedding = self.model.encode(title, convert_to_tensor=True)
363
- title_similarity = self.compute_similarity(query_embedding, title_embedding)
364
-
365
- if reviews and len(reviews) > 0:
366
- review_text = " ".join(reviews[:5])
367
- review_embedding = self.model.encode(review_text, convert_to_tensor=True)
368
- review_similarity = self.compute_similarity(query_embedding, review_embedding)
369
-
370
- # Determine which source matched better
371
- if title_similarity > 0.2 and review_similarity > 0:
372
- return "Strong match in title and reviews"
373
- elif title_similarity > 0.2 and review_similarity > 0.2:
374
- return "Strong match in title and strong match in reviews"
375
- elif title_similarity > 0.2:
376
- return "Strong match in listing title"
377
- elif review_similarity > 0.2:
378
- return "Strong match in reviews"
379
-
380
- # Only calculate match source if score is above threshold
381
- df['matching_features'] = df.apply(
382
- lambda row: get_match_source(row) if row['relevance_score'] > 0.2 else "Low semantic match",
383
- axis=1
384
- )
385
-
386
- # Sort by relevance score
387
  return df.sort_values('relevance_score', ascending=False)
388
 
389
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
390
  selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
391
- """
392
- Create a map and dataframe of listings for a neighborhood
393
-
394
- Args:
395
- neighborhood: Neighborhood to get listings for
396
- show_traffic: Whether to show traffic spots
397
- center_lat: Center latitude for the map
398
- center_lng: Center longitude for the map
399
- selected_id: ID of the selected listing
400
- search_query: Search query for filtering listings
401
- current_page: Current page of pagination
402
- items_per_page: Number of items per page
403
- listings_limit: Maximum number of listings to display (10, 20, 30, 40, or 50)
404
-
405
- Returns:
406
- Tuple of (map, dataframe)
407
- """
408
- # Ensure listings_limit is valid
409
  if listings_limit not in [10, 20, 30, 40, 50]:
410
  listings_limit = 10
411
 
412
- # Get the listings with the specified limit
413
  listings = self.get_neighborhood_listings(neighborhood, listings_limit)
414
 
415
  if not listings:
@@ -428,7 +250,6 @@ class AirbnbMapVisualiser:
428
  df[col] = pd.to_numeric(df[col], errors='coerce')
429
 
430
  if search_query:
431
- # Use the sentence transformer semantic search
432
  df = self.sort_by_relevance(df, search_query)
433
 
434
  if df.empty:
@@ -444,79 +265,49 @@ class AirbnbMapVisualiser:
444
  tiles='OpenStreetMap'
445
  )
446
 
447
- # Calculate pagination indices
448
- total_items = len(df)
449
- start_idx = (current_page - 1) * items_per_page
450
- end_idx = min(start_idx + items_per_page, total_items)
451
-
452
- # Get the current page's listings
453
- current_page_df = df.iloc[start_idx:end_idx]
454
-
455
- # Create a list to store all traffic spots we need to display
456
  all_traffic_spots_to_display = set()
457
-
458
- # Find nearest traffic spots for ALL listings
459
  all_nearest_traffic_spots = {}
460
-
461
- # First find all nearest traffic spots
462
  for idx, row in df.iterrows():
463
  nearest_spot, distance = self.find_nearest_traffic_spot(row['latitude'], row['longitude'])
464
  if nearest_spot:
465
  all_nearest_traffic_spots[row['id']] = (nearest_spot, distance)
466
  all_traffic_spots_to_display.add(nearest_spot.key)
467
 
468
- # Create a feature group for connection lines
469
  lines_group = folium.FeatureGroup(name="Connection Lines")
470
  m.add_child(lines_group)
471
 
472
- # Display all traffic spots
473
  if show_traffic and all_traffic_spots_to_display:
474
  self.traffic_manager.add_spots_to_map(m, all_traffic_spots_to_display)
475
 
476
- # Add all Airbnb markers and connection lines
477
  for idx, row in df.iterrows():
478
  marker_id = f"marker_{row['id']}"
479
- reviews = self.get_listing_reviews(row['id'])
480
- review_button_key = f"review_btn_{row['id']}"
481
-
482
- # Get traffic spot info if available for this listing
483
  traffic_spot_info = ""
484
  discount_info = ""
485
  discounted_price = row['price']
486
 
487
- # Check if this listing has a nearest traffic spot
488
  if row['id'] in all_nearest_traffic_spots:
489
  nearest_spot, distance = all_nearest_traffic_spots[row['id']]
490
-
491
- # Get discount rate and apply to price
492
  discount_rate = nearest_spot.get_discount_rate()
 
493
  if discount_rate > 0:
494
  discounted_price = row['price'] * (1 - discount_rate)
495
  discount_percentage = int(discount_rate * 100)
496
 
497
- # Format discount info
498
- discount_info = f"""
499
- <div style='background-color: #e8f5e9; padding: 8px; margin: 10px 0; border-radius: 4px; border-left: 4px solid #4caf50;'>
500
- <p style='margin: 2px 0; font-weight: bold; color: #2e7d32;'>🎉 {discount_percentage}% ENV PROTECTION DISCOUNT!</p>
501
- <p style='margin: 2px 0;'>Original: ${row['price']:.0f} → Now: ${discounted_price:.0f}</p>
502
- <p style='margin: 2px 0; font-size: 0.85em;'>Avg. {nearest_spot.avg_vehicle_count:.1f} vehicles per observation</p>
503
- </div>
504
- """
505
 
506
- # Format distance for display (convert to meters if less than 1km)
507
  distance_str = f"{distance:.2f} km" if distance >= 0.1 else f"{distance * 1000:.0f} meters"
508
 
509
- traffic_spot_info = f"""
510
- <div class='traffic-spot-info' style='margin: 10px 0; padding: 8px; background-color: #f0f8ff; border-radius: 4px; border-left: 4px solid #4285f4;'>
511
- <p style='margin: 5px 0;'>
512
- <strong>Nearest Traffic Spot:</strong> {escape(str(nearest_spot.key))}
513
- <br/>
514
- <strong>Distance:</strong> {distance_str}
515
- </p>
516
- </div>
517
- """
518
-
519
- # Add connection lines for ALL listings with nearby traffic spots
520
  folium.PolyLine(
521
  locations=[
522
  [row['latitude'], row['longitude']],
@@ -531,35 +322,28 @@ class AirbnbMapVisualiser:
531
 
532
  relevance_info = ""
533
  if search_query and 'relevance_percentage' in row and 'relevance_features' in row:
534
- relevance_info = f"""
535
- <div class='relevance-info' style='margin: 10px 0; padding: 8px; background-color: #f8f9fa; border-radius: 4px;'>
536
- <p style='margin: 5px 0;'>
537
- <strong>Match Score:</strong> {row['relevance_percentage']:.0f}%
538
- <br/>
539
- <strong>Relevance:</strong> {row['relevance_features']}
540
- <br/>
541
- <strong>Match Type:</strong> {row['matching_features']}
542
- </p>
543
- </div>
544
- """
545
-
546
- # Show price with strikethrough if discounted
547
  price_display = f"<strong>Price:</strong> ${row['price']:.0f}"
548
  if discount_info:
549
- price_display = f"<strong>Price:</strong> <span style='text-decoration: line-through;'>${row['price']:.0f}</span> <span style='color: #2e7d32; font-weight: bold;'>${discounted_price:.0f}</span>"
550
-
551
- popup_content = f"""
552
- <div style='min-width: 280px; max-width: 320px; padding: 15px;'>
553
- <h4 style='margin: 0 0 10px 0; color: #2c3e50;'>{escape(str(row['name']))}</h4>
554
- <p style='margin: 5px 0;'><strong>Host:</strong> {escape(str(row['host_name']))}</p>
555
- <p style='margin: 5px 0;'><strong>Room Type:</strong> {escape(str(row['room_type']))}</p>
556
- <p style='margin: 5px 0;'>{price_display}</p>
557
- <p style='margin: 5px 0;'><strong>Reviews:</strong> {row['number_of_reviews']:.0f}</p>
558
- {discount_info}
559
- {traffic_spot_info}
560
- {relevance_info}
561
- </div>
562
- """
563
 
564
  marker_color = 'green' if selected_id == row['id'] else 'red'
565
  marker = folium.Marker(
@@ -572,30 +356,7 @@ class AirbnbMapVisualiser:
572
  if selected_id is not None and row['id'] == selected_id:
573
  marker._name = marker_id
574
 
575
- # Add JavaScript function to help navigate to traffic spots
576
- folium.Element("""
577
- <script>
578
- function showTrafficSpot(lat, lng) {
579
- // Get the map object
580
- var map = document.querySelector('.folium-map')._leaflet_map;
581
-
582
- // Pan to the traffic spot and zoom in
583
- map.setView([lat, lng], 18);
584
-
585
- // Find and open the popup for the traffic spot marker
586
- map.eachLayer(function(layer) {
587
- if (layer instanceof L.Marker) {
588
- var latLng = layer.getLatLng();
589
- if (Math.abs(latLng.lat - lat) < 0.0001 && Math.abs(latLng.lng - lng) < 0.0001) {
590
- layer.openPopup();
591
- }
592
- }
593
- });
594
- }
595
- </script>
596
- """).add_to(m)
597
-
598
- # Add layer control to toggle connection lines
599
  folium.LayerControl().add_to(m)
600
 
601
  return m, df
 
6
  from geopy.distance import geodesic
7
  import logging
8
 
9
+ from TDTrafficSpot import TrafficSpotManager
10
+ from HKUSTBNBConstant import (
11
+ GET_ALL_NEIGHBORHOODS,
12
+ GET_NEIGHBORHOOD_LISTINGS,
13
+ GET_LISTING_REVIEWS,
14
+ GET_LISTING_REVIEWS_FOR_SEARCH,
15
+ DISCOUNT_INFO_TEMPLATE,
16
+ TRAFFIC_SPOT_INFO_TEMPLATE,
17
+ RELEVANCE_INFO_TEMPLATE,
18
+ POPUP_CONTENT_TEMPLATE,
19
+ MAP_SCRIPT
20
+ )
21
+
22
+
23
+ class HKUSTBNBVisualiser:
24
  def __init__(self):
25
  self.connection_params = {
26
  'user': 'slliac',
 
36
  increment=1,
37
  getmode=oracledb.SPOOL_ATTRVAL_WAIT
38
  )
 
 
39
  self.traffic_manager = TrafficSpotManager(self.connection_params)
40
  logging.info(f"Traffic spots initialized, {len(self.traffic_manager.traffic_spots)} spots loaded")
 
 
41
  try:
 
42
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
43
  self.model = SentenceTransformer(model_name)
44
  print(f"Loaded Sentence Transformer model: {model_name}")
45
  except Exception as e:
46
  print(f"Error loading model: {str(e)}")
47
  self.model = None
 
48
  try:
49
  self.neighborhoods = self.get_all_neighborhoods()
50
  self.cached_listings = {}
51
+ self.cached_embeddings = {}
 
 
 
 
 
52
  except Exception as e:
53
  print(f"Initialization error: {str(e)}")
54
  self.neighborhoods = []
 
56
  self.cached_embeddings = {}
57
 
58
  def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
 
59
  nearest_spot = None
60
  min_distance = float('inf')
 
 
61
  for spot in self.traffic_manager.traffic_spots:
62
  if not spot.is_valid():
63
  continue
 
 
64
  distance = geodesic(
65
  (airbnb_lat, airbnb_lng),
66
  (spot.latitude, spot.longitude)
67
  ).kilometers
 
 
68
  if distance < min_distance and distance <= max_distance_km:
69
  min_distance = distance
70
  nearest_spot = spot
 
71
  if nearest_spot:
72
  return nearest_spot, min_distance
73
  else:
 
79
  cursor = connection.cursor()
80
  cursor.prefetchrows = 50
81
  cursor.arraysize = 50
82
+ cursor.execute(GET_ALL_NEIGHBORHOODS)
 
 
 
 
 
83
  neighborhoods = [row[0] for row in cursor.fetchall()]
84
  return neighborhoods
85
  except Exception as e:
 
89
  self.pool.release(connection)
90
 
91
  def get_neighborhood_listings(self, neighborhood, limit=10):
 
 
 
 
 
 
 
 
 
 
 
92
  if limit not in [10, 20, 30, 40, 50]:
93
+ limit = 10
94
 
 
95
  if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
96
  return self.cached_listings[neighborhood][limit]
97
 
 
98
  if neighborhood not in self.cached_listings:
99
  self.cached_listings[neighborhood] = {}
100
 
 
103
  cursor = connection.cursor()
104
  cursor.prefetchrows = 50
105
  cursor.arraysize = 50
106
+ cursor.execute(
107
+ GET_NEIGHBORHOOD_LISTINGS,
108
+ neighborhood=neighborhood,
109
+ limit=limit
110
+ )
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  listings = cursor.fetchall()
113
  self.cached_listings[neighborhood][limit] = listings
 
122
  connection = self.pool.acquire()
123
  try:
124
  cursor = connection.cursor()
125
+ cursor.execute(
126
+ GET_LISTING_REVIEWS,
127
+ listing_id=int(listing_id)
128
+ )
 
 
 
 
 
 
 
 
129
 
130
  reviews = cursor.fetchall()
131
  formatted_reviews = []
 
146
  self.pool.release(connection)
147
 
148
  def get_listing_reviews_for_search(self, listing_id):
 
149
  connection = self.pool.acquire()
150
  try:
151
  cursor = connection.cursor()
152
+ cursor.execute(
153
+ GET_LISTING_REVIEWS_FOR_SEARCH,
154
+ listing_id=int(listing_id)
155
+ )
 
 
 
 
 
156
  reviews = cursor.fetchall()
 
 
157
  formatted_reviews = []
158
  for review in reviews:
159
  if review[0] is not None:
 
160
  if hasattr(review[0], 'read'):
161
  formatted_reviews.append(review[0].read())
162
  else:
 
170
  finally:
171
  self.pool.release(connection)
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  def compute_similarity(self, query_embedding, target_embedding):
 
174
  if query_embedding is None or target_embedding is None:
175
  return 0.0
 
176
  try:
 
177
  similarity = util.pytorch_cos_sim(query_embedding, target_embedding).item()
178
  return similarity
179
  except Exception as e:
 
181
  return 0.0
182
 
183
  def compute_search_scores(self, df, search_query):
 
184
  if not search_query or self.model is None:
185
  return [0.0] * len(df)
 
186
  try:
 
187
  query_key = f"query_{search_query}"
188
  if query_key not in self.cached_embeddings:
189
  self.cached_embeddings[query_key] = self.model.encode(search_query, convert_to_tensor=True)
190
  query_embedding = self.cached_embeddings[query_key]
 
 
191
  scores = []
 
192
  for idx, row in df.iterrows():
 
193
  title = str(row['name'])
194
  reviews = self.get_listing_reviews_for_search(row['id'])
 
 
195
  title_key = f"title_{row['id']}"
196
  review_key = f"review_{row['id']}"
 
197
  if title_key not in self.cached_embeddings:
198
  title_embedding = self.model.encode(title, convert_to_tensor=True)
199
  self.cached_embeddings[title_key] = title_embedding
200
  else:
201
  title_embedding = self.cached_embeddings[title_key]
 
 
202
  review_embedding = None
203
  if reviews and len(reviews) > 0:
204
  if review_key not in self.cached_embeddings:
 
207
  self.cached_embeddings[review_key] = review_embedding
208
  else:
209
  review_embedding = self.cached_embeddings[review_key]
 
 
210
  title_similarity = self.compute_similarity(query_embedding, title_embedding)
211
  review_similarity = 0.0
212
  if review_embedding is not None:
213
  review_similarity = self.compute_similarity(query_embedding, review_embedding)
214
+ final_score = title_similarity * 0.7 + review_similarity * 0.3 if review_embedding is not None else title_similarity
 
 
 
 
 
 
 
 
215
  scores.append(final_score)
 
216
  return scores
217
 
218
  except Exception as e:
 
220
  return [0.0] * len(df)
221
 
222
  def sort_by_relevance(self, df, search_query):
 
223
  if not search_query:
224
  return df
 
 
225
  scores = self.compute_search_scores(df, search_query)
226
  df['relevance_score'] = scores
227
  df['relevance_percentage'] = df['relevance_score'] * 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  return df.sort_values('relevance_score', ascending=False)
229
 
230
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
231
  selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  if listings_limit not in [10, 20, 30, 40, 50]:
233
  listings_limit = 10
234
 
 
235
  listings = self.get_neighborhood_listings(neighborhood, listings_limit)
236
 
237
  if not listings:
 
250
  df[col] = pd.to_numeric(df[col], errors='coerce')
251
 
252
  if search_query:
 
253
  df = self.sort_by_relevance(df, search_query)
254
 
255
  if df.empty:
 
265
  tiles='OpenStreetMap'
266
  )
267
 
 
 
 
 
 
 
 
 
 
268
  all_traffic_spots_to_display = set()
 
 
269
  all_nearest_traffic_spots = {}
 
 
270
  for idx, row in df.iterrows():
271
  nearest_spot, distance = self.find_nearest_traffic_spot(row['latitude'], row['longitude'])
272
  if nearest_spot:
273
  all_nearest_traffic_spots[row['id']] = (nearest_spot, distance)
274
  all_traffic_spots_to_display.add(nearest_spot.key)
275
 
 
276
  lines_group = folium.FeatureGroup(name="Connection Lines")
277
  m.add_child(lines_group)
278
 
 
279
  if show_traffic and all_traffic_spots_to_display:
280
  self.traffic_manager.add_spots_to_map(m, all_traffic_spots_to_display)
281
 
 
282
  for idx, row in df.iterrows():
283
  marker_id = f"marker_{row['id']}"
 
 
 
 
284
  traffic_spot_info = ""
285
  discount_info = ""
286
  discounted_price = row['price']
287
 
 
288
  if row['id'] in all_nearest_traffic_spots:
289
  nearest_spot, distance = all_nearest_traffic_spots[row['id']]
 
 
290
  discount_rate = nearest_spot.get_discount_rate()
291
+
292
  if discount_rate > 0:
293
  discounted_price = row['price'] * (1 - discount_rate)
294
  discount_percentage = int(discount_rate * 100)
295
 
296
+ discount_info = DISCOUNT_INFO_TEMPLATE.format(
297
+ discount_percentage=discount_percentage,
298
+ original_price=row['price'],
299
+ discounted_price=discounted_price,
300
+ avg_vehicle_count=nearest_spot.avg_vehicle_count,
301
+ observation_count=len(nearest_spot.dataset_rows)
302
+ )
 
303
 
 
304
  distance_str = f"{distance:.2f} km" if distance >= 0.1 else f"{distance * 1000:.0f} meters"
305
 
306
+ traffic_spot_info = TRAFFIC_SPOT_INFO_TEMPLATE.format(
307
+ spot_key=escape(str(nearest_spot.key)),
308
+ distance_str=distance_str
309
+ )
310
+
 
 
 
 
 
 
311
  folium.PolyLine(
312
  locations=[
313
  [row['latitude'], row['longitude']],
 
322
 
323
  relevance_info = ""
324
  if search_query and 'relevance_percentage' in row and 'relevance_features' in row:
325
+ relevance_info = RELEVANCE_INFO_TEMPLATE.format(
326
+ relevance_percentage=row['relevance_percentage'],
327
+ relevance_features=row['relevance_features'],
328
+ matching_features=row['matching_features']
329
+ )
330
+
 
 
 
 
 
 
 
331
  price_display = f"<strong>Price:</strong> ${row['price']:.0f}"
332
  if discount_info:
333
+ price_display = (f"<strong>Price:</strong> "
334
+ f"<span style='text-decoration: line-through;'>${row['price']:.0f}</span> "
335
+ f"<span style='color: #2e7d32; font-weight: bold;'>${discounted_price:.0f}</span>")
336
+
337
+ popup_content = POPUP_CONTENT_TEMPLATE.format(
338
+ listing_name=escape(str(row['name'])),
339
+ host_name=escape(str(row['host_name'])),
340
+ room_type=escape(str(row['room_type'])),
341
+ price_display=price_display,
342
+ review_count=row['number_of_reviews'],
343
+ discount_info=discount_info,
344
+ traffic_spot_info=traffic_spot_info,
345
+ relevance_info=relevance_info
346
+ )
347
 
348
  marker_color = 'green' if selected_id == row['id'] else 'red'
349
  marker = folium.Marker(
 
356
  if selected_id is not None and row['id'] == selected_id:
357
  marker._name = marker_id
358
 
359
+ folium.Element(MAP_SCRIPT).add_to(m)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  folium.LayerControl().add_to(m)
361
 
362
  return m, df
TrafficSpot.py → TDTrafficSpot.py RENAMED
@@ -5,46 +5,56 @@ import base64
5
  import numpy as np
6
  from html import escape
7
  from datasets import load_dataset
8
- from datetime import datetime, timedelta
9
-
10
-
11
- class TrafficSpot:
 
 
 
 
 
 
 
 
12
  def __init__(self, key, latitude, longitude, dataset_rows=None):
13
  self.key = key
14
  self.latitude = float(latitude) if latitude is not None else None
15
  self.longitude = float(longitude) if longitude is not None else None
16
- self.dataset_rows = dataset_rows or [] # List of matching dataset rows (up to 5)
17
  self.avg_vehicle_count = self.calculate_avg_vehicle_count()
 
18
 
19
  def is_valid(self):
20
  return self.latitude is not None and self.longitude is not None
21
 
 
 
 
 
 
 
 
22
  def calculate_avg_vehicle_count(self):
23
- """Calculate average vehicle count from the recent data"""
24
  if not self.dataset_rows:
25
  return 0
26
 
27
- # Extract vehicle counts from dataset rows
28
  vehicle_counts = [row.get('vehicle_count', 0) for row in self.dataset_rows if 'vehicle_count' in row]
29
 
30
- # If no valid counts are found, return 0
31
  if not vehicle_counts:
32
  return 0
33
 
34
- # Calculate and return the average
35
  return np.mean(vehicle_counts)
36
 
37
  def get_discount_rate(self):
38
- """Calculate discount rate based on average vehicle count"""
39
  if self.avg_vehicle_count < 2:
40
- return 0.20 # 20% discount
41
  elif self.avg_vehicle_count < 5:
42
- return 0.10 # 10% discount
43
  else:
44
- return 0.0 # No discount
45
 
46
  def get_discount_info(self):
47
- """Get discount information as a formatted string"""
48
  discount_rate = self.get_discount_rate()
49
 
50
  if discount_rate <= 0:
@@ -57,55 +67,50 @@ class TrafficSpot:
57
  discount_display = ""
58
 
59
  if "discount" in discount_info.lower() and "no" not in discount_info.lower():
60
- discount_display = f"""
61
- <div style='background-color: #e8f5e9; padding: 5px; margin: 5px 0; border-radius: 4px; border-left: 3px solid #4caf50;'>
62
- <p style='margin: 2px 0; color: #2e7d32;'><strong>🎉 {discount_info}</strong></p>
63
- <p style='margin: 2px 0; font-size: 0.9em;'>Avg. {self.avg_vehicle_count:.1f} vehicles per observation</p>
64
- </div>
65
- """
66
-
67
- html = f"""
68
- <div style='min-width: 150px; padding: 10px;'>
69
- <p style='margin: 5px 0;'><strong>Location ID:</strong> {escape(str(self.key))}</p>
70
- {discount_display}
71
- """
72
-
73
- if self.dataset_rows:
74
- html += "<h4>Recent Records:</h4>"
75
- for row in self.dataset_rows:
76
- # Convert binary processed_image to base64
 
 
 
77
  image_data = row.get('processed_image')
78
  image_html = ""
79
  if image_data:
80
  try:
81
- # Encode binary data to base64
82
  base64_encoded = base64.b64encode(image_data).decode('utf-8')
83
- # Create img tag with base64 data
84
- image_html = f"""
85
- <img src='data:image/jpeg;base64,{base64_encoded}'
86
- style='max-width: 100px; max-height: 100px; margin: 5px 0;'
87
- alt='Processed Image'>
88
- """
89
  except Exception as e:
90
  logging.error(f"Error encoding image for {self.key}: {str(e)}")
91
  image_html = "<p>Image load failed</p>"
92
 
93
- html += f"""
94
- <div style='border-top: 1px solid #ccc; padding: 5px 0;'>
95
- <p style='margin: 2px 0;'><strong>Time:</strong> {escape(str(row['capture_time']))}</p>
96
- <p style='margin: 2px 0;'><strong>Vehicles:</strong> {escape(str(row['vehicle_count']))}</p>
97
- {image_html}
98
- </div>
99
- """
100
  else:
101
- html += "<p>No recent records available</p>"
102
 
103
  html += "</div>"
104
  return html
105
 
106
  def add_to_map(self, folium_map):
107
  if self.is_valid():
108
- # Choose color based on traffic level
109
  if self.avg_vehicle_count < 2:
110
  color = 'blue' # Low traffic - 20% discount
111
  elif self.avg_vehicle_count < 5:
@@ -124,64 +129,63 @@ class TrafficSpotManager:
124
  def __init__(self, connection_params):
125
  self.connection_params = connection_params
126
  self.traffic_spots = []
127
- self.spot_dict = {} # For quick lookup by key
128
- # Only load limited spots when initialized
129
  self.load_limited_traffic_spots()
130
 
131
  def load_limited_traffic_spots(self, limit=10):
132
- """Load only a very limited set of traffic spots initially"""
133
  try:
134
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
135
- dataset_list = [row for row in dataset]
136
- dataset_list.sort(key=lambda x: x['capture_time'], reverse=True)
137
 
138
- # Limit to just a few samples
139
- dataset_dict = {}
140
- unique_count = 0
141
  for row in dataset_list:
142
  loc_id = row['location_id']
143
- if unique_count >= limit:
144
- break
 
145
 
146
- if loc_id not in dataset_dict:
147
- dataset_dict[loc_id] = []
148
- unique_count += 1
 
 
 
149
 
150
- if len(dataset_dict[loc_id]) < 10: # Store up to 10 records for averaging
151
- dataset_dict[loc_id].append(row)
152
 
153
- unique_locations = list(dataset_dict.keys())
154
- location_ids = tuple(unique_locations) if unique_locations else ('',)
 
 
 
 
 
155
 
156
  with oracledb.connect(**self.connection_params) as conn:
157
  cursor = conn.cursor()
158
- query = """
159
- SELECT KEY, LATITUDE, LONGITUDE
160
- FROM TD_TRAFFIC_CAMERA_LOCATION
161
- WHERE KEY IN ({})
162
- AND LATITUDE IS NOT NULL
163
- AND LONGITUDE IS NOT NULL
164
- """.format(','.join([':' + str(i + 1) for i in range(len(location_ids))]))
165
 
166
  cursor.execute(query, location_ids)
167
  spots = cursor.fetchall()
168
 
169
  self.traffic_spots = [
170
- TrafficSpot(
171
  spot[0],
172
  spot[1],
173
  spot[2],
174
- dataset_dict.get(spot[0])
175
  )
176
  for spot in spots
177
  ]
178
 
179
- # Build lookup dictionary
180
  for spot in self.traffic_spots:
181
  self.spot_dict[spot.key] = spot
182
 
183
- conn.commit()
184
- logging.info(f"Loaded {len(self.traffic_spots)} limited traffic spots")
185
 
186
  except Exception as e:
187
  logging.error(f"Error loading traffic spots: {str(e)}")
@@ -189,8 +193,6 @@ class TrafficSpotManager:
189
  self.spot_dict = {}
190
 
191
  def load_specific_traffic_spots(self, keys):
192
- """Load specific traffic spots by their keys"""
193
- # Filter out keys we already have
194
  needed_keys = [key for key in keys if key not in self.spot_dict]
195
 
196
  if not needed_keys:
@@ -198,77 +200,58 @@ class TrafficSpotManager:
198
 
199
  try:
200
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
201
- dataset_list = [row for row in dataset]
202
- dataset_list.sort(key=lambda x: x['capture_time'], reverse=True)
203
 
204
- dataset_dict = {}
205
  for row in dataset_list:
206
  loc_id = row['location_id']
207
  if loc_id in needed_keys:
208
- if loc_id not in dataset_dict:
209
- dataset_dict[loc_id] = []
210
- if len(dataset_dict[loc_id]) < 10: # Store up to 10 records for averaging
211
- dataset_dict[loc_id].append(row)
212
 
213
- # Only load if we have keys to load
214
- if needed_keys:
215
  with oracledb.connect(**self.connection_params) as conn:
216
  cursor = conn.cursor()
217
 
218
- # Prepare placeholders for the IN clause
219
  placeholders = ','.join([':' + str(i + 1) for i in range(len(needed_keys))])
220
 
221
- query = f"""
222
- SELECT KEY, LATITUDE, LONGITUDE
223
- FROM TD_TRAFFIC_CAMERA_LOCATION
224
- WHERE KEY IN ({placeholders})
225
- AND LATITUDE IS NOT NULL
226
- AND LONGITUDE IS NOT NULL
227
- """
228
 
229
  cursor.execute(query, tuple(needed_keys))
230
  spots = cursor.fetchall()
231
 
232
  new_spots = [
233
- TrafficSpot(
234
  spot[0],
235
  spot[1],
236
  spot[2],
237
- dataset_dict.get(spot[0])
238
  )
239
  for spot in spots
240
  ]
241
 
242
- # Add to our collections
243
  for spot in new_spots:
244
  self.spot_dict[spot.key] = spot
245
  self.traffic_spots.append(spot)
246
 
247
- conn.commit()
248
- logging.info(f"Loaded {len(new_spots)} additional traffic spots")
249
 
250
  except Exception as e:
251
  logging.error(f"Error loading specific traffic spots: {str(e)}")
252
 
253
  def add_spots_to_map(self, folium_map, spot_keys=None):
254
- """Add only specific spots to map"""
255
  if spot_keys is None:
256
- # If no keys specified, add all loaded spots
257
  for spot in self.traffic_spots:
258
  spot.add_to_map(folium_map)
259
  else:
260
- # Add only the specified spots
261
  for key in spot_keys:
262
  if key in self.spot_dict:
263
  self.spot_dict[key].add_to_map(folium_map)
264
 
265
  def get_spot_by_key(self, key):
266
- """Get a traffic spot by its key, loading it if necessary"""
267
  if key in self.spot_dict:
268
  return self.spot_dict[key]
269
 
270
- # Try to load it if we don't have it
271
  self.load_specific_traffic_spots([key])
272
-
273
- # Return if found, None otherwise
274
  return self.spot_dict.get(key)
 
5
  import numpy as np
6
  from html import escape
7
  from datasets import load_dataset
8
+ from HKUSTBNBConstant import (
9
+ GET_TRAFFIC_CAMERA_LOCATIONS,
10
+ TRAFFIC_DISCOUNT_DISPLAY,
11
+ TRAFFIC_POPUP_BASE,
12
+ TRAFFIC_RECORDS_HEADER,
13
+ TRAFFIC_RECORD_ENTRY,
14
+ TRAFFIC_IMAGE_HTML,
15
+ TRAFFIC_NO_RECORDS
16
+ )
17
+
18
+
19
+ class TDTrafficSpot:
20
  def __init__(self, key, latitude, longitude, dataset_rows=None):
21
  self.key = key
22
  self.latitude = float(latitude) if latitude is not None else None
23
  self.longitude = float(longitude) if longitude is not None else None
24
+ self.dataset_rows = dataset_rows or []
25
  self.avg_vehicle_count = self.calculate_avg_vehicle_count()
26
+ self.recent_display_rows = self.get_recent_display_rows()
27
 
28
  def is_valid(self):
29
  return self.latitude is not None and self.longitude is not None
30
 
31
+ def get_recent_display_rows(self, max_display=2):
32
+ if not self.dataset_rows:
33
+ return []
34
+
35
+ sorted_rows = sorted(self.dataset_rows, key=lambda x: x['capture_time'], reverse=True)
36
+ return sorted_rows[:max_display]
37
+
38
  def calculate_avg_vehicle_count(self):
 
39
  if not self.dataset_rows:
40
  return 0
41
 
 
42
  vehicle_counts = [row.get('vehicle_count', 0) for row in self.dataset_rows if 'vehicle_count' in row]
43
 
 
44
  if not vehicle_counts:
45
  return 0
46
 
 
47
  return np.mean(vehicle_counts)
48
 
49
  def get_discount_rate(self):
 
50
  if self.avg_vehicle_count < 2:
51
+ return 0.20
52
  elif self.avg_vehicle_count < 5:
53
+ return 0.10
54
  else:
55
+ return 0.0
56
 
57
  def get_discount_info(self):
 
58
  discount_rate = self.get_discount_rate()
59
 
60
  if discount_rate <= 0:
 
67
  discount_display = ""
68
 
69
  if "discount" in discount_info.lower() and "no" not in discount_info.lower():
70
+ discount_display = TRAFFIC_DISCOUNT_DISPLAY.format(
71
+ discount_info=discount_info,
72
+ avg_vehicle_count=self.avg_vehicle_count,
73
+ observation_count=len(self.dataset_rows)
74
+ )
75
+
76
+ html = TRAFFIC_POPUP_BASE.format(
77
+ location_id=escape(str(self.key)),
78
+ discount_display=discount_display
79
+ )
80
+
81
+ recent_rows = self.recent_display_rows
82
+
83
+ if recent_rows:
84
+ html += TRAFFIC_RECORDS_HEADER.format(
85
+ recent_count=len(recent_rows),
86
+ total_count=len(self.dataset_rows)
87
+ )
88
+
89
+ for row in recent_rows:
90
  image_data = row.get('processed_image')
91
  image_html = ""
92
  if image_data:
93
  try:
 
94
  base64_encoded = base64.b64encode(image_data).decode('utf-8')
95
+ image_html = TRAFFIC_IMAGE_HTML.format(base64_encoded=base64_encoded)
 
 
 
 
 
96
  except Exception as e:
97
  logging.error(f"Error encoding image for {self.key}: {str(e)}")
98
  image_html = "<p>Image load failed</p>"
99
 
100
+ html += TRAFFIC_RECORD_ENTRY.format(
101
+ capture_time=escape(str(row['capture_time'])),
102
+ vehicle_count=escape(str(row['vehicle_count'])),
103
+ image_html=image_html
104
+ )
 
 
105
  else:
106
+ html += TRAFFIC_NO_RECORDS
107
 
108
  html += "</div>"
109
  return html
110
 
111
  def add_to_map(self, folium_map):
112
  if self.is_valid():
113
+
114
  if self.avg_vehicle_count < 2:
115
  color = 'blue' # Low traffic - 20% discount
116
  elif self.avg_vehicle_count < 5:
 
129
  def __init__(self, connection_params):
130
  self.connection_params = connection_params
131
  self.traffic_spots = []
132
+ self.spot_dict = {}
 
133
  self.load_limited_traffic_spots()
134
 
135
  def load_limited_traffic_spots(self, limit=10):
 
136
  try:
137
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
138
+ dataset_list = list(dataset)
 
139
 
140
+ location_data = {}
 
 
141
  for row in dataset_list:
142
  loc_id = row['location_id']
143
+ if loc_id not in location_data:
144
+ location_data[loc_id] = []
145
+ location_data[loc_id].append(row)
146
 
147
+ if len(location_data) > limit:
148
+ recent_activities = {}
149
+ for loc_id, rows in location_data.items():
150
+ if rows:
151
+ most_recent = max(rows, key=lambda x: x['capture_time'])
152
+ recent_activities[loc_id] = most_recent['capture_time']
153
 
154
+ top_locations = sorted(recent_activities.items(), key=lambda x: x[1], reverse=True)[:limit]
155
+ selected_locations = [loc_id for loc_id, _ in top_locations]
156
 
157
+ location_data = {loc_id: location_data[loc_id] for loc_id in selected_locations}
158
+
159
+ if not location_data:
160
+ logging.warning("No locations found in dataset")
161
+ return
162
+
163
+ location_ids = tuple(location_data.keys())
164
 
165
  with oracledb.connect(**self.connection_params) as conn:
166
  cursor = conn.cursor()
167
+
168
+ placeholders = ','.join([':' + str(i + 1) for i in range(len(location_ids))])
169
+
170
+ query = GET_TRAFFIC_CAMERA_LOCATIONS.format(placeholders=placeholders)
 
 
 
171
 
172
  cursor.execute(query, location_ids)
173
  spots = cursor.fetchall()
174
 
175
  self.traffic_spots = [
176
+ TDTrafficSpot(
177
  spot[0],
178
  spot[1],
179
  spot[2],
180
+ location_data.get(spot[0], [])
181
  )
182
  for spot in spots
183
  ]
184
 
 
185
  for spot in self.traffic_spots:
186
  self.spot_dict[spot.key] = spot
187
 
188
+ logging.info(f"Loaded {len(self.traffic_spots)} traffic spots with full historical data")
 
189
 
190
  except Exception as e:
191
  logging.error(f"Error loading traffic spots: {str(e)}")
 
193
  self.spot_dict = {}
194
 
195
  def load_specific_traffic_spots(self, keys):
 
 
196
  needed_keys = [key for key in keys if key not in self.spot_dict]
197
 
198
  if not needed_keys:
 
200
 
201
  try:
202
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
203
+ dataset_list = list(dataset)
 
204
 
205
+ location_data = {}
206
  for row in dataset_list:
207
  loc_id = row['location_id']
208
  if loc_id in needed_keys:
209
+ if loc_id not in location_data:
210
+ location_data[loc_id] = []
211
+ location_data[loc_id].append(row)
 
212
 
213
+ if location_data and needed_keys:
 
214
  with oracledb.connect(**self.connection_params) as conn:
215
  cursor = conn.cursor()
216
 
 
217
  placeholders = ','.join([':' + str(i + 1) for i in range(len(needed_keys))])
218
 
219
+ query = GET_TRAFFIC_CAMERA_LOCATIONS.format(placeholders=placeholders)
 
 
 
 
 
 
220
 
221
  cursor.execute(query, tuple(needed_keys))
222
  spots = cursor.fetchall()
223
 
224
  new_spots = [
225
+ TDTrafficSpot(
226
  spot[0],
227
  spot[1],
228
  spot[2],
229
+ location_data.get(spot[0], [])
230
  )
231
  for spot in spots
232
  ]
233
 
 
234
  for spot in new_spots:
235
  self.spot_dict[spot.key] = spot
236
  self.traffic_spots.append(spot)
237
 
238
+ logging.info(f"Loaded {len(new_spots)} additional traffic spots with full historical data")
 
239
 
240
  except Exception as e:
241
  logging.error(f"Error loading specific traffic spots: {str(e)}")
242
 
243
  def add_spots_to_map(self, folium_map, spot_keys=None):
 
244
  if spot_keys is None:
 
245
  for spot in self.traffic_spots:
246
  spot.add_to_map(folium_map)
247
  else:
 
248
  for key in spot_keys:
249
  if key in self.spot_dict:
250
  self.spot_dict[key].add_to_map(folium_map)
251
 
252
  def get_spot_by_key(self, key):
 
253
  if key in self.spot_dict:
254
  return self.spot_dict[key]
255
 
 
256
  self.load_specific_traffic_spots([key])
 
 
257
  return self.spot_dict.get(key)
app.py CHANGED
@@ -4,8 +4,20 @@ import streamlit as st
4
  from html import escape
5
  from streamlit_folium import st_folium, folium_static
6
  import math
7
- from AirbnbMapVisualiser import AirbnbMapVisualiser
8
  from huggingface_hub import login
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  def load_css(css_file):
@@ -14,7 +26,6 @@ def load_css(css_file):
14
 
15
 
16
  def highlight_search_terms(text, search_query):
17
- """Highlight search terms in text"""
18
  if not search_query:
19
  return text
20
 
@@ -23,7 +34,6 @@ def highlight_search_terms(text, search_query):
23
 
24
  for term in search_terms:
25
  if term.strip():
26
- # Case-insensitive replacement with word boundaries
27
  pattern = f'(?i)\\b{term}\\b'
28
  replacement = f'<span class="highlight">{term}</span>'
29
  highlighted_text = re.sub(pattern, replacement, highlighted_text)
@@ -44,28 +54,24 @@ def render_review_dialog():
44
  try:
45
  review_date, reviewer_name, comments = review
46
 
47
- # Highlight search terms in comments if search query exists
48
  highlighted_comments = highlight_search_terms(
49
  str(comments),
50
  st.session_state.search_query
51
  )
52
 
53
- st.markdown(f"""
54
- <div class="review-card">
55
- <div class="review-header">
56
- {escape(str(reviewer_name))} - {escape(str(review_date))}
57
- </div>
58
- <div class="review-content">
59
- {highlighted_comments}
60
- </div>
61
- </div>
62
- """, unsafe_allow_html=True)
63
  except Exception as e:
64
  st.error(f"Error displaying review: {str(e)}")
65
  else:
66
  st.info("No reviews available for this listing.")
67
 
68
-
69
  def main():
70
  st.set_page_config(
71
  layout="wide",
@@ -73,8 +79,6 @@ def main():
73
  initial_sidebar_state="expanded"
74
  )
75
  load_css('style.css')
76
-
77
- # Initialize session state
78
  if 'center_lat' not in st.session_state:
79
  st.session_state.center_lat = None
80
  if 'center_lng' not in st.session_state:
@@ -102,142 +106,77 @@ def main():
102
  if 'show_search_explanation' not in st.session_state:
103
  st.session_state.show_search_explanation = False
104
  if 'listings_limit' not in st.session_state:
105
- st.session_state.listings_limit = 10 # Default to 10 listings
106
-
107
- # Initialize visualizer with loading message for tokenizer
108
  if 'visualizer' not in st.session_state:
109
  with st.spinner('Loading HKUST BNB+ ...'):
110
- st.session_state.visualizer = AirbnbMapVisualiser()
111
  st.session_state.tokenizer_loaded = True
112
-
113
  visualizer = st.session_state.visualizer
114
-
115
- # Check if visualizer is properly initialized
116
  if visualizer is None or not hasattr(visualizer, 'neighborhoods'):
117
  st.error("Error initializing the application. Please refresh the page.")
118
  return
119
-
120
- # Show explanations if requested
121
  if st.session_state.show_traffic_explanation:
122
  with st.expander("📊 Traffic-Based Discount System", expanded=True):
123
- st.markdown("""
124
- ### How HKUST BNB+ Acheived (E)SG , use Traffic Spot from Department of Transport and do traffic analysis hence provided discount according
125
- to the average traffic on the previous days.
126
-
127
- We use real-time traffic data to offer you the best possible rates:
128
-
129
- * **Blue Camera Icons**: Areas with very low traffic (less than 2 vehicles detected)
130
- * Enjoy a peaceful stay with **20% DISCOUNT** on these properties!
131
-
132
- * **Orange Camera Icons**: Areas with moderate traffic (2-5 vehicles detected)
133
- * Get a **10% DISCOUNT** on these properties!
134
-
135
- * **Purple Camera Icons**: Areas with heavier traffic (more than 5 vehicles)
136
- * Standard rates apply for these properties
137
-
138
- Look for the blue connecting lines on the map to see which traffic spot affects each property!
139
-
140
- Remark : Currently only few traffic spot avaliable, in the future will provide more.
141
- """)
142
  if st.button("Close", key="close_traffic_btn"):
143
  st.session_state.show_traffic_explanation = False
144
  st.rerun()
145
-
146
  if st.session_state.show_search_explanation:
147
  with st.expander("🔍 Smart Search System", expanded=True):
148
- st.markdown("""
149
- ### How HKUST BNB+ Acheived E(S)G , use keyword to provided semantic relevance analysis to matches the require need from HKUST Student
150
-
151
- Our advanced search technology goes beyond simple keyword matching to understand the meaning behind your search terms:
152
-
153
- When you search for terms like "quiet," "convenient," or "spacious," our system:
154
- 1. Analyzes both listing titles and actual guest reviews
155
- 2. Understands the context and meaning (not just matching exact words)
156
- 3. Ranks listings based on overall relevance to your search
157
-
158
- **Search Match Types:**
159
- * **"Strong match in title and reviews"** - Perfect matches in both property description and guest experiences
160
- * **"Strong match in listing title"** - Property description matches your needs very well
161
- * **"Strong match in reviews"** - Guest experiences align perfectly with what you're looking for
162
- * **"Better match in listing title/reviews"** - One source is more relevant than the other
163
- * **"Moderate semantic match"** - Some relevance but not a perfect match
164
-
165
- This helps you find properties that truly match what you're looking for, even if they don't use the exact words in your search!
166
- """)
167
  if st.button("Close", key="close_search_btn"):
168
  st.session_state.show_search_explanation = False
169
  st.rerun()
170
-
171
  with st.sidebar:
172
- st.markdown(
173
- '<p class="sidebar-header">HKUST BNB+<BR/></p>',
174
- unsafe_allow_html=True)
175
-
176
  search_query = st.text_input(
177
  "🔍 Search listings",
178
  value=st.session_state.search_query,
179
  placeholder="Try: 'cozy , quiet '"
180
  )
181
-
182
  if search_query != st.session_state.search_query:
183
  st.session_state.search_query = search_query
184
  st.session_state.current_page = 1
185
  st.session_state.show_review_dialog = False
186
-
187
- st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
188
-
189
  neighborhood = st.selectbox(
190
  "Select Neighborhood",
191
  options=visualizer.neighborhoods,
192
  index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
193
  )
194
-
195
- # Add dropdown for selecting number of listings to show
196
  listings_limit = st.selectbox(
197
  "Number of listings to show",
198
  options=[10, 20, 30, 40, 50],
199
- index=0, # Default to 10
200
  help="Select how many listings to display for this neighborhood"
201
  )
202
-
203
- # Update session state if listings_limit has changed
204
  if listings_limit != st.session_state.listings_limit:
205
  st.session_state.listings_limit = listings_limit
206
- # Reset to page 1 when changing the number of listings
207
  st.session_state.current_page = 1
208
  st.session_state.show_review_dialog = False
209
-
210
  show_traffic = st.checkbox("Show Traffic Cameras", value=True)
211
-
212
- st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
213
-
214
- # Help section in sidebar
215
  st.markdown("### 💡 Help & Information")
216
-
217
  col1, col2 = st.columns(2)
218
  with col1:
219
  if st.button("Green Discount", key="traffic_info_btn"):
220
  st.session_state.show_traffic_explanation = True
221
  st.rerun()
222
-
223
  with col2:
224
  if st.button("Semantic Search", key="search_info_btn"):
225
  st.session_state.show_search_explanation = True
226
  st.rerun()
227
-
228
  if st.button("Reset All", key="reset_btn"):
229
  st.session_state.center_lat = None
230
  st.session_state.center_lng = None
231
  st.session_state.selected_id = None
232
  st.session_state.current_page = 1
233
  st.session_state.search_query = ""
234
- st.session_state.listings_limit = 10 # Reset to default
235
  st.session_state.show_review_dialog = False
236
  st.session_state.show_traffic_explanation = False
237
  st.session_state.show_search_explanation = False
238
  st.rerun()
239
-
240
- # Create map and get data - pass current page information and listings limit
241
  m, df = visualizer.create_map_and_data(
242
  neighborhood,
243
  show_traffic,
@@ -249,8 +188,6 @@ def main():
249
  st.session_state.items_per_page,
250
  st.session_state.listings_limit
251
  )
252
-
253
- # Handle neighborhood change
254
  if st.session_state.previous_neighborhood != neighborhood:
255
  st.session_state.current_page = 1
256
  if not df.empty:
@@ -260,87 +197,71 @@ def main():
260
  st.session_state.previous_neighborhood = neighborhood
261
  st.session_state.show_review_dialog = False
262
  st.rerun()
263
-
264
  if m is None:
265
  st.error("No data available for the selected neighborhood")
266
  return
267
-
268
  col1, col2 = st.columns([7, 3])
269
-
270
  with col1:
271
  st.markdown('<div class="map-container">', unsafe_allow_html=True)
272
  st_folium(m, width=None, height=700)
273
  st.markdown('</div>', unsafe_allow_html=True)
274
-
275
  with col2:
276
- # Display info about how many listings are being shown
277
  st.markdown(
278
- f"<p style='text-align:center; color:#4285f4;'>Showing {st.session_state.listings_limit} listings in {neighborhood}</p>",
279
- unsafe_allow_html=True)
280
-
 
 
 
281
  total_items = len(df)
282
  total_pages = math.ceil(total_items / st.session_state.items_per_page)
283
  st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
284
  start_idx = (st.session_state.current_page - 1) * st.session_state.items_per_page
285
  end_idx = min(start_idx + st.session_state.items_per_page, total_items)
286
-
287
  st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
288
-
289
  for idx in range(start_idx, end_idx):
290
  row = df.iloc[idx]
291
  background_color = "#E3F2FD" if st.session_state.selected_id == row['id'] else "white"
292
-
293
- # Calculate discount based on nearest traffic spot
294
  discounted_price = row['price']
295
  discount_tag = ""
296
-
297
- # Find nearest traffic spot for this listing
298
  listing_lat = row['latitude']
299
  listing_lng = row['longitude']
300
-
301
- # Use the visualizer's method to find the nearest traffic spot
302
  nearest_spot, distance = visualizer.find_nearest_traffic_spot(listing_lat, listing_lng)
303
-
304
- # Apply discount if there's a nearest spot
305
  if nearest_spot:
306
  discount_rate = nearest_spot.get_discount_rate()
307
  if discount_rate > 0:
308
  discounted_price = row['price'] * (1 - discount_rate)
309
  discount_percentage = int(discount_rate * 100)
310
  discount_tag = f"""<span class="discount-tag">-{discount_percentage}%</span>"""
311
-
312
- # Price display logic
313
  if discount_tag:
314
- price_display = f"""<p class="listing-info">💰 <span class="original-price">${row['price']:.0f}</span> <span class="discounted-price">${discounted_price:.0f}</span> {discount_tag}</p>"""
 
 
 
 
315
  else:
316
- price_display = f"""<p class="listing-info">💰 ${row['price']:.0f}</p>"""
317
-
318
  relevance_info = ""
319
  if st.session_state.search_query and 'relevance_percentage' in row:
320
- relevance_info = f"""<p class="listing-info"> 🎯 Relevance: {row['relevance_percentage']:.0f}% </p>"""
321
- if 'matching_features' in row:
322
- matching_features = row['matching_features']
323
- if matching_features and matching_features != "No direct matches":
324
- relevance_info += f"""<p class="listing-info">✨ Matching: {escape(str(matching_features))}</p>"""
325
-
326
- st.markdown(f"""
327
- <div class="listing-card" style="background-color: {background_color}">
328
- <h4 class="listing-title">{escape(str(row['name']))}</h4>
329
- {price_display}
330
- <p class="listing-info">🏠 {escape(str(row['room_type']))}</p>
331
- <p class="listing-info">⭐ Reviews: {row['number_of_reviews']:.0f}</p>
332
- {relevance_info}</div>
333
- """, unsafe_allow_html=True)
334
-
335
  col_details, col_reviews = st.columns(2)
336
-
337
  with col_details:
338
  if st.button("View Details", key=f"btn_{row['id']}"):
339
  st.session_state.selected_id = row['id']
340
  st.session_state.center_lat = row['latitude']
341
  st.session_state.center_lng = row['longitude']
342
  st.rerun()
343
-
344
  with col_reviews:
345
  if st.button("View Reviews", key=f"review_btn_{row['id']}"):
346
  st.session_state.show_review_dialog = True
@@ -348,12 +269,8 @@ def main():
348
  st.session_state.current_review_listing_name = row['name']
349
  st.session_state.scroll_to_review = True
350
  st.rerun()
351
-
352
  st.markdown('</div>', unsafe_allow_html=True)
353
-
354
- # Pagination controls
355
  col_prev, col_select, col_next = st.columns([1, 1, 1])
356
-
357
  with col_select:
358
  page_options = list(range(1, total_pages + 1))
359
  new_page = st.selectbox(
@@ -363,7 +280,6 @@ def main():
363
  key="page_selector",
364
  label_visibility="collapsed"
365
  )
366
-
367
  if new_page != st.session_state.current_page:
368
  st.session_state.current_page = new_page
369
  new_start_idx = (new_page - 1) * st.session_state.items_per_page
@@ -373,7 +289,6 @@ def main():
373
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
374
  st.session_state.show_review_dialog = False
375
  st.rerun()
376
-
377
  with col_prev:
378
  if st.button("← Previous", disabled=st.session_state.current_page <= 1):
379
  st.session_state.current_page -= 1
@@ -384,7 +299,6 @@ def main():
384
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
385
  st.session_state.show_review_dialog = False
386
  st.rerun()
387
-
388
  with col_next:
389
  if st.button("Next →", disabled=st.session_state.current_page >= total_pages):
390
  st.session_state.current_page += 1
@@ -395,8 +309,6 @@ def main():
395
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
396
  st.session_state.show_review_dialog = False
397
  st.rerun()
398
-
399
- # Show review dialog if active
400
  if st.session_state.show_review_dialog:
401
  render_review_dialog()
402
 
@@ -405,6 +317,4 @@ if __name__ == "__main__":
405
  token = os.environ.get("HF_TOKEN")
406
  if token:
407
  login(token=token)
408
- main()
409
- else:
410
- main()
 
4
  from html import escape
5
  from streamlit_folium import st_folium, folium_static
6
  import math
7
+ from HKUSTBNBVisualiser import HKUSTBNBVisualiser
8
  from huggingface_hub import login
9
+ from HKUSTBNBConstant import (
10
+ SIDEBAR_HEADER,
11
+ SIDEBAR_DIVIDER,
12
+ TRAFFIC_EXPLANATION,
13
+ SEARCH_EXPLANATION,
14
+ REVIEW_CARD_TEMPLATE,
15
+ LISTINGS_COUNT_INFO,
16
+ LISTING_CARD_TEMPLATE,
17
+ PRICE_DISPLAY_WITH_DISCOUNT,
18
+ PRICE_DISPLAY_NORMAL,
19
+ RELEVANCE_INFO_LISTING
20
+ )
21
 
22
 
23
  def load_css(css_file):
 
26
 
27
 
28
  def highlight_search_terms(text, search_query):
 
29
  if not search_query:
30
  return text
31
 
 
34
 
35
  for term in search_terms:
36
  if term.strip():
 
37
  pattern = f'(?i)\\b{term}\\b'
38
  replacement = f'<span class="highlight">{term}</span>'
39
  highlighted_text = re.sub(pattern, replacement, highlighted_text)
 
54
  try:
55
  review_date, reviewer_name, comments = review
56
 
 
57
  highlighted_comments = highlight_search_terms(
58
  str(comments),
59
  st.session_state.search_query
60
  )
61
 
62
+ st.markdown(
63
+ REVIEW_CARD_TEMPLATE.format(
64
+ reviewer_name=escape(str(reviewer_name)),
65
+ review_date=escape(str(review_date)),
66
+ highlighted_comments=highlighted_comments
67
+ ),
68
+ unsafe_allow_html=True
69
+ )
 
 
70
  except Exception as e:
71
  st.error(f"Error displaying review: {str(e)}")
72
  else:
73
  st.info("No reviews available for this listing.")
74
 
 
75
  def main():
76
  st.set_page_config(
77
  layout="wide",
 
79
  initial_sidebar_state="expanded"
80
  )
81
  load_css('style.css')
 
 
82
  if 'center_lat' not in st.session_state:
83
  st.session_state.center_lat = None
84
  if 'center_lng' not in st.session_state:
 
106
  if 'show_search_explanation' not in st.session_state:
107
  st.session_state.show_search_explanation = False
108
  if 'listings_limit' not in st.session_state:
109
+ st.session_state.listings_limit = 10
 
 
110
  if 'visualizer' not in st.session_state:
111
  with st.spinner('Loading HKUST BNB+ ...'):
112
+ st.session_state.visualizer = HKUSTBNBVisualiser()
113
  st.session_state.tokenizer_loaded = True
 
114
  visualizer = st.session_state.visualizer
 
 
115
  if visualizer is None or not hasattr(visualizer, 'neighborhoods'):
116
  st.error("Error initializing the application. Please refresh the page.")
117
  return
 
 
118
  if st.session_state.show_traffic_explanation:
119
  with st.expander("📊 Traffic-Based Discount System", expanded=True):
120
+ st.markdown(TRAFFIC_EXPLANATION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  if st.button("Close", key="close_traffic_btn"):
122
  st.session_state.show_traffic_explanation = False
123
  st.rerun()
 
124
  if st.session_state.show_search_explanation:
125
  with st.expander("🔍 Smart Search System", expanded=True):
126
+ st.markdown(SEARCH_EXPLANATION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  if st.button("Close", key="close_search_btn"):
128
  st.session_state.show_search_explanation = False
129
  st.rerun()
 
130
  with st.sidebar:
131
+ st.markdown(SIDEBAR_HEADER, unsafe_allow_html=True)
 
 
 
132
  search_query = st.text_input(
133
  "🔍 Search listings",
134
  value=st.session_state.search_query,
135
  placeholder="Try: 'cozy , quiet '"
136
  )
 
137
  if search_query != st.session_state.search_query:
138
  st.session_state.search_query = search_query
139
  st.session_state.current_page = 1
140
  st.session_state.show_review_dialog = False
141
+ st.markdown(SIDEBAR_DIVIDER, unsafe_allow_html=True)
 
 
142
  neighborhood = st.selectbox(
143
  "Select Neighborhood",
144
  options=visualizer.neighborhoods,
145
  index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
146
  )
 
 
147
  listings_limit = st.selectbox(
148
  "Number of listings to show",
149
  options=[10, 20, 30, 40, 50],
150
+ index=0,
151
  help="Select how many listings to display for this neighborhood"
152
  )
 
 
153
  if listings_limit != st.session_state.listings_limit:
154
  st.session_state.listings_limit = listings_limit
 
155
  st.session_state.current_page = 1
156
  st.session_state.show_review_dialog = False
 
157
  show_traffic = st.checkbox("Show Traffic Cameras", value=True)
158
+ st.markdown(SIDEBAR_DIVIDER, unsafe_allow_html=True)
 
 
 
159
  st.markdown("### 💡 Help & Information")
 
160
  col1, col2 = st.columns(2)
161
  with col1:
162
  if st.button("Green Discount", key="traffic_info_btn"):
163
  st.session_state.show_traffic_explanation = True
164
  st.rerun()
 
165
  with col2:
166
  if st.button("Semantic Search", key="search_info_btn"):
167
  st.session_state.show_search_explanation = True
168
  st.rerun()
 
169
  if st.button("Reset All", key="reset_btn"):
170
  st.session_state.center_lat = None
171
  st.session_state.center_lng = None
172
  st.session_state.selected_id = None
173
  st.session_state.current_page = 1
174
  st.session_state.search_query = ""
175
+ st.session_state.listings_limit = 10
176
  st.session_state.show_review_dialog = False
177
  st.session_state.show_traffic_explanation = False
178
  st.session_state.show_search_explanation = False
179
  st.rerun()
 
 
180
  m, df = visualizer.create_map_and_data(
181
  neighborhood,
182
  show_traffic,
 
188
  st.session_state.items_per_page,
189
  st.session_state.listings_limit
190
  )
 
 
191
  if st.session_state.previous_neighborhood != neighborhood:
192
  st.session_state.current_page = 1
193
  if not df.empty:
 
197
  st.session_state.previous_neighborhood = neighborhood
198
  st.session_state.show_review_dialog = False
199
  st.rerun()
 
200
  if m is None:
201
  st.error("No data available for the selected neighborhood")
202
  return
 
203
  col1, col2 = st.columns([7, 3])
 
204
  with col1:
205
  st.markdown('<div class="map-container">', unsafe_allow_html=True)
206
  st_folium(m, width=None, height=700)
207
  st.markdown('</div>', unsafe_allow_html=True)
 
208
  with col2:
 
209
  st.markdown(
210
+ LISTINGS_COUNT_INFO.format(
211
+ listings_limit=st.session_state.listings_limit,
212
+ neighborhood=neighborhood
213
+ ),
214
+ unsafe_allow_html=True
215
+ )
216
  total_items = len(df)
217
  total_pages = math.ceil(total_items / st.session_state.items_per_page)
218
  st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
219
  start_idx = (st.session_state.current_page - 1) * st.session_state.items_per_page
220
  end_idx = min(start_idx + st.session_state.items_per_page, total_items)
 
221
  st.markdown('<div class="scrollable-container">', unsafe_allow_html=True)
 
222
  for idx in range(start_idx, end_idx):
223
  row = df.iloc[idx]
224
  background_color = "#E3F2FD" if st.session_state.selected_id == row['id'] else "white"
 
 
225
  discounted_price = row['price']
226
  discount_tag = ""
 
 
227
  listing_lat = row['latitude']
228
  listing_lng = row['longitude']
 
 
229
  nearest_spot, distance = visualizer.find_nearest_traffic_spot(listing_lat, listing_lng)
 
 
230
  if nearest_spot:
231
  discount_rate = nearest_spot.get_discount_rate()
232
  if discount_rate > 0:
233
  discounted_price = row['price'] * (1 - discount_rate)
234
  discount_percentage = int(discount_rate * 100)
235
  discount_tag = f"""<span class="discount-tag">-{discount_percentage}%</span>"""
 
 
236
  if discount_tag:
237
+ price_display = PRICE_DISPLAY_WITH_DISCOUNT.format(
238
+ original_price=row['price'],
239
+ discounted_price=discounted_price,
240
+ discount_tag=discount_tag
241
+ )
242
  else:
243
+ price_display = PRICE_DISPLAY_NORMAL.format(price=row['price'])
 
244
  relevance_info = ""
245
  if st.session_state.search_query and 'relevance_percentage' in row:
246
+ relevance_info = RELEVANCE_INFO_LISTING.format(relevance_percentage=row['relevance_percentage'])
247
+ st.markdown(
248
+ LISTING_CARD_TEMPLATE.format(
249
+ background_color=background_color,
250
+ listing_name=escape(str(row['name'])),
251
+ price_display=price_display,
252
+ room_type=escape(str(row['room_type'])),
253
+ review_count=row['number_of_reviews'],
254
+ relevance_info=relevance_info
255
+ ),
256
+ unsafe_allow_html=True
257
+ )
 
 
 
258
  col_details, col_reviews = st.columns(2)
 
259
  with col_details:
260
  if st.button("View Details", key=f"btn_{row['id']}"):
261
  st.session_state.selected_id = row['id']
262
  st.session_state.center_lat = row['latitude']
263
  st.session_state.center_lng = row['longitude']
264
  st.rerun()
 
265
  with col_reviews:
266
  if st.button("View Reviews", key=f"review_btn_{row['id']}"):
267
  st.session_state.show_review_dialog = True
 
269
  st.session_state.current_review_listing_name = row['name']
270
  st.session_state.scroll_to_review = True
271
  st.rerun()
 
272
  st.markdown('</div>', unsafe_allow_html=True)
 
 
273
  col_prev, col_select, col_next = st.columns([1, 1, 1])
 
274
  with col_select:
275
  page_options = list(range(1, total_pages + 1))
276
  new_page = st.selectbox(
 
280
  key="page_selector",
281
  label_visibility="collapsed"
282
  )
 
283
  if new_page != st.session_state.current_page:
284
  st.session_state.current_page = new_page
285
  new_start_idx = (new_page - 1) * st.session_state.items_per_page
 
289
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
290
  st.session_state.show_review_dialog = False
291
  st.rerun()
 
292
  with col_prev:
293
  if st.button("← Previous", disabled=st.session_state.current_page <= 1):
294
  st.session_state.current_page -= 1
 
299
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
300
  st.session_state.show_review_dialog = False
301
  st.rerun()
 
302
  with col_next:
303
  if st.button("Next →", disabled=st.session_state.current_page >= total_pages):
304
  st.session_state.current_page += 1
 
309
  st.session_state.center_lng = df.iloc[new_start_idx]['longitude']
310
  st.session_state.show_review_dialog = False
311
  st.rerun()
 
 
312
  if st.session_state.show_review_dialog:
313
  render_review_dialog()
314
 
 
317
  token = os.environ.get("HF_TOKEN")
318
  if token:
319
  login(token=token)
320
+ main()