Gordon Li committed on
Commit
53d8ba3
·
1 Parent(s): 98e43b3

Retrieve listings using the chosen size limit

Browse files
Files changed (2) hide show
  1. AirbnbMapVisualiser.py +58 -11
  2. app.py +29 -2
AirbnbMapVisualiser.py CHANGED
@@ -44,7 +44,11 @@ class AirbnbMapVisualiser:
44
  try:
45
  self.neighborhoods = self.get_all_neighborhoods()
46
  self.cached_listings = {}
47
- self.cached_listings["Southern"] = self.get_neighborhood_listings("Southern")
 
 
 
 
48
  self.cached_embeddings = {} # Cache for listing embeddings
49
  except Exception as e:
50
  print(f"Initialization error: {str(e)}")
@@ -52,7 +56,7 @@ class AirbnbMapVisualiser:
52
  self.cached_listings = {}
53
  self.cached_embeddings = {}
54
 
55
- def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=1.0):
56
  """Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
57
  nearest_spot = None
58
  min_distance = float('inf')
@@ -98,9 +102,28 @@ class AirbnbMapVisualiser:
98
  finally:
99
  self.pool.release(connection)
100
 
101
- def get_neighborhood_listings(self, neighborhood):
102
- if neighborhood in self.cached_listings:
103
- return self.cached_listings[neighborhood]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  connection = self.pool.acquire()
106
  try:
@@ -121,11 +144,11 @@ class AirbnbMapVisualiser:
121
  m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
122
  m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
123
  ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
124
- FETCH FIRST 100 ROWS ONLY
125
- """, neighborhood=neighborhood)
126
 
127
  listings = cursor.fetchall()
128
- self.cached_listings[neighborhood] = listings
129
  return listings
130
  except Exception as e:
131
  print(f"Database error: {str(e)}")
@@ -345,8 +368,10 @@ class AirbnbMapVisualiser:
345
  review_similarity = self.compute_similarity(query_embedding, review_embedding)
346
 
347
  # Determine which source matched better
348
- if title_similarity > 0.2 and review_similarity > 0.2:
349
  return "Strong match in title and reviews"
 
 
350
  elif title_similarity > 0.2:
351
  return "Strong match in listing title"
352
  elif review_similarity > 0.2:
@@ -362,8 +387,30 @@ class AirbnbMapVisualiser:
362
  return df.sort_values('relevance_score', ascending=False)
363
 
364
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
365
- selected_id=None, search_query=None, current_page=1, items_per_page=3):
366
- listings = self.get_neighborhood_listings(neighborhood)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  if not listings:
369
  return None, None
 
44
  try:
45
  self.neighborhoods = self.get_all_neighborhoods()
46
  self.cached_listings = {}
47
+ # Initialize a nested dictionary to store listings by neighborhood and limit
48
+ self.cached_listings = {}
49
+ # Pre-cache Southern neighborhood with default limit of 10
50
+ self.cached_listings["Southern"] = {}
51
+ self.cached_listings["Southern"][10] = self.get_neighborhood_listings("Southern", 10)
52
  self.cached_embeddings = {} # Cache for listing embeddings
53
  except Exception as e:
54
  print(f"Initialization error: {str(e)}")
 
56
  self.cached_listings = {}
57
  self.cached_embeddings = {}
58
 
59
+ def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
60
  """Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
61
  nearest_spot = None
62
  min_distance = float('inf')
 
102
  finally:
103
  self.pool.release(connection)
104
 
105
+ def get_neighborhood_listings(self, neighborhood, limit=10):
106
+ """
107
+ Get listings for a neighborhood with a specified limit.
108
+
109
+ Args:
110
+ neighborhood: The neighborhood to get listings for
111
+ limit: Maximum number of listings to return (10, 20, 30, 40, or 50)
112
+
113
+ Returns:
114
+ List of listings data
115
+ """
116
+ # Ensure limit is one of the allowed values
117
+ if limit not in [10, 20, 30, 40, 50]:
118
+ limit = 10 # Default to 10 if invalid limit provided
119
+
120
+ # Check if we already have this neighborhood and limit cached
121
+ if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
122
+ return self.cached_listings[neighborhood][limit]
123
+
124
+ # Initialize neighborhood in cache if needed
125
+ if neighborhood not in self.cached_listings:
126
+ self.cached_listings[neighborhood] = {}
127
 
128
  connection = self.pool.acquire()
129
  try:
 
144
  m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
145
  m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
146
  ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
147
+ FETCH FIRST :limit ROWS ONLY
148
+ """, neighborhood=neighborhood, limit=limit)
149
 
150
  listings = cursor.fetchall()
151
+ self.cached_listings[neighborhood][limit] = listings
152
  return listings
153
  except Exception as e:
154
  print(f"Database error: {str(e)}")
 
368
  review_similarity = self.compute_similarity(query_embedding, review_embedding)
369
 
370
  # Determine which source matched better
371
+ if title_similarity > 0.2 and review_similarity > 0:
372
  return "Strong match in title and reviews"
373
+ elif title_similarity > 0.2 and review_similarity > 0.2:
374
+ return "Strong match in title and strong match in reviews"
375
  elif title_similarity > 0.2:
376
  return "Strong match in listing title"
377
  elif review_similarity > 0.2:
 
387
  return df.sort_values('relevance_score', ascending=False)
388
 
389
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
390
+ selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
391
+ """
392
+ Create a map and dataframe of listings for a neighborhood
393
+
394
+ Args:
395
+ neighborhood: Neighborhood to get listings for
396
+ show_traffic: Whether to show traffic spots
397
+ center_lat: Center latitude for the map
398
+ center_lng: Center longitude for the map
399
+ selected_id: ID of the selected listing
400
+ search_query: Search query for filtering listings
401
+ current_page: Current page of pagination
402
+ items_per_page: Number of items per page
403
+ listings_limit: Maximum number of listings to display (10, 20, 30, 40, or 50)
404
+
405
+ Returns:
406
+ Tuple of (map, dataframe)
407
+ """
408
+ # Ensure listings_limit is valid
409
+ if listings_limit not in [10, 20, 30, 40, 50]:
410
+ listings_limit = 10
411
+
412
+ # Get the listings with the specified limit
413
+ listings = self.get_neighborhood_listings(neighborhood, listings_limit)
414
 
415
  if not listings:
416
  return None, None
app.py CHANGED
@@ -101,6 +101,8 @@ def main():
101
  st.session_state.show_traffic_explanation = False
102
  if 'show_search_explanation' not in st.session_state:
103
  st.session_state.show_search_explanation = False
 
 
104
 
105
  # Initialize visualizer with loading message for tokenizer
106
  if 'visualizer' not in st.session_state:
@@ -189,6 +191,22 @@ def main():
189
  options=visualizer.neighborhoods,
190
  index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
191
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  show_traffic = st.checkbox("Show Traffic Cameras", value=True)
193
 
194
  st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
@@ -213,19 +231,23 @@ def main():
213
  st.session_state.selected_id = None
214
  st.session_state.current_page = 1
215
  st.session_state.search_query = ""
 
216
  st.session_state.show_review_dialog = False
217
  st.session_state.show_traffic_explanation = False
218
  st.session_state.show_search_explanation = False
219
  st.rerun()
220
 
221
- # Create map and get data - pass current page information
222
  m, df = visualizer.create_map_and_data(
223
  neighborhood,
224
  show_traffic,
225
  st.session_state.center_lat,
226
  st.session_state.center_lng,
227
  st.session_state.selected_id,
228
- st.session_state.search_query
 
 
 
229
  )
230
 
231
  # Handle neighborhood change
@@ -251,6 +273,11 @@ def main():
251
  st.markdown('</div>', unsafe_allow_html=True)
252
 
253
  with col2:
 
 
 
 
 
254
  total_items = len(df)
255
  total_pages = math.ceil(total_items / st.session_state.items_per_page)
256
  st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
 
101
  st.session_state.show_traffic_explanation = False
102
  if 'show_search_explanation' not in st.session_state:
103
  st.session_state.show_search_explanation = False
104
+ if 'listings_limit' not in st.session_state:
105
+ st.session_state.listings_limit = 10 # Default to 10 listings
106
 
107
  # Initialize visualizer with loading message for tokenizer
108
  if 'visualizer' not in st.session_state:
 
191
  options=visualizer.neighborhoods,
192
  index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
193
  )
194
+
195
+ # Add dropdown for selecting number of listings to show
196
+ listings_limit = st.selectbox(
197
+ "Number of listings to show",
198
+ options=[10, 20, 30, 40, 50],
199
+ index=0, # Default to 10
200
+ help="Select how many listings to display for this neighborhood"
201
+ )
202
+
203
+ # Update session state if listings_limit has changed
204
+ if listings_limit != st.session_state.listings_limit:
205
+ st.session_state.listings_limit = listings_limit
206
+ # Reset to page 1 when changing the number of listings
207
+ st.session_state.current_page = 1
208
+ st.session_state.show_review_dialog = False
209
+
210
  show_traffic = st.checkbox("Show Traffic Cameras", value=True)
211
 
212
  st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
 
231
  st.session_state.selected_id = None
232
  st.session_state.current_page = 1
233
  st.session_state.search_query = ""
234
+ st.session_state.listings_limit = 10 # Reset to default
235
  st.session_state.show_review_dialog = False
236
  st.session_state.show_traffic_explanation = False
237
  st.session_state.show_search_explanation = False
238
  st.rerun()
239
 
240
+ # Create map and get data - pass current page information and listings limit
241
  m, df = visualizer.create_map_and_data(
242
  neighborhood,
243
  show_traffic,
244
  st.session_state.center_lat,
245
  st.session_state.center_lng,
246
  st.session_state.selected_id,
247
+ st.session_state.search_query,
248
+ st.session_state.current_page,
249
+ st.session_state.items_per_page,
250
+ st.session_state.listings_limit
251
  )
252
 
253
  # Handle neighborhood change
 
273
  st.markdown('</div>', unsafe_allow_html=True)
274
 
275
  with col2:
276
+ # Display info about how many listings are being shown
277
+ st.markdown(
278
+ f"<p style='text-align:center; color:#4285f4;'>Showing {st.session_state.listings_limit} listings in {neighborhood}</p>",
279
+ unsafe_allow_html=True)
280
+
281
  total_items = len(df)
282
  total_pages = math.ceil(total_items / st.session_state.items_per_page)
283
  st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)