Spaces:
Sleeping
Sleeping
Gordon Li
committed on
Commit
·
53d8ba3
1
Parent(s):
98e43b3
retrieve of size chosen
Browse files
- AirbnbMapVisualiser.py +58 -11
- app.py +29 -2
AirbnbMapVisualiser.py
CHANGED
@@ -44,7 +44,11 @@ class AirbnbMapVisualiser:
|
|
44 |
try:
|
45 |
self.neighborhoods = self.get_all_neighborhoods()
|
46 |
self.cached_listings = {}
|
47 |
-
|
|
|
|
|
|
|
|
|
48 |
self.cached_embeddings = {} # Cache for listing embeddings
|
49 |
except Exception as e:
|
50 |
print(f"Initialization error: {str(e)}")
|
@@ -52,7 +56,7 @@ class AirbnbMapVisualiser:
|
|
52 |
self.cached_listings = {}
|
53 |
self.cached_embeddings = {}
|
54 |
|
55 |
-
def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=
|
56 |
"""Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
|
57 |
nearest_spot = None
|
58 |
min_distance = float('inf')
|
@@ -98,9 +102,28 @@ class AirbnbMapVisualiser:
|
|
98 |
finally:
|
99 |
self.pool.release(connection)
|
100 |
|
101 |
-
def get_neighborhood_listings(self, neighborhood):
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
connection = self.pool.acquire()
|
106 |
try:
|
@@ -121,11 +144,11 @@ class AirbnbMapVisualiser:
|
|
121 |
m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
|
122 |
m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
|
123 |
ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
|
124 |
-
FETCH FIRST
|
125 |
-
""", neighborhood=neighborhood)
|
126 |
|
127 |
listings = cursor.fetchall()
|
128 |
-
self.cached_listings[neighborhood] = listings
|
129 |
return listings
|
130 |
except Exception as e:
|
131 |
print(f"Database error: {str(e)}")
|
@@ -345,8 +368,10 @@ class AirbnbMapVisualiser:
|
|
345 |
review_similarity = self.compute_similarity(query_embedding, review_embedding)
|
346 |
|
347 |
# Determine which source matched better
|
348 |
-
if title_similarity > 0.2 and review_similarity > 0
|
349 |
return "Strong match in title and reviews"
|
|
|
|
|
350 |
elif title_similarity > 0.2:
|
351 |
return "Strong match in listing title"
|
352 |
elif review_similarity > 0.2:
|
@@ -362,8 +387,30 @@ class AirbnbMapVisualiser:
|
|
362 |
return df.sort_values('relevance_score', ascending=False)
|
363 |
|
364 |
def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
|
365 |
-
selected_id=None, search_query=None, current_page=1, items_per_page=3):
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
|
368 |
if not listings:
|
369 |
return None, None
|
|
|
44 |
try:
|
45 |
self.neighborhoods = self.get_all_neighborhoods()
|
46 |
self.cached_listings = {}
|
47 |
+
# Initialize a nested dictionary to store listings by neighborhood and limit
|
48 |
+
self.cached_listings = {}
|
49 |
+
# Pre-cache Southern neighborhood with default limit of 10
|
50 |
+
self.cached_listings["Southern"] = {}
|
51 |
+
self.cached_listings["Southern"][10] = self.get_neighborhood_listings("Southern", 10)
|
52 |
self.cached_embeddings = {} # Cache for listing embeddings
|
53 |
except Exception as e:
|
54 |
print(f"Initialization error: {str(e)}")
|
|
|
56 |
self.cached_listings = {}
|
57 |
self.cached_embeddings = {}
|
58 |
|
59 |
+
def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
|
60 |
"""Find the nearest traffic spot within max_distance_km kilometers of an Airbnb listing"""
|
61 |
nearest_spot = None
|
62 |
min_distance = float('inf')
|
|
|
102 |
finally:
|
103 |
self.pool.release(connection)
|
104 |
|
105 |
+
def get_neighborhood_listings(self, neighborhood, limit=10):
|
106 |
+
"""
|
107 |
+
Get listings for a neighborhood with a specified limit.
|
108 |
+
|
109 |
+
Args:
|
110 |
+
neighborhood: The neighborhood to get listings for
|
111 |
+
limit: Maximum number of listings to return (10, 20, 30, 40, or 50)
|
112 |
+
|
113 |
+
Returns:
|
114 |
+
List of listings data
|
115 |
+
"""
|
116 |
+
# Ensure limit is one of the allowed values
|
117 |
+
if limit not in [10, 20, 30, 40, 50]:
|
118 |
+
limit = 10 # Default to 10 if invalid limit provided
|
119 |
+
|
120 |
+
# Check if we already have this neighborhood and limit cached
|
121 |
+
if neighborhood in self.cached_listings and limit in self.cached_listings[neighborhood]:
|
122 |
+
return self.cached_listings[neighborhood][limit]
|
123 |
+
|
124 |
+
# Initialize neighborhood in cache if needed
|
125 |
+
if neighborhood not in self.cached_listings:
|
126 |
+
self.cached_listings[neighborhood] = {}
|
127 |
|
128 |
connection = self.pool.acquire()
|
129 |
try:
|
|
|
144 |
m.LATITUDE, m.LONGITUDE, m.ROOM_TYPE, m.PRICE,
|
145 |
m.REVIEWS_PER_MONTH, m.MINIMUM_NIGHTS, m.AVAILABILITY_365
|
146 |
ORDER BY COUNT(r.LISTING_ID) DESC, m.PRICE ASC
|
147 |
+
FETCH FIRST :limit ROWS ONLY
|
148 |
+
""", neighborhood=neighborhood, limit=limit)
|
149 |
|
150 |
listings = cursor.fetchall()
|
151 |
+
self.cached_listings[neighborhood][limit] = listings
|
152 |
return listings
|
153 |
except Exception as e:
|
154 |
print(f"Database error: {str(e)}")
|
|
|
368 |
review_similarity = self.compute_similarity(query_embedding, review_embedding)
|
369 |
|
370 |
# Determine which source matched better
|
371 |
+
if title_similarity > 0.2 and review_similarity > 0:
|
372 |
return "Strong match in title and reviews"
|
373 |
+
elif title_similarity > 0.2 and review_similarity > 0.2:
|
374 |
+
return "Strong match in title and strong match in reviews"
|
375 |
elif title_similarity > 0.2:
|
376 |
return "Strong match in listing title"
|
377 |
elif review_similarity > 0.2:
|
|
|
387 |
return df.sort_values('relevance_score', ascending=False)
|
388 |
|
389 |
def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
|
390 |
+
selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
|
391 |
+
"""
|
392 |
+
Create a map and dataframe of listings for a neighborhood
|
393 |
+
|
394 |
+
Args:
|
395 |
+
neighborhood: Neighborhood to get listings for
|
396 |
+
show_traffic: Whether to show traffic spots
|
397 |
+
center_lat: Center latitude for the map
|
398 |
+
center_lng: Center longitude for the map
|
399 |
+
selected_id: ID of the selected listing
|
400 |
+
search_query: Search query for filtering listings
|
401 |
+
current_page: Current page of pagination
|
402 |
+
items_per_page: Number of items per page
|
403 |
+
listings_limit: Maximum number of listings to display (10, 20, 30, 40, or 50)
|
404 |
+
|
405 |
+
Returns:
|
406 |
+
Tuple of (map, dataframe)
|
407 |
+
"""
|
408 |
+
# Ensure listings_limit is valid
|
409 |
+
if listings_limit not in [10, 20, 30, 40, 50]:
|
410 |
+
listings_limit = 10
|
411 |
+
|
412 |
+
# Get the listings with the specified limit
|
413 |
+
listings = self.get_neighborhood_listings(neighborhood, listings_limit)
|
414 |
|
415 |
if not listings:
|
416 |
return None, None
|
app.py
CHANGED
@@ -101,6 +101,8 @@ def main():
|
|
101 |
st.session_state.show_traffic_explanation = False
|
102 |
if 'show_search_explanation' not in st.session_state:
|
103 |
st.session_state.show_search_explanation = False
|
|
|
|
|
104 |
|
105 |
# Initialize visualizer with loading message for tokenizer
|
106 |
if 'visualizer' not in st.session_state:
|
@@ -189,6 +191,22 @@ def main():
|
|
189 |
options=visualizer.neighborhoods,
|
190 |
index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
|
191 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
show_traffic = st.checkbox("Show Traffic Cameras", value=True)
|
193 |
|
194 |
st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
|
@@ -213,19 +231,23 @@ def main():
|
|
213 |
st.session_state.selected_id = None
|
214 |
st.session_state.current_page = 1
|
215 |
st.session_state.search_query = ""
|
|
|
216 |
st.session_state.show_review_dialog = False
|
217 |
st.session_state.show_traffic_explanation = False
|
218 |
st.session_state.show_search_explanation = False
|
219 |
st.rerun()
|
220 |
|
221 |
-
# Create map and get data - pass current page information
|
222 |
m, df = visualizer.create_map_and_data(
|
223 |
neighborhood,
|
224 |
show_traffic,
|
225 |
st.session_state.center_lat,
|
226 |
st.session_state.center_lng,
|
227 |
st.session_state.selected_id,
|
228 |
-
st.session_state.search_query
|
|
|
|
|
|
|
229 |
)
|
230 |
|
231 |
# Handle neighborhood change
|
@@ -251,6 +273,11 @@ def main():
|
|
251 |
st.markdown('</div>', unsafe_allow_html=True)
|
252 |
|
253 |
with col2:
|
|
|
|
|
|
|
|
|
|
|
254 |
total_items = len(df)
|
255 |
total_pages = math.ceil(total_items / st.session_state.items_per_page)
|
256 |
st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
|
|
|
101 |
st.session_state.show_traffic_explanation = False
|
102 |
if 'show_search_explanation' not in st.session_state:
|
103 |
st.session_state.show_search_explanation = False
|
104 |
+
if 'listings_limit' not in st.session_state:
|
105 |
+
st.session_state.listings_limit = 10 # Default to 10 listings
|
106 |
|
107 |
# Initialize visualizer with loading message for tokenizer
|
108 |
if 'visualizer' not in st.session_state:
|
|
|
191 |
options=visualizer.neighborhoods,
|
192 |
index=visualizer.neighborhoods.index("Kowloon City") if "Kowloon City" in visualizer.neighborhoods else 0
|
193 |
)
|
194 |
+
|
195 |
+
# Add dropdown for selecting number of listings to show
|
196 |
+
listings_limit = st.selectbox(
|
197 |
+
"Number of listings to show",
|
198 |
+
options=[10, 20, 30, 40, 50],
|
199 |
+
index=0, # Default to 10
|
200 |
+
help="Select how many listings to display for this neighborhood"
|
201 |
+
)
|
202 |
+
|
203 |
+
# Update session state if listings_limit has changed
|
204 |
+
if listings_limit != st.session_state.listings_limit:
|
205 |
+
st.session_state.listings_limit = listings_limit
|
206 |
+
# Reset to page 1 when changing the number of listings
|
207 |
+
st.session_state.current_page = 1
|
208 |
+
st.session_state.show_review_dialog = False
|
209 |
+
|
210 |
show_traffic = st.checkbox("Show Traffic Cameras", value=True)
|
211 |
|
212 |
st.markdown('<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">', unsafe_allow_html=True)
|
|
|
231 |
st.session_state.selected_id = None
|
232 |
st.session_state.current_page = 1
|
233 |
st.session_state.search_query = ""
|
234 |
+
st.session_state.listings_limit = 10 # Reset to default
|
235 |
st.session_state.show_review_dialog = False
|
236 |
st.session_state.show_traffic_explanation = False
|
237 |
st.session_state.show_search_explanation = False
|
238 |
st.rerun()
|
239 |
|
240 |
+
# Create map and get data - pass current page information and listings limit
|
241 |
m, df = visualizer.create_map_and_data(
|
242 |
neighborhood,
|
243 |
show_traffic,
|
244 |
st.session_state.center_lat,
|
245 |
st.session_state.center_lng,
|
246 |
st.session_state.selected_id,
|
247 |
+
st.session_state.search_query,
|
248 |
+
st.session_state.current_page,
|
249 |
+
st.session_state.items_per_page,
|
250 |
+
st.session_state.listings_limit
|
251 |
)
|
252 |
|
253 |
# Handle neighborhood change
|
|
|
273 |
st.markdown('</div>', unsafe_allow_html=True)
|
274 |
|
275 |
with col2:
|
276 |
+
# Display info about how many listings are being shown
|
277 |
+
st.markdown(
|
278 |
+
f"<p style='text-align:center; color:#4285f4;'>Showing {st.session_state.listings_limit} listings in {neighborhood}</p>",
|
279 |
+
unsafe_allow_html=True)
|
280 |
+
|
281 |
total_items = len(df)
|
282 |
total_pages = math.ceil(total_items / st.session_state.items_per_page)
|
283 |
st.session_state.current_page = min(max(1, st.session_state.current_page), total_pages)
|