Gordon Li committed on
Commit
ecf5538
·
1 Parent(s): 9102710
app.py CHANGED
@@ -1,3 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
3
  import streamlit as st
@@ -22,11 +46,24 @@ from constant.hkust_bnb_constant import (
22
  )
23
 
24
 
 
 
 
 
 
25
  def load_css(css_file):
26
  with open(css_file) as f:
27
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
28
 
29
 
 
 
 
 
 
 
 
 
30
  def highlight_search_terms(text, search_query):
31
  if not search_query:
32
  return text
@@ -43,10 +80,17 @@ def highlight_search_terms(text, search_query):
43
  return highlighted_text
44
 
45
 
 
 
 
46
  def render_lottie_loading_animation():
47
  components.html(LOTTIE_HTML, height=750)
48
 
49
 
 
 
 
 
50
  def render_review_dialog():
51
  with st.container():
52
  col_title = st.columns([5, 1])
@@ -78,6 +122,10 @@ def render_review_dialog():
78
  st.info("No reviews available for this listing.")
79
 
80
 
 
 
 
 
81
  def initialize_session_state():
82
  default_states = {
83
  'center_lat': None,
@@ -108,6 +156,11 @@ def initialize_session_state():
108
  st.session_state.loading_complete = True
109
 
110
 
 
 
 
 
 
111
  def main():
112
  st.set_page_config(
113
  layout="wide",
@@ -352,6 +405,10 @@ def main():
352
  render_review_dialog()
353
 
354
 
 
 
 
 
355
  if __name__ == "__main__":
356
  token = os.environ.get("HF_TOKEN")
357
  if token:
 
1
+ """
2
+ app.py
3
+
4
+ This application provides a user interface for HKUST students to browse, search,
5
+ and find accommodations in different neighborhoods of Hong Kong. It features an interactive map
6
+ visualization, listing cards with pricing information, traffic-based discounts, and smart search
7
+ functionality to match user preferences with available properties.
8
+
9
+ Key features:
10
+ - Interactive map displaying BNB listings with location markers
11
+ - Neighborhood-based filtering of available accommodations
12
+ - Smart search system that highlights matching terms in descriptions and reviews
13
+ - Traffic-based discount system promoting eco-friendly housing options
14
+ - Detailed view of property reviews with highlighted search terms
15
+ - Responsive pagination for browsing through large sets of listings
16
+ - Loading animations and informative UI elements for better user experience
17
+
18
+ The application uses Folium for map visualization and Streamlit for the web interface.
19
+
20
+ Author: Gordon Li (20317033)
21
+ Company: HKUST Sustainability
22
+ Date: March 2025
23
+ """
24
+
25
  import os
26
  import re
27
  import streamlit as st
 
46
  )
47
 
48
 
49
+ """
50
+ Loads CSS styles from a file and applies them to the Streamlit application.
51
+ Parameters:
52
+ css_file: Path to the CSS file to be loaded
53
+ """
54
  def load_css(css_file):
55
  with open(css_file) as f:
56
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
57
 
58
 
59
+ """
60
+ Highlights search terms within text by wrapping them in a span with highlight class.
61
+ Parameters:
62
+ text: The original text to process
63
+ search_query: The search terms to highlight within the text
64
+ Returns:
65
+ Text with highlighted search terms
66
+ """
67
  def highlight_search_terms(text, search_query):
68
  if not search_query:
69
  return text
 
80
  return highlighted_text
81
 
82
 
83
+ """
84
+ Renders a loading animation using Lottie animation in HTML format.
85
+ """
86
  def render_lottie_loading_animation():
87
  components.html(LOTTIE_HTML, height=750)
88
 
89
 
90
+ """
91
+ Renders a dialog containing reviews for the currently selected listing.
92
+ Displays reviewer name, review date, and comments with search terms highlighted.
93
+ """
94
  def render_review_dialog():
95
  with st.container():
96
  col_title = st.columns([5, 1])
 
122
  st.info("No reviews available for this listing.")
123
 
124
 
125
+ """
126
+ Initializes the session state with default values for various application parameters.
127
+ Sets up the visualizer and loads required resources for the application.
128
+ """
129
  def initialize_session_state():
130
  default_states = {
131
  'center_lat': None,
 
156
  st.session_state.loading_complete = True
157
 
158
 
159
+ """
160
+ Main function that sets up the Streamlit application interface.
161
+ Handles page configuration, sidebar setup, map rendering, listing display,
162
+ pagination, and user interactions with the application elements.
163
+ """
164
  def main():
165
  st.set_page_config(
166
  layout="wide",
 
405
  render_review_dialog()
406
 
407
 
408
+ """
409
+ Main entry point for the application. Authenticates with Hugging Face if a token is available,
410
+ then calls the main function to start the application.
411
+ """
412
  if __name__ == "__main__":
413
  token = os.environ.get("HF_TOKEN")
414
  if token:
cronjob/abstract_traffic_image_analyzer.py CHANGED
@@ -1,3 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
  import oracledb
3
  from PIL import Image, ImageDraw, ImageFont
@@ -10,7 +25,19 @@ import os
10
  import random
11
 
12
 
 
13
  class AbstractTrafficImageAnalyzer:
 
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
  self.connection_params = {
16
  'user': 'slliac',
@@ -33,10 +60,23 @@ class AbstractTrafficImageAnalyzer:
33
 
34
  self.setup_logging()
35
 
 
 
 
 
 
 
 
 
36
  def signal_handler(self, signum, frame):
37
  print("\nShutdown signal received. Completing current task...")
38
  self.running = False
39
 
 
 
 
 
 
40
  def setup_logging(self):
41
  logging.basicConfig(
42
  level=logging.INFO,
@@ -47,6 +87,13 @@ class AbstractTrafficImageAnalyzer:
47
  ]
48
  )
49
 
 
 
 
 
 
 
 
50
  def get_camera_locations(self):
51
  try:
52
  with oracledb.connect(**self.connection_params) as conn:
@@ -57,6 +104,16 @@ class AbstractTrafficImageAnalyzer:
57
  logging.error(f"Error fetching camera locations: {str(e)}")
58
  raise
59
 
 
 
 
 
 
 
 
 
 
 
60
  def download_image(self, url):
61
  try:
62
  response = requests.get(url)
@@ -66,6 +123,17 @@ class AbstractTrafficImageAnalyzer:
66
  logging.error(f"Error downloading image from {url}: {str(e)}")
67
  raise
68
 
 
 
 
 
 
 
 
 
 
 
 
69
  def detect_vehicles(self, image, confidence_threshold=0.7):
70
  try:
71
  if image.mode == 'RGBA':
@@ -110,6 +178,17 @@ class AbstractTrafficImageAnalyzer:
110
  logging.error(f"Error detecting vehicles: {str(e)}")
111
  raise
112
 
 
 
 
 
 
 
 
 
 
 
 
113
  def draw_detections(self, image, detections):
114
  try:
115
  draw_image = image.copy()
@@ -162,6 +241,17 @@ class AbstractTrafficImageAnalyzer:
162
  logging.error(f"Error drawing detections: {str(e)}")
163
  raise
164
 
 
 
 
 
 
 
 
 
 
 
 
165
  def process_traffic_cameras(self):
166
  try:
167
  current_timestamp = datetime.now()
@@ -248,15 +338,46 @@ class AbstractTrafficImageAnalyzer:
248
  logging.error(f"Error in process_traffic_cameras: {str(e)}")
249
  raise
250
 
 
 
 
 
 
 
 
 
 
251
  def update_huggingface_dataset(self, batch_data, timestamp_str):
252
  raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
253
 
 
 
 
 
 
 
 
 
 
254
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
255
  raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
256
 
 
 
 
 
 
 
 
 
 
257
  def update_readme(self, dataset_dict, timestamp_str):
258
  raise NotImplementedError("Subclasses must implement update_readme method")
259
 
 
 
 
 
260
  def run(self):
261
  try:
262
  self.process_traffic_cameras()
 
1
+ """
2
+ Traffic Image Analysis Module for HKUST BNB+ Platform
3
+
4
+ This module provides functionality for analyzing traffic camera images to detect and count vehicles.
5
+ It downloads images from traffic cameras, processes them using computer vision models, and records
6
+ traffic data that is used for traffic-based discount calculations in the BNB+ platform.
7
+
8
+ The analyzer connects to a database to retrieve camera locations, downloads and processes images,
9
+ detects vehicles, and stores the results for visualization and analysis.
10
+
11
+ Author: Gordon Li (20317033)
12
+ Date: March 2025
13
+ """
14
+
15
+
16
  import requests
17
  import oracledb
18
  from PIL import Image, ImageDraw, ImageFont
 
25
  import random
26
 
27
 
28
+
29
  class AbstractTrafficImageAnalyzer:
30
+ """
31
+ Initializes the traffic image analyzer with database connection, signal handlers, and directories.
32
+
33
+ Sets up:
34
+ - Database connection parameters
35
+ - Signal handlers for graceful shutdown
36
+ - Vehicle class identifiers for detection
37
+ - Directory structure for storing downloaded images
38
+ - Logging configuration
39
+ """
40
+
41
  def __init__(self):
42
  self.connection_params = {
43
  'user': 'slliac',
 
60
 
61
  self.setup_logging()
62
 
63
+ """
64
+ Handles termination signals to ensure graceful shutdown.
65
+
66
+ Parameters:
67
+ signum: Signal number
68
+ frame: Current stack frame
69
+ """
70
+
71
  def signal_handler(self, signum, frame):
72
  print("\nShutdown signal received. Completing current task...")
73
  self.running = False
74
 
75
+ """
76
+ Sets up logging configuration for the analyzer.
77
+ Creates log files with timestamps and configures console output.
78
+ """
79
+
80
  def setup_logging(self):
81
  logging.basicConfig(
82
  level=logging.INFO,
 
87
  ]
88
  )
89
 
90
+ """
91
+ Retrieves traffic camera locations and URLs from the database.
92
+
93
+ Returns:
94
+ List of tuples containing camera location key and URL
95
+ """
96
+
97
  def get_camera_locations(self):
98
  try:
99
  with oracledb.connect(**self.connection_params) as conn:
 
104
  logging.error(f"Error fetching camera locations: {str(e)}")
105
  raise
106
 
107
+ """
108
+ Downloads an image from a given URL.
109
+
110
+ Parameters:
111
+ url: URL of the traffic camera image
112
+
113
+ Returns:
114
+ PIL Image object
115
+ """
116
+
117
  def download_image(self, url):
118
  try:
119
  response = requests.get(url)
 
123
  logging.error(f"Error downloading image from {url}: {str(e)}")
124
  raise
125
 
126
+ """
127
+ Detects vehicles in an image using a computer vision model.
128
+
129
+ Parameters:
130
+ image: PIL Image object to analyze
131
+ confidence_threshold: Minimum confidence score for detections (default: 0.7)
132
+
133
+ Returns:
134
+ List of vehicle detection dictionaries with bounding boxes and scores
135
+ """
136
+
137
  def detect_vehicles(self, image, confidence_threshold=0.7):
138
  try:
139
  if image.mode == 'RGBA':
 
178
  logging.error(f"Error detecting vehicles: {str(e)}")
179
  raise
180
 
181
+ """
182
+ Draws vehicle detection bounding boxes and labels on the image.
183
+
184
+ Parameters:
185
+ image: Original PIL Image
186
+ detections: List of vehicle detection dictionaries
187
+
188
+ Returns:
189
+ New PIL Image with bounding boxes and labels drawn
190
+ """
191
+
192
  def draw_detections(self, image, detections):
193
  try:
194
  draw_image = image.copy()
 
241
  logging.error(f"Error drawing detections: {str(e)}")
242
  raise
243
 
244
+ """
245
+ Processes all traffic cameras, detects vehicles, and prepares data for storage.
246
+
247
+ This method:
248
+ 1. Gets all camera locations
249
+ 2. Downloads images from each camera
250
+ 3. Detects vehicles in each image
251
+ 4. Processes images to visualize detections
252
+ 5. Prepares data for storage
253
+ """
254
+
255
  def process_traffic_cameras(self):
256
  try:
257
  current_timestamp = datetime.now()
 
338
  logging.error(f"Error in process_traffic_cameras: {str(e)}")
339
  raise
340
 
341
+ """
342
+ Updates the HuggingFace dataset with new traffic data.
343
+ This method must be implemented by subclasses.
344
+
345
+ Parameters:
346
+ batch_data: Dictionary containing the batch data to add
347
+ timestamp_str: Timestamp string for the current batch
348
+ """
349
+
350
  def update_huggingface_dataset(self, batch_data, timestamp_str):
351
  raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
352
 
353
+ """
354
+ Creates COCO annotation files for the dataset.
355
+ This method must be implemented by subclasses.
356
+
357
+ Parameters:
358
+ dataset_dict: Dictionary containing the dataset
359
+ timestamp_str: Timestamp string for the current batch
360
+ """
361
+
362
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
363
  raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
364
 
365
+ """
366
+ Updates the README file for the dataset.
367
+ This method must be implemented by subclasses.
368
+
369
+ Parameters:
370
+ dataset_dict: Dictionary containing the dataset
371
+ timestamp_str: Timestamp string for the current batch
372
+ """
373
+
374
  def update_readme(self, dataset_dict, timestamp_str):
375
  raise NotImplementedError("Subclasses must implement update_readme method")
376
 
377
+ """
378
+ Runs the traffic image analyzer, processing all cameras and updating the dataset.
379
+ """
380
+
381
  def run(self):
382
  try:
383
  self.process_traffic_cameras()
cronjob/application_traffic_image_analyzer.py CHANGED
@@ -1,3 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from transformers import DetrImageProcessor, DetrForObjectDetection
2
  from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
3
  from PIL import Image
@@ -10,6 +24,13 @@ import logging
10
 
11
 
12
  class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
 
 
 
 
 
 
 
13
  def __init__(self):
14
  super().__init__()
15
  self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
@@ -19,6 +40,14 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
19
  self.application_dir = os.path.join(self.dataset_dir, "application")
20
  os.makedirs(self.application_dir, exist_ok=True)
21
 
 
 
 
 
 
 
 
 
22
  def update_huggingface_dataset(self, batch_data, timestamp_str):
23
  try:
24
  features = Features({
@@ -90,6 +119,14 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
90
  logging.error(f"Error in update_huggingface_dataset: {str(e)}")
91
  raise
92
 
 
 
 
 
 
 
 
 
93
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
94
  try:
95
  categories = [
@@ -206,6 +243,14 @@ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
206
  except Exception as e:
207
  logging.error(f"Error creating COCO annotation files: {str(e)}")
208
 
 
 
 
 
 
 
 
 
209
  def main():
210
  analyzer = ApplicationTrafficImageAnalyzer()
211
  try:
@@ -234,5 +279,8 @@ def main():
234
  print("\nProgram terminated")
235
 
236
 
 
 
 
237
  if __name__ == "__main__":
238
  main()
 
1
+ """
2
+ Application Traffic Image Analyzer Module
3
+
4
+ This module extends the AbstractTrafficImageAnalyzer to provide specific implementation for
5
+ application-specific traffic analysis. It handles the processing of traffic camera images,
6
+ vehicle detection using the DETR model, and updating a HuggingFace dataset with the results.
7
+
8
+ The analyzer is used in the HKUST BNB+ platform to collect and analyze traffic data for
9
+ determining eco-friendly discounts based on traffic conditions.
10
+
11
+ Author: Gordon Li (20317033)
12
+ Date: March 2025
13
+ """
14
+
15
  from transformers import DetrImageProcessor, DetrForObjectDetection
16
  from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
17
  from PIL import Image
 
24
 
25
 
26
  class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
27
+ """
28
+ Initializes the application traffic analyzer with the DETR model and processor.
29
+
30
+ Sets up:
31
+ - DETR image processor and model for vehicle detection
32
+ - Application-specific directory for storing results
33
+ """
34
  def __init__(self):
35
  super().__init__()
36
  self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
 
40
  self.application_dir = os.path.join(self.dataset_dir, "application")
41
  os.makedirs(self.application_dir, exist_ok=True)
42
 
43
+ """
44
+ Updates the HuggingFace dataset with new traffic data.
45
+
46
+ Parameters:
47
+ batch_data: Dictionary containing batch data including capture time, location, images, and vehicle counts
48
+ timestamp_str: Timestamp string for the current batch
49
+ """
50
+
51
  def update_huggingface_dataset(self, batch_data, timestamp_str):
52
  try:
53
  features = Features({
 
119
  logging.error(f"Error in update_huggingface_dataset: {str(e)}")
120
  raise
121
 
122
+ """
123
+ Creates COCO annotation files for the dataset, which are a standard format for object detection.
124
+
125
+ Parameters:
126
+ dataset_dict: Dictionary containing the dataset with traffic observations
127
+ timestamp_str: Timestamp string for the current batch
128
+ """
129
+
130
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
131
  try:
132
  categories = [
 
243
  except Exception as e:
244
  logging.error(f"Error creating COCO annotation files: {str(e)}")
245
 
246
+
247
+ """
248
+ Main function to execute the traffic image analysis process.
249
+ Initializes the analyzer, loads existing data if available, runs the analysis,
250
+ and displays dataset information before and after the process.
251
+ """
252
+
253
+
254
  def main():
255
  analyzer = ApplicationTrafficImageAnalyzer()
256
  try:
 
279
  print("\nProgram terminated")
280
 
281
 
282
+ """
283
+ Entry point for the script.
284
+ """
285
  if __name__ == "__main__":
286
  main()
cronjob/train_detr_traffic_image_analyzer.py CHANGED
@@ -1,3 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from transformers import DetrImageProcessor, DetrForObjectDetection
2
  from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
3
  from PIL import Image
@@ -10,6 +26,15 @@ import logging
10
 
11
 
12
  class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
 
 
 
 
 
 
 
 
 
13
  def __init__(self):
14
  super().__init__()
15
 
@@ -19,6 +44,14 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
19
  self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
20
  os.makedirs(self.fb_detr_dir, exist_ok=True)
21
 
 
 
 
 
 
 
 
 
22
  def update_huggingface_dataset(self, batch_data, timestamp_str):
23
  try:
24
  features = Features({
@@ -77,6 +110,14 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
77
  logging.error(f"Error updating Hugging Face dataset: {str(e)}")
78
  raise
79
 
 
 
 
 
 
 
 
 
80
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
81
  try:
82
  categories = [
@@ -189,6 +230,13 @@ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
189
  logging.error(f"Error creating COCO annotation files: {str(e)}")
190
 
191
 
 
 
 
 
 
 
 
192
  def main():
193
  analyzer = TrainDETRTrafficImageAnalyzer()
194
  try:
@@ -216,5 +264,8 @@ def main():
216
  print("\nProgram terminated")
217
 
218
 
 
 
 
219
  if __name__ == "__main__":
220
  main()
 
1
+ """
2
+ Traffic Image Analyzer for DETR Model Training
3
+
4
+ This module extends the AbstractTrafficImageAnalyzer to provide implementation for training
5
+ data collection for the DETR object detection model. It processes traffic camera images,
6
+ detects vehicles using the pretrained Facebook DETR ResNet-50 model, and organizes the data
7
+ for model training purposes.
8
+
9
+ The data collected by this analyzer is used to train custom DETR models that improve vehicle
10
+ detection accuracy, which ultimately enhances the traffic analysis component of the HKUST BNB+
11
+ platform's eco-friendly discount system.
12
+
13
+ Author: Gordon Li (20317033)
14
+ Date: March 2025
15
+ """
16
+
17
  from transformers import DetrImageProcessor, DetrForObjectDetection
18
  from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
19
  from PIL import Image
 
26
 
27
 
28
  class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
29
+
30
+
31
+ """
32
+ Initializes the DETR training data collector with the Facebook pretrained model.
33
+
34
+ Sets up:
35
+ - Facebook DETR ResNet-50 image processor and model
36
+ - Directory structure for storing DETR training data
37
+ """
38
  def __init__(self):
39
  super().__init__()
40
 
 
44
  self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
45
  os.makedirs(self.fb_detr_dir, exist_ok=True)
46
 
47
+ """
48
+ Updates the HuggingFace dataset with new traffic data for DETR model training.
49
+
50
+ Parameters:
51
+ batch_data: Dictionary containing traffic image data and annotations
52
+ timestamp_str: Timestamp string for the current batch
53
+ """
54
+
55
  def update_huggingface_dataset(self, batch_data, timestamp_str):
56
  try:
57
  features = Features({
 
110
  logging.error(f"Error updating Hugging Face dataset: {str(e)}")
111
  raise
112
 
113
+ """
114
+ Creates COCO annotation files for the DETR training dataset.
115
+
116
+ Parameters:
117
+ dataset_dict: Dictionary containing the dataset splits
118
+ timestamp_str: Timestamp string for the current batch
119
+ """
120
+
121
  def create_coco_annotation_files(self, dataset_dict, timestamp_str):
122
  try:
123
  categories = [
 
230
  logging.error(f"Error creating COCO annotation files: {str(e)}")
231
 
232
 
233
+ """
234
+ Main function to execute the DETR training data collection process.
235
+ Initializes the analyzer, loads existing data if available, runs the analysis,
236
+ and displays dataset information before and after the process.
237
+ """
238
+
239
+
240
  def main():
241
  analyzer = TrainDETRTrafficImageAnalyzer()
242
  try:
 
264
  print("\nProgram terminated")
265
 
266
 
267
+ """
268
+ Entry point for the script.
269
+ """
270
  if __name__ == "__main__":
271
  main()
visualiser/hkust_bnb_visualiser.py CHANGED
@@ -1,3 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import oracledb
2
  import pandas as pd
3
  import folium
@@ -19,8 +35,18 @@ from constant.hkust_bnb_constant import (
19
  MAP_SCRIPT
20
  )
21
 
22
-
23
  class HKUSTBNBVisualiser:
 
 
 
 
 
 
 
 
 
 
 
24
  def __init__(self):
25
  self.connection_params = {
26
  'user': 'slliac',
@@ -55,6 +81,18 @@ class HKUSTBNBVisualiser:
55
  self.cached_listings = {}
56
  self.cached_embeddings = {}
57
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
59
  nearest_spot = None
60
  min_distance = float('inf')
@@ -73,6 +111,13 @@ class HKUSTBNBVisualiser:
73
  else:
74
  return None, None
75
 
 
 
 
 
 
 
 
76
  def get_all_neighborhoods(self):
77
  connection = self.pool.acquire()
78
  try:
@@ -88,6 +133,17 @@ class HKUSTBNBVisualiser:
88
  finally:
89
  self.pool.release(connection)
90
 
 
 
 
 
 
 
 
 
 
 
 
91
  def get_neighborhood_listings(self, neighborhood, limit=10):
92
  if limit not in [10, 20, 30, 40, 50]:
93
  limit = 10
@@ -118,6 +174,16 @@ class HKUSTBNBVisualiser:
118
  finally:
119
  self.pool.release(connection)
120
 
 
 
 
 
 
 
 
 
 
 
121
  def get_listing_reviews(self, listing_id):
122
  connection = self.pool.acquire()
123
  try:
@@ -145,6 +211,16 @@ class HKUSTBNBVisualiser:
145
  finally:
146
  self.pool.release(connection)
147
 
 
 
 
 
 
 
 
 
 
 
148
  def get_listing_reviews_for_search(self, listing_id):
149
  connection = self.pool.acquire()
150
  try:
@@ -170,6 +246,17 @@ class HKUSTBNBVisualiser:
170
  finally:
171
  self.pool.release(connection)
172
 
 
 
 
 
 
 
 
 
 
 
 
173
  def compute_similarity(self, query_embedding, target_embedding):
174
  if query_embedding is None or target_embedding is None:
175
  return 0.0
@@ -180,6 +267,17 @@ class HKUSTBNBVisualiser:
180
  print(f"Error computing similarity: {str(e)}")
181
  return 0.0
182
 
 
 
 
 
 
 
 
 
 
 
 
183
  def compute_search_scores(self, df, search_query):
184
  if not search_query or self.model is None:
185
  return [0.0] * len(df)
@@ -219,6 +317,17 @@ class HKUSTBNBVisualiser:
219
  print(f"Error in search scoring: {str(e)}")
220
  return [0.0] * len(df)
221
 
 
 
 
 
 
 
 
 
 
 
 
222
  def sort_by_relevance(self, df, search_query):
223
  if not search_query:
224
  return df
@@ -227,6 +336,24 @@ class HKUSTBNBVisualiser:
227
  df['relevance_percentage'] = df['relevance_score'] * 100
228
  return df.sort_values('relevance_score', ascending=False)
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
231
  selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
232
  if listings_limit not in [10, 20, 30, 40, 50]:
 
1
+ """
2
+ hkust_bnb_visualiser.py
3
+
4
+ This module provides the main visualization for the HKUST BNB+ platform.
5
+ It handles database connections, data retrieval, search relevance calculation, and map visualization
6
+ for BNB listings across different neighborhoods in Hong Kong. The class integrates with traffic data
7
+ to provide eco-friendly discount calculations based on traffic conditions.
8
+
9
+ Key capabilities:
10
+ - Semantic search functionality using sentence transformers
11
+ - Traffic spot integration for eco-friendly discount calculations
12
+
13
+ Author: Gordon Li (20317033)
14
+ Date: March 2025
15
+ """
16
+
17
  import oracledb
18
  import pandas as pd
19
  import folium
 
35
  MAP_SCRIPT
36
  )
37
 
 
38
  class HKUSTBNBVisualiser:
39
+ """
40
+ Main class for BNB data visualization and management.
41
+ Handles database connections, data retrieval, and rendering of interactive maps.
42
+ """
43
+
44
+ """
45
+ Initializes the BNB visualizer with database connection, traffic spot manager, and NLP model.
46
+ Sets up connection pool, loads traffic data, initializes sentence transformer model,
47
+ and prepares neighborhood data with caching structures.
48
+ """
49
+
50
  def __init__(self):
51
  self.connection_params = {
52
  'user': 'slliac',
 
81
  self.cached_listings = {}
82
  self.cached_embeddings = {}
83
 
84
+ """
85
+ Finds the nearest traffic spot to a given BNB listing location.
86
+
87
+ Parameters:
88
+ airbnb_lat: The latitude of the BNB listing
89
+ airbnb_lng: The longitude of the BNB listing
90
+ max_distance_km: Maximum distance in kilometers to consider a traffic spot (default: 0.7)
91
+
92
+ Returns:
93
+ Tuple containing (nearest_traffic_spot, distance_in_km) or (None, None) if no spot is found
94
+ """
95
+
96
  def find_nearest_traffic_spot(self, airbnb_lat, airbnb_lng, max_distance_km=0.7):
97
  nearest_spot = None
98
  min_distance = float('inf')
 
111
  else:
112
  return None, None
113
 
114
+ """
115
+ Retrieves all available neighborhoods from the database.
116
+
117
+ Returns:
118
+ List of neighborhood names as strings
119
+ """
120
+
121
  def get_all_neighborhoods(self):
122
  connection = self.pool.acquire()
123
  try:
 
133
  finally:
134
  self.pool.release(connection)
135
 
136
+ """
137
+ Retrieves BNB listings for a specific neighborhood with caching.
138
+
139
+ Parameters:
140
+ neighborhood: The neighborhood name to retrieve listings for
141
+ limit: Maximum number of listings to retrieve (default: 10)
142
+
143
+ Returns:
144
+ List of listing data rows from the database
145
+ """
146
+
147
  def get_neighborhood_listings(self, neighborhood, limit=10):
148
  if limit not in [10, 20, 30, 40, 50]:
149
  limit = 10
 
174
  finally:
175
  self.pool.release(connection)
176
 
177
+ """
178
+ Retrieves reviews for a specific listing ID.
179
+
180
+ Parameters:
181
+ listing_id: The ID of the listing to get reviews for
182
+
183
+ Returns:
184
+ List of tuples containing (review_date, reviewer_name, comments)
185
+ """
186
+
187
  def get_listing_reviews(self, listing_id):
188
  connection = self.pool.acquire()
189
  try:
 
211
  finally:
212
  self.pool.release(connection)
213
 
214
+ """
215
+ Retrieves review content for search functionality.
216
+
217
+ Parameters:
218
+ listing_id: The ID of the listing to get reviews for
219
+
220
+ Returns:
221
+ List of review comment strings for semantic search
222
+ """
223
+
224
  def get_listing_reviews_for_search(self, listing_id):
225
  connection = self.pool.acquire()
226
  try:
 
246
  finally:
247
  self.pool.release(connection)
248
 
249
+ """
250
+ Computes cosine similarity between two embeddings.
251
+
252
+ Parameters:
253
+ query_embedding: Embedding tensor for the search query
254
+ target_embedding: Embedding tensor for the target text
255
+
256
+ Returns:
257
+ Float value representing similarity (0.0-1.0)
258
+ """
259
+
260
  def compute_similarity(self, query_embedding, target_embedding):
261
  if query_embedding is None or target_embedding is None:
262
  return 0.0
 
267
  print(f"Error computing similarity: {str(e)}")
268
  return 0.0
269
 
270
+ """
271
+ Computes relevance scores for listings based on search query.
272
+
273
+ Parameters:
274
+ df: DataFrame containing listing data
275
+ search_query: User's search query string
276
+
277
+ Returns:
278
+ List of relevance scores for each listing in the DataFrame
279
+ """
280
+
281
  def compute_search_scores(self, df, search_query):
282
  if not search_query or self.model is None:
283
  return [0.0] * len(df)
 
317
  print(f"Error in search scoring: {str(e)}")
318
  return [0.0] * len(df)
319
 
320
+ """
321
+ Sorts a DataFrame of listings by their relevance to a search query.
322
+
323
+ Parameters:
324
+ df: DataFrame containing listing data
325
+ search_query: User's search query string
326
+
327
+ Returns:
328
+ DataFrame sorted by relevance to the search query
329
+ """
330
+
331
  def sort_by_relevance(self, df, search_query):
332
  if not search_query:
333
  return df
 
336
  df['relevance_percentage'] = df['relevance_score'] * 100
337
  return df.sort_values('relevance_score', ascending=False)
338
 
339
+ """
340
+ Creates an interactive map and DataFrame for display in the UI.
341
+
342
+ Parameters:
343
+ neighborhood: The neighborhood to display listings for (default: "Sha Tin")
344
+ show_traffic: Whether to show traffic spots on the map (default: True)
345
+ center_lat: Latitude to center the map on (default: None, will use mean of listings)
346
+ center_lng: Longitude to center the map on (default: None, will use mean of listings)
347
+ selected_id: ID of the currently selected listing (default: None)
348
+ search_query: User's search query string (default: None)
349
+ current_page: Current page number for pagination (default: 1)
350
+ items_per_page: Number of items to show per page (default: 3)
351
+ listings_limit: Maximum number of listings to retrieve (default: 10)
352
+
353
+ Returns:
354
+ Tuple containing (folium_map, listings_dataframe)
355
+ """
356
+
357
  def create_map_and_data(self, neighborhood="Sha Tin", show_traffic=True, center_lat=None, center_lng=None,
358
  selected_id=None, search_query=None, current_page=1, items_per_page=3, listings_limit=10):
359
  if listings_limit not in [10, 20, 30, 40, 50]:
visualiser/td_traffic_spot_visualiser.py CHANGED
@@ -1,3 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import folium
2
  import oracledb
3
  import logging
@@ -17,6 +31,16 @@ from constant.hkust_bnb_constant import (
17
 
18
 
19
  class TDTrafficSpot:
 
 
 
 
 
 
 
 
 
 
20
  def __init__(self, key, latitude, longitude, dataset_rows=None):
21
  self.key = key
22
  self.latitude = float(latitude) if latitude is not None else None
@@ -25,9 +49,26 @@ class TDTrafficSpot:
25
  self.avg_vehicle_count = self.calculate_avg_vehicle_count()
26
  self.recent_display_rows = self.get_recent_display_rows()
27
 
 
 
 
 
 
 
 
28
  def is_valid(self):
29
  return self.latitude is not None and self.longitude is not None
30
 
 
 
 
 
 
 
 
 
 
 
31
  def get_recent_display_rows(self, max_display=2):
32
  if not self.dataset_rows:
33
  return []
@@ -35,6 +76,13 @@ class TDTrafficSpot:
35
  sorted_rows = sorted(self.dataset_rows, key=lambda x: x['capture_time'], reverse=True)
36
  return sorted_rows[:max_display]
37
 
 
 
 
 
 
 
 
38
  def calculate_avg_vehicle_count(self):
39
  if not self.dataset_rows:
40
  return 0
@@ -46,6 +94,13 @@ class TDTrafficSpot:
46
 
47
  return np.mean(vehicle_counts)
48
 
 
 
 
 
 
 
 
49
  def get_discount_rate(self):
50
  if self.avg_vehicle_count < 2:
51
  return 0.20
@@ -54,6 +109,13 @@ class TDTrafficSpot:
54
  else:
55
  return 0.0
56
 
 
 
 
 
 
 
 
57
  def get_discount_info(self):
58
  discount_rate = self.get_discount_rate()
59
 
@@ -62,6 +124,13 @@ class TDTrafficSpot:
62
 
63
  return f"{int(discount_rate * 100)}% discount! Low traffic area"
64
 
 
 
 
 
 
 
 
65
  def create_popup_content(self):
66
  discount_info = self.get_discount_info()
67
  discount_display = ""
@@ -108,6 +177,13 @@ class TDTrafficSpot:
108
  html += "</div>"
109
  return html
110
 
 
 
 
 
 
 
 
111
  def add_to_map(self, folium_map):
112
  if self.is_valid():
113
 
@@ -126,12 +202,30 @@ class TDTrafficSpot:
126
 
127
 
128
  class TrafficSpotManager:
 
 
 
 
 
 
 
 
 
 
 
129
  def __init__(self, connection_params):
130
  self.connection_params = connection_params
131
  self.traffic_spots = []
132
  self.spot_dict = {}
133
  self.load_limited_traffic_spots()
134
 
 
 
 
 
 
 
 
135
  def load_limited_traffic_spots(self, limit=10):
136
  try:
137
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
@@ -192,6 +286,13 @@ class TrafficSpotManager:
192
  self.traffic_spots = []
193
  self.spot_dict = {}
194
 
 
 
 
 
 
 
 
195
  def load_specific_traffic_spots(self, keys):
196
  needed_keys = [key for key in keys if key not in self.spot_dict]
197
 
@@ -240,6 +341,14 @@ class TrafficSpotManager:
240
  except Exception as e:
241
  logging.error(f"Error loading specific traffic spots: {str(e)}")
242
 
 
 
 
 
 
 
 
 
243
  def add_spots_to_map(self, folium_map, spot_keys=None):
244
  if spot_keys is None:
245
  for spot in self.traffic_spots:
@@ -249,6 +358,16 @@ class TrafficSpotManager:
249
  if key in self.spot_dict:
250
  self.spot_dict[key].add_to_map(folium_map)
251
 
 
 
 
 
 
 
 
 
 
 
252
  def get_spot_by_key(self, key):
253
  if key in self.spot_dict:
254
  return self.spot_dict[key]
 
1
+ """
2
+ td_traffic_spot_visualiser.py
3
+
4
+ This module handles traffic data integration for the BNB+ platform, providing traffic-based
5
+ discount calculations and map visualization of traffic spots. It includes classes for
6
+ individual traffic spots and a manager to handle collections of spots.
7
+
8
+ The module integrates with a dataset of traffic observations to determine traffic conditions
9
+ and calculate eco-friendly discounts for BNB listings based on local traffic volume.
10
+
11
+ Author: Gordon Li (20317033)
12
+ Date: March 2025
13
+
14
+ """
15
  import folium
16
  import oracledb
17
  import logging
 
31
 
32
 
33
  class TDTrafficSpot:
34
+ """
35
+ Initializes a traffic spot with location and historical traffic data.
36
+
37
+ Parameters:
38
+ key: Unique identifier for the traffic spot
39
+ latitude: Geographic latitude of the traffic spot
40
+ longitude: Geographic longitude of the traffic spot
41
+ dataset_rows: List of dictionaries containing historical traffic observations (default: None)
42
+ """
43
+
44
  def __init__(self, key, latitude, longitude, dataset_rows=None):
45
  self.key = key
46
  self.latitude = float(latitude) if latitude is not None else None
 
49
  self.avg_vehicle_count = self.calculate_avg_vehicle_count()
50
  self.recent_display_rows = self.get_recent_display_rows()
51
 
52
+ """
53
+ Checks if the traffic spot has valid geographic coordinates.
54
+
55
+ Returns:
56
+ Boolean indicating whether latitude and longitude are valid
57
+ """
58
+
59
  def is_valid(self):
60
  return self.latitude is not None and self.longitude is not None
61
 
62
+ """
63
+ Gets the most recent traffic observations for display purposes.
64
+
65
+ Parameters:
66
+ max_display: Maximum number of recent records to return (default: 2)
67
+
68
+ Returns:
69
+ List of the most recent traffic observation records
70
+ """
71
+
72
  def get_recent_display_rows(self, max_display=2):
73
  if not self.dataset_rows:
74
  return []
 
76
  sorted_rows = sorted(self.dataset_rows, key=lambda x: x['capture_time'], reverse=True)
77
  return sorted_rows[:max_display]
78
 
79
+ """
80
+ Calculates the average vehicle count based on historical traffic observations.
81
+
82
+ Returns:
83
+ Float representing the average number of vehicles observed
84
+ """
85
+
86
  def calculate_avg_vehicle_count(self):
87
  if not self.dataset_rows:
88
  return 0
 
94
 
95
  return np.mean(vehicle_counts)
96
 
97
+ """
98
+ Determines the discount rate based on average traffic volume.
99
+
100
+ Returns:
101
+ Float representing the discount rate (0.0 to 0.20)
102
+ """
103
+
104
  def get_discount_rate(self):
105
  if self.avg_vehicle_count < 2:
106
  return 0.20
 
109
  else:
110
  return 0.0
111
 
112
+ """
113
+ Generates a human-readable description of the traffic-based discount.
114
+
115
+ Returns:
116
+ String describing the discount, if any
117
+ """
118
+
119
  def get_discount_info(self):
120
  discount_rate = self.get_discount_rate()
121
 
 
124
 
125
  return f"{int(discount_rate * 100)}% discount! Low traffic area"
126
 
127
+ """
128
+ Creates HTML content for the traffic spot's popup on the map.
129
+
130
+ Returns:
131
+ HTML string for the Folium popup
132
+ """
133
+
134
  def create_popup_content(self):
135
  discount_info = self.get_discount_info()
136
  discount_display = ""
 
177
  html += "</div>"
178
  return html
179
 
180
+ """
181
+ Adds the traffic spot to a Folium map with appropriate styling.
182
+
183
+ Parameters:
184
+ folium_map: Folium map object to add the marker to
185
+ """
186
+
187
  def add_to_map(self, folium_map):
188
  if self.is_valid():
189
 
 
202
 
203
 
204
  class TrafficSpotManager:
205
+ """
206
+ Manages a collection of traffic spots, handling data loading and map integration.
207
+ """
208
+
209
+ """
210
+ Initializes the manager with database connection parameters and loads initial traffic spots.
211
+
212
+ Parameters:
213
+ connection_params: Dictionary containing Oracle database connection parameters
214
+ """
215
+
216
  def __init__(self, connection_params):
217
  self.connection_params = connection_params
218
  self.traffic_spots = []
219
  self.spot_dict = {}
220
  self.load_limited_traffic_spots()
221
 
222
+ """
223
+ Loads a limited number of traffic spots for initial display.
224
+
225
+ Parameters:
226
+ limit: Maximum number of traffic spots to load initially (default: 10)
227
+ """
228
+
229
  def load_limited_traffic_spots(self, limit=10):
230
  try:
231
  dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", split="application")
 
286
  self.traffic_spots = []
287
  self.spot_dict = {}
288
 
289
+ """
290
+ Loads specific traffic spots by their keys when needed.
291
+
292
+ Parameters:
293
+ keys: List of traffic spot keys to load
294
+ """
295
+
296
  def load_specific_traffic_spots(self, keys):
297
  needed_keys = [key for key in keys if key not in self.spot_dict]
298
 
 
341
  except Exception as e:
342
  logging.error(f"Error loading specific traffic spots: {str(e)}")
343
 
344
+ """
345
+ Adds traffic spots to a Folium map.
346
+
347
+ Parameters:
348
+ folium_map: Folium map object to add markers to
349
+ spot_keys: Optional list of specific spot keys to add (default: None, adds all spots)
350
+ """
351
+
352
  def add_spots_to_map(self, folium_map, spot_keys=None):
353
  if spot_keys is None:
354
  for spot in self.traffic_spots:
 
358
  if key in self.spot_dict:
359
  self.spot_dict[key].add_to_map(folium_map)
360
 
361
+ """
362
+ Retrieves a traffic spot by its key, loading it if necessary.
363
+
364
+ Parameters:
365
+ key: The unique identifier of the traffic spot
366
+
367
+ Returns:
368
+ TDTrafficSpot object or None if not found
369
+ """
370
+
371
  def get_spot_by_key(self, key):
372
  if key in self.spot_dict:
373
  return self.spot_dict[key]