Gordon Li committed on
Commit 6582d09 · 1 Parent(s): ffcb8a0

dataset cronjob

app.py CHANGED
@@ -2,11 +2,11 @@ import os
 import re
 import streamlit as st
 from html import escape
-from streamlit_folium import st_folium, folium_static
+from streamlit_folium import st_folium
 import math
-from HKUSTBNBVisualiser import HKUSTBNBVisualiser
+from visualiser.hkust_bnb_visualiser import HKUSTBNBVisualiser
 from huggingface_hub import login
-from HKUSTBNBConstant import (
+from constant.hkust_bnb_constant import (
 SIDEBAR_HEADER,
 SIDEBAR_DIVIDER,
 TRAFFIC_EXPLANATION,
HKUSTBNBConstant.py → constant/hkust_bnb_constant.py RENAMED
@@ -1,4 +1,4 @@
-# HKUSTBNBConstant.py
+# hkust_bnb_constant.py
 
 GET_ALL_NEIGHBORHOODS = """
 SELECT DISTINCT NEIGHBOURHOOD
cronjob/abstract_traffic_image_analyzer.py ADDED
@@ -0,0 +1,267 @@
1
+ import requests
2
+ import oracledb
3
+ from PIL import Image, ImageDraw, ImageFont
4
+ import signal
5
+ import io
6
+ from datetime import datetime
7
+ import logging
8
+ import json
9
+ import os
10
+ import random
11
+
12
+
13
+ class AbstractTrafficImageAnalyzer:
14
+ def __init__(self):
15
+ self.connection_params = {
16
+ 'user': 'slliac',
17
+ 'password': '7033',
18
+ 'dsn': 'imz409.ust.hk:1521/imz409'
19
+ }
20
+ self.running = True
21
+
22
+ signal.signal(signal.SIGINT, self.signal_handler)
23
+ signal.signal(signal.SIGTERM, self.signal_handler)
24
+
25
+ self.vehicle_classes = {2, 3, 4, 5, 6, 7, 8}
26
+
27
+ self.dataset_dir = "traffic_dataset"
28
+ self.images_dir = os.path.join(self.dataset_dir, "images")
29
+
30
+ os.makedirs(self.images_dir, exist_ok=True)
31
+
32
+ random.seed(42)
33
+
34
+ self.setup_logging()
35
+
36
+ def signal_handler(self, signum, frame):
37
+ print("\nShutdown signal received. Completing current task...")
38
+ self.running = False
39
+
40
+ def setup_logging(self):
41
+ logging.basicConfig(
42
+ level=logging.INFO,
43
+ format='%(asctime)s - %(levelname)s - %(message)s',
44
+ handlers=[
45
+ logging.FileHandler(f'traffic_analysis_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
46
+ logging.StreamHandler()
47
+ ]
48
+ )
49
+
50
+ def get_camera_locations(self):
51
+ try:
52
+ with oracledb.connect(**self.connection_params) as conn:
53
+ cursor = conn.cursor()
54
+ cursor.execute("SELECT KEY, URL FROM TD_TRAFFIC_CAMERA_LOCATION")
55
+ return cursor.fetchall()
56
+ except Exception as e:
57
+ logging.error(f"Error fetching camera locations: {str(e)}")
58
+ raise
59
+
60
+ def download_image(self, url):
61
+ try:
62
+ response = requests.get(url)
63
+ response.raise_for_status()
64
+ return Image.open(io.BytesIO(response.content))
65
+ except Exception as e:
66
+ logging.error(f"Error downloading image from {url}: {str(e)}")
67
+ raise
68
+
69
+ def detect_vehicles(self, image, confidence_threshold=0.7):
70
+ try:
71
+ if image.mode == 'RGBA':
72
+ image = image.convert('RGB')
73
+
74
+ width, height = image.size
75
+ inputs = self.processor(images=image, return_tensors="pt")
76
+ outputs = self.model(**inputs)
77
+
78
+ probas = outputs.logits.softmax(-1)[0, :, :-1]
79
+ keep = probas.max(-1).values > confidence_threshold
80
+
81
+ probas_to_keep = probas[keep]
82
+ boxes_to_keep = outputs.pred_boxes[0][keep]
83
+
84
+ scores = probas_to_keep.max(-1)
85
+ labels = probas_to_keep.argmax(-1)
86
+
87
+ vehicle_detections = []
88
+ for score, label, box in zip(scores.values, labels, boxes_to_keep):
89
+ x_c, y_c, w, h = box.tolist()
90
+
91
+ w_abs = w * width
92
+ h_abs = h * height
93
+
94
+ x = (x_c - w / 2) * width
95
+ y = (y_c - h / 2) * height
96
+
97
+ category_id = label.item()
98
+
99
+ if category_id in self.vehicle_classes:
100
+ vehicle_detections.append({
101
+ 'bbox': [float(x), float(y), float(w_abs), float(h_abs)],
102
+ 'category_id': int(category_id),
103
+ 'area': float(w_abs * h_abs),
104
+ 'iscrowd': 0,
105
+ 'score': float(score.item())
106
+ })
107
+
108
+ return vehicle_detections
109
+ except Exception as e:
110
+ logging.error(f"Error detecting vehicles: {str(e)}")
111
+ raise
112
+
113
+ def draw_detections(self, image, detections):
114
+ try:
115
+ draw_image = image.copy()
116
+ draw = ImageDraw.Draw(draw_image)
117
+
118
+ category_names = {
119
+ 2: "bicycle",
120
+ 3: "car",
121
+ 4: "motorcycle",
122
+ 5: "airplane",
123
+ 6: "bus",
124
+ 7: "train",
125
+ 8: "truck"
126
+ }
127
+
128
+ try:
129
+ font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20)
130
+ except:
131
+ font = ImageFont.load_default()
132
+
133
+ for detection in detections:
134
+ x, y, w, h = detection['bbox']
135
+ score = detection['score']
136
+ category_id = detection['category_id']
137
+
138
+ category_name = category_names.get(category_id, f"Vehicle-{category_id}")
139
+
140
+ draw.rectangle(
141
+ [(x, y), (x + w, y + h)],
142
+ outline='red',
143
+ width=3
144
+ )
145
+
146
+ label_text = f"{category_name}: {score:.2f}"
147
+ label_bbox = draw.textbbox((x, y - 25), label_text, font=font)
148
+ draw.rectangle(
149
+ [label_bbox[0] - 5, label_bbox[1] - 5, label_bbox[2] + 5, label_bbox[3] + 5],
150
+ fill='red'
151
+ )
152
+
153
+ draw.text(
154
+ (x, y - 25),
155
+ label_text,
156
+ fill='white',
157
+ font=font
158
+ )
159
+
160
+ return draw_image
161
+ except Exception as e:
162
+ logging.error(f"Error drawing detections: {str(e)}")
163
+ raise
164
+
165
+ def process_traffic_cameras(self):
166
+ try:
167
+ current_timestamp = datetime.now()
168
+ timestamp_str = current_timestamp.strftime("%Y%m%d_%H%M%S")
169
+
170
+ logging.info(f"Starting traffic image analysis for all cameras at {timestamp_str}")
171
+ camera_locations = self.get_camera_locations()
172
+
173
+ batch_data = {
174
+ 'capture_time': [],
175
+ 'location_id': [],
176
+ 'image_id': [],
177
+ 'original_image': [],
178
+ 'vehicle_count': [],
179
+ 'processed_image': [],
180
+ 'coco_annotations': []
181
+ }
182
+
183
+ for image_id, (key, url) in enumerate(camera_locations, start=1):
184
+ if not self.running:
185
+ break
186
+
187
+ try:
188
+ logging.info(f"Processing camera at location {key}")
189
+
190
+ img_timestamp = datetime.now()
191
+ img_timestamp_str = img_timestamp.strftime("%Y%m%d_%H%M%S")
192
+ filename = f"{key}_{img_timestamp_str}.jpg"
193
+ file_path = os.path.join(self.images_dir, filename)
194
+
195
+ image = self.download_image(url)
196
+
197
+ image.save(file_path)
198
+
199
+ orig_img_byte_arr = io.BytesIO()
200
+ image.save(orig_img_byte_arr, format='JPEG')
201
+ original_image_data = orig_img_byte_arr.getvalue()
202
+
203
+ vehicle_detections = self.detect_vehicles(image)
204
+
205
+ processed_image = self.draw_detections(image, vehicle_detections)
206
+
207
+ processed_file_path = os.path.join(self.images_dir, f"processed_{filename}")
208
+ processed_image.save(processed_file_path)
209
+
210
+ proc_img_byte_arr = io.BytesIO()
211
+ processed_image.save(proc_img_byte_arr, format='JPEG')
212
+ processed_image_data = proc_img_byte_arr.getvalue()
213
+
214
+ simplified_annotations = []
215
+ for detection in vehicle_detections:
216
+ simplified_annotations.append({
217
+ "bbox": detection['bbox'],
218
+ "category_id": detection['category_id'],
219
+ "area": detection['area'],
220
+ "iscrowd": detection['iscrowd']
221
+ })
222
+
223
+ coco_annotation = {
224
+ "image_id": image_id,
225
+ "annotations": simplified_annotations,
226
+ "date": img_timestamp.strftime("%Y-%m-%d"),
227
+ "timestamp": img_timestamp.strftime("%Y-%m-%d %H:%M:%S")
228
+ }
229
+
230
+ batch_data['capture_time'].append(img_timestamp.isoformat())
231
+ batch_data['location_id'].append(key)
232
+ batch_data['image_id'].append(image_id)
233
+ batch_data['original_image'].append(original_image_data)
234
+ batch_data['vehicle_count'].append(len(vehicle_detections))
235
+ batch_data['processed_image'].append(processed_image_data)
236
+ batch_data['coco_annotations'].append(json.dumps(coco_annotation))
237
+
238
+ logging.info(f"Completed analysis for location {key}, detected {len(vehicle_detections)} vehicles")
239
+ except Exception as e:
240
+ logging.error(f"Error processing location {key}: {str(e)}")
241
+ continue
242
+
243
+ self.update_huggingface_dataset(batch_data, timestamp_str)
244
+
245
+ logging.info(f"Completed traffic image analysis for all cameras. Data saved to {self.dataset_dir}")
246
+
247
+ except Exception as e:
248
+ logging.error(f"Error in process_traffic_cameras: {str(e)}")
249
+ raise
250
+
251
+ def update_huggingface_dataset(self, batch_data, timestamp_str):
252
+ raise NotImplementedError("Subclasses must implement update_huggingface_dataset method")
253
+
254
+ def create_coco_annotation_files(self, dataset_dict, timestamp_str):
255
+ raise NotImplementedError("Subclasses must implement create_coco_annotation_files method")
256
+
257
+ def update_readme(self, dataset_dict, timestamp_str):
258
+ raise NotImplementedError("Subclasses must implement update_readme method")
259
+
260
+ def run(self):
261
+ try:
262
+ self.process_traffic_cameras()
263
+ logging.info(f"Analysis completed and dataset updated.")
264
+ except Exception as e:
265
+ logging.error(f"Scheduler error: {str(e)}")
266
+ finally:
267
+ print("\nScheduler stopped")
cronjob/application_traffic_image_analyzer.py ADDED
@@ -0,0 +1,238 @@
1
+ from transformers import DetrImageProcessor, DetrForObjectDetection
2
+ from datasets import Dataset, Features, Value, load_dataset, DatasetDict, concatenate_datasets
3
+ from PIL import Image
4
+ from datetime import datetime
5
+ from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
6
+ import io
7
+ import json
8
+ import os
9
+ import logging
10
+
11
+
12
+ class ApplicationTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
13
+ def __init__(self):
14
+ super().__init__()
15
+ self.processor = DetrImageProcessor.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50",
16
+ revision="main")
17
+ self.model = DetrForObjectDetection.from_pretrained("slliac/detr-group37-liaujianjie-resnet-50")
18
+
19
+ self.application_dir = os.path.join(self.dataset_dir, "application")
20
+ os.makedirs(self.application_dir, exist_ok=True)
21
+
22
+ def update_huggingface_dataset(self, batch_data, timestamp_str):
23
+ try:
24
+ features = Features({
25
+ 'capture_time': Value(dtype='string'),
26
+ 'location_id': Value(dtype='string'),
27
+ 'image_id': Value(dtype='int32'),
28
+ 'original_image': Value(dtype='binary'),
29
+ 'vehicle_count': Value(dtype='int32'),
30
+ 'processed_image': Value(dtype='binary'),
31
+ 'coco_annotations': Value(dtype='string')
32
+ })
33
+
34
+ valid_indices = [i for i, count in enumerate(batch_data['vehicle_count']) if count > 0]
35
+ if not valid_indices:
36
+ logging.info("No vehicles detected in any images. Skipping dataset update.")
37
+ return
38
+
39
+ filtered_data = {
40
+ 'capture_time': [batch_data['capture_time'][i] for i in valid_indices],
41
+ 'location_id': [batch_data['location_id'][i] for i in valid_indices],
42
+ 'image_id': [batch_data['image_id'][i] for i in valid_indices],
43
+ 'original_image': [batch_data['original_image'][i] for i in valid_indices],
44
+ 'vehicle_count': [batch_data['vehicle_count'][i] for i in valid_indices],
45
+ 'processed_image': [batch_data['processed_image'][i] for i in valid_indices],
46
+ 'coco_annotations': [batch_data['coco_annotations'][i] for i in valid_indices]
47
+ }
48
+
49
+ new_dataset = Dataset.from_dict(filtered_data, features=features)
50
+
51
+ try:
52
+ try:
53
+ existing_dataset = load_dataset(
54
+ "slliac/isom5240-td-application-traffic-analysis",
55
+ revision="main"
56
+ )
57
+ logging.info(f"Found existing dataset in 'application' branch")
58
+
59
+ if 'application' in existing_dataset:
60
+ combined_dataset = concatenate_datasets([existing_dataset['application'], new_dataset])
61
+
62
+ dataset_dict = DatasetDict({
63
+ "application": combined_dataset
64
+ })
65
+ else:
66
+ dataset_dict = DatasetDict({
67
+ "application": new_dataset
68
+ })
69
+ except Exception as e:
70
+ logging.info(f"Error loading existing dataset: {str(e)}")
71
+ dataset_dict = DatasetDict({
72
+ "application": new_dataset
73
+ })
74
+
75
+ dataset_dict.push_to_hub(
76
+ "slliac/isom5240-td-application-traffic-analysis",
77
+ private=False
78
+ )
79
+
80
+ logging.info(f"Successfully updated dataset on 'application' branch.")
81
+ logging.info(f"Application split: {len(dataset_dict['application'])} records")
82
+
83
+ self.create_coco_annotation_files(dataset_dict, timestamp_str)
84
+
85
+ except Exception as e:
86
+ logging.error(f"Error updating Hugging Face dataset: {str(e)}")
87
+ raise
88
+
89
+ except Exception as e:
90
+ logging.error(f"Error in update_huggingface_dataset: {str(e)}")
91
+ raise
92
+
93
+ def create_coco_annotation_files(self, dataset_dict, timestamp_str):
94
+ try:
95
+ categories = [
96
+ {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
97
+ {"id": 3, "name": "car", "supercategory": "vehicle"},
98
+ {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
99
+ {"id": 5, "name": "airplane", "supercategory": "vehicle"},
100
+ {"id": 6, "name": "bus", "supercategory": "vehicle"},
101
+ {"id": 7, "name": "train", "supercategory": "vehicle"},
102
+ {"id": 8, "name": "truck", "supercategory": "vehicle"}
103
+ ]
104
+
105
+ current_datetime = datetime.now()
106
+ current_date = current_datetime.strftime("%Y-%m-%d")
107
+ current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
108
+
109
+ images_data = []
110
+ annotations_data = []
111
+ annotation_id = 1
112
+
113
+ for i, record in enumerate(dataset_dict['application']):
114
+ image_id = record['image_id']
115
+
116
+ coco_data = json.loads(record['coco_annotations'])
117
+
118
+ img = Image.open(io.BytesIO(record['original_image']))
119
+ width, height = img.size
120
+
121
+ image_entry = {
122
+ "id": image_id,
123
+ "width": width,
124
+ "height": height,
125
+ "file_name": f"{record['location_id']}_{image_id}.jpg",
126
+ "license": 1,
127
+ "date_captured": record['capture_time'],
128
+ "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
129
+ "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
130
+ '%Y-%m-%d %H:%M:%S')
131
+ }
132
+ images_data.append(image_entry)
133
+
134
+ for ann in coco_data['annotations']:
135
+ annotation_entry = {
136
+ "id": annotation_id,
137
+ "image_id": image_id,
138
+ "category_id": ann['category_id'],
139
+ "bbox": ann['bbox'],
140
+ "area": ann['area'],
141
+ "iscrowd": ann['iscrowd'],
142
+ "segmentation": []
143
+ }
144
+ annotations_data.append(annotation_entry)
145
+ annotation_id += 1
146
+
147
+ coco_output = {
148
+ "info": {
149
+ "year": current_datetime.year,
150
+ "version": "1.0",
151
+ "description": "Hong Kong Traffic Camera Dataset - Application data",
152
+ "contributor": "ISOM5240 Group 37",
153
+ "url": "",
154
+ "date_created": current_datetime.isoformat(),
155
+ "collection_date": current_date,
156
+ "collection_timestamp": current_datetime_str,
157
+ "batch_timestamp": timestamp_str
158
+ },
159
+ "licenses": [
160
+ {
161
+ "id": 1,
162
+ "name": "Attribution-NonCommercial",
163
+ "url": "http://creativecommons.org/licenses/by-nc/2.0/"
164
+ }
165
+ ],
166
+ "images": images_data,
167
+ "annotations": annotations_data,
168
+ "categories": categories
169
+ }
170
+
171
+ annotation_file = os.path.join(self.application_dir, f"application_labels_{timestamp_str}.json")
172
+ with open(annotation_file, 'w') as f:
173
+ json.dump(coco_output, f, indent=2)
174
+
175
+ standard_annotation_file = os.path.join(self.application_dir, "application_labels.json")
176
+ with open(standard_annotation_file, 'w') as f:
177
+ json.dump(coco_output, f, indent=2)
178
+
179
+ logging.info(f"Created COCO annotation files for application data in {self.application_dir}")
180
+
181
+ try:
182
+ from huggingface_hub import HfApi
183
+ api = HfApi()
184
+
185
+ api.upload_file(
186
+ path_or_fileobj=annotation_file,
187
+ path_in_repo=f"application/application_labels_{timestamp_str}.json",
188
+ repo_id="slliac/isom5240-td-application-traffic-analysis",
189
+ repo_type="dataset",
190
+ revision="main"
191
+ )
192
+
193
+ api.upload_file(
194
+ path_or_fileobj=standard_annotation_file,
195
+ path_in_repo=f"application/application_labels.json",
196
+ repo_id="slliac/isom5240-td-application-traffic-analysis",
197
+ repo_type="dataset",
198
+ revision="main"
199
+ )
200
+
201
+ logging.info(
202
+ f"Uploaded COCO annotation files to Hugging Face repository in 'application/' directory")
203
+ except Exception as e:
204
+ logging.error(f"Error uploading COCO annotations file: {str(e)}")
205
+
206
+ except Exception as e:
207
+ logging.error(f"Error creating COCO annotation files: {str(e)}")
208
+
209
+ def main():
210
+ analyzer = ApplicationTrafficImageAnalyzer()
211
+ try:
212
+ try:
213
+ initial_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
214
+ print("\nInitial Dataset Info (from 'application' branch):")
215
+ for split in initial_dataset:
216
+ print(f"Number of {split} records: {len(initial_dataset[split])}")
217
+ except Exception as e:
218
+ print(f"\nNo existing dataset found in 'application' branch: {str(e)}")
219
+ print("Will create new dataset with 'application' split in 'application' branch.")
220
+
221
+ analyzer.run()
222
+
223
+ try:
224
+ final_dataset = load_dataset("slliac/isom5240-td-application-traffic-analysis", revision="main")
225
+ print("\nFinal Dataset Info (from 'application' branch):")
226
+ for split in final_dataset:
227
+ print(f"Number of {split} records: {len(final_dataset[split])}")
228
+ except Exception as e:
229
+ print(f"\nError accessing final dataset from 'application' branch: {str(e)}")
230
+
231
+ except Exception as e:
232
+ logging.error(f"Main execution error: {str(e)}")
233
+ finally:
234
+ print("\nProgram terminated")
235
+
236
+
237
+ if __name__ == "__main__":
238
+ main()
cronjob/readme.md ADDED
@@ -0,0 +1,5 @@
1
+ Here are the dataset cronjobs that simulate the training and application usage datasets:
2
+
3
+ 0 */3 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/train_detr_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/train.log 2>&1
4
+
5
+ 0 */1 * * * /Users/gordonli/Desktop/test/.venv3.12/bin/python /Users/gordonli/Desktop/5240-frontend/cronjob/application_traffic_image_analyzer.py >> /Users/gordonli/Desktop/test/app.log 2>&1
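
The two crontab entries above run the training-data collector (train_detr_traffic_image_analyzer.py) at minute 0 of every third hour and the application-data collector (application_traffic_image_analyzer.py) at minute 0 of every hour, each appending its output to a log file. For a one-off manual run outside cron, a minimal sketch (assuming it is launched from the cronjob/ directory so the sibling import resolves) is:

```python
# One-off manual equivalent of a single tick of the hourly cron entry above.
# Run from the cronjob/ directory so the sibling module import resolves.
from application_traffic_image_analyzer import ApplicationTrafficImageAnalyzer

if __name__ == "__main__":
    analyzer = ApplicationTrafficImageAnalyzer()
    analyzer.run()  # download camera images, run DETR detection, push the batch to the Hub
```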
cronjob/train_detr_traffic_image_analyzer.py ADDED
@@ -0,0 +1,241 @@
1
+ from transformers import DetrImageProcessor, DetrForObjectDetection
2
+ from datasets import Dataset, Features, Value, load_dataset, concatenate_datasets, DatasetDict
3
+ from PIL import Image
4
+ from datetime import datetime
5
+ from abstract_traffic_image_analyzer import AbstractTrafficImageAnalyzer
6
+ import io
7
+ import json
8
+ import os
9
+ import logging
10
+
11
+ class TrainDETRTrafficImageAnalyzer(AbstractTrafficImageAnalyzer):
12
+ def __init__(self):
13
+ super().__init__()
14
+
15
+ self.processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
16
+ self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
17
+
18
+ self.fb_detr_dir = os.path.join(self.dataset_dir, "fb_detr_res_50")
19
+ os.makedirs(self.fb_detr_dir, exist_ok=True)
20
+
21
+ def update_huggingface_dataset(self, batch_data, timestamp_str):
22
+ try:
23
+ features = Features({
24
+ 'capture_time': Value(dtype='string'),
25
+ 'location_id': Value(dtype='string'),
26
+ 'image_id': Value(dtype='int32'),
27
+ 'original_image': Value(dtype='binary'),
28
+ 'vehicle_count': Value(dtype='int32'),
29
+ 'processed_image': Value(dtype='binary'),
30
+ 'coco_annotations': Value(dtype='string')
31
+ })
32
+
33
+ valid_indices = [i for i, count in enumerate(batch_data['vehicle_count']) if count > 0]
34
+ if not valid_indices:
35
+ logging.info("No vehicles detected in any images. Skipping dataset update.")
36
+ return
37
+
38
+ filtered_data = {
39
+ 'capture_time': [batch_data['capture_time'][i] for i in valid_indices],
40
+ 'location_id': [batch_data['location_id'][i] for i in valid_indices],
41
+ 'image_id': [batch_data['image_id'][i] for i in valid_indices],
42
+ 'original_image': [batch_data['original_image'][i] for i in valid_indices],
43
+ 'vehicle_count': [batch_data['vehicle_count'][i] for i in valid_indices],
44
+ 'processed_image': [batch_data['processed_image'][i] for i in valid_indices],
45
+ 'coco_annotations': [batch_data['coco_annotations'][i] for i in valid_indices]
46
+ }
47
+
48
+ new_dataset = Dataset.from_dict(filtered_data, features=features)
49
+
50
+ try:
51
+ existing_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
52
+ logging.info(f"Found existing dataset")
53
+
54
+ if 'fbDetr50Train' in existing_dataset:
55
+ all_data = []
56
+
57
+ if 'fbDetr50Train' in existing_dataset:
58
+ all_data.append(existing_dataset['fbDetr50Train'])
59
+ logging.info(
60
+ f"Found existing fbDetr50Train data: {len(existing_dataset['fbDetr50Train'])} records")
61
+
62
+ all_data.append(new_dataset)
63
+
64
+ combined_dataset = concatenate_datasets(all_data)
65
+ logging.info(f"Combined dataset has {len(combined_dataset)} records")
66
+
67
+ combined_dataset = combined_dataset.shuffle(seed=42)
68
+
69
+ train_test_split = combined_dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
70
+
71
+ dataset_dict = DatasetDict({
72
+ "fbDetr50Train": train_test_split['train']
73
+ })
74
+
75
+ else:
76
+ train_test_split = new_dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
77
+ dataset_dict = DatasetDict({
78
+ "fbDetr50Train": train_test_split['train']
79
+ })
80
+ logging.info(f"Successfully prepared dataset with fb-detr-res-50 splits")
81
+ except Exception as e:
82
+ logging.info(f"Creating new dataset with fb-detr-res-50 splits: {str(e)}")
83
+ train_test_split = new_dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
84
+ dataset_dict = DatasetDict({
85
+ "fbDetr50Train": train_test_split['train']
86
+ })
87
+
88
+ dataset_dict.push_to_hub(
89
+ "slliac/isom5240-td-traffic-analysis",
90
+ private=True
91
+ )
92
+
93
+ logging.info(f"Successfully updated dataset with fb-detr-res-50 splits.")
94
+ logging.info(f"fbDetr50Train split: {len(dataset_dict['fbDetr50Train'])} records")
95
+
96
+ self.create_coco_annotation_files(dataset_dict, timestamp_str)
97
+
98
+ except Exception as e:
99
+ logging.error(f"Error updating Hugging Face dataset: {str(e)}")
100
+ raise
101
+
102
+ def create_coco_annotation_files(self, dataset_dict, timestamp_str):
103
+ try:
104
+ categories = [
105
+ {"id": 2, "name": "bicycle", "supercategory": "vehicle"},
106
+ {"id": 3, "name": "car", "supercategory": "vehicle"},
107
+ {"id": 4, "name": "motorcycle", "supercategory": "vehicle"},
108
+ {"id": 5, "name": "airplane", "supercategory": "vehicle"},
109
+ {"id": 6, "name": "bus", "supercategory": "vehicle"},
110
+ {"id": 7, "name": "train", "supercategory": "vehicle"},
111
+ {"id": 8, "name": "truck", "supercategory": "vehicle"}
112
+ ]
113
+
114
+ current_datetime = datetime.now()
115
+ current_date = current_datetime.strftime("%Y-%m-%d")
116
+ current_datetime_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
117
+
118
+ for split in ['fbDetr50Train']:
119
+ images_data = []
120
+ annotations_data = []
121
+ annotation_id = 1
122
+
123
+ for i, record in enumerate(dataset_dict[split]):
124
+ image_id = record['image_id']
125
+ coco_data = json.loads(record['coco_annotations'])
126
+ img = Image.open(io.BytesIO(record['original_image']))
127
+ width, height = img.size
128
+
129
+ image_entry = {
130
+ "id": image_id,
131
+ "width": width,
132
+ "height": height,
133
+ "file_name": f"{record['location_id']}_{image_id}.jpg",
134
+ "license": 1,
135
+ "date_captured": record['capture_time'],
136
+ "capture_date": datetime.fromisoformat(record['capture_time']).strftime('%Y-%m-%d'),
137
+ "capture_timestamp": datetime.fromisoformat(record['capture_time']).strftime(
138
+ '%Y-%m-%d %H:%M:%S')
139
+ }
140
+ images_data.append(image_entry)
141
+
142
+ for ann in coco_data['annotations']:
143
+ annotation_entry = {
144
+ "id": annotation_id,
145
+ "image_id": image_id,
146
+ "category_id": ann['category_id'],
147
+ "bbox": ann['bbox'],
148
+ "area": ann['area'],
149
+ "iscrowd": ann['iscrowd'],
150
+ "segmentation": []
151
+ }
152
+ annotations_data.append(annotation_entry)
153
+ annotation_id += 1
154
+
155
+ coco_output = {
156
+ "info": {
157
+ "year": current_datetime.year,
158
+ "version": "1.0",
159
+ "description": f"Hong Kong Traffic Camera Dataset - {split} split using Facebook DETR ResNet-50",
160
+ "contributor": "ISOM5240 Group 37",
161
+ "url": "",
162
+ "date_created": current_datetime.isoformat(),
163
+ "collection_date": current_date,
164
+ "collection_timestamp": current_datetime_str,
165
+ "batch_timestamp": timestamp_str
166
+ },
167
+ "licenses": [
168
+ {
169
+ "id": 1,
170
+ "name": "Attribution-NonCommercial",
171
+ "url": "http://creativecommons.org/licenses/by-nc/2.0/"
172
+ }
173
+ ],
174
+ "images": images_data,
175
+ "annotations": annotations_data,
176
+ "categories": categories
177
+ }
178
+
179
+ split_filename = split.replace("-", "_")
180
+ annotation_file = os.path.join(self.fb_detr_dir, f"{split_filename}_labels_{timestamp_str}.json")
181
+ with open(annotation_file, 'w') as f:
182
+ json.dump(coco_output, f, indent=2)
183
+
184
+ logging.info(f"Created COCO annotation file for {split} split: {annotation_file}")
185
+
186
+ try:
187
+ from huggingface_hub import HfApi
188
+ api = HfApi()
189
+
190
+ api.upload_file(
191
+ path_or_fileobj=annotation_file,
192
+ path_in_repo=f"fb_detr_res_50/{split_filename}_labels_{timestamp_str}.json",
193
+ repo_id="slliac/isom5240-td-traffic-analysis",
194
+ repo_type="dataset"
195
+ )
196
+
197
+ api.upload_file(
198
+ path_or_fileobj=annotation_file,
199
+ path_in_repo=f"fb_detr_res_50/{split_filename}_labels.json",
200
+ repo_id="slliac/isom5240-td-traffic-analysis",
201
+ repo_type="dataset"
202
+ )
203
+
204
+ logging.info(
205
+ f"Uploaded FB DETR annotation files for {split} to Hugging Face repository")
206
+ except Exception as e:
207
+ logging.error(f"Error uploading COCO annotations file: {str(e)}")
208
+
209
+ except Exception as e:
210
+ logging.error(f"Error creating COCO annotation files: {str(e)}")
211
+
212
+
213
+ def main():
214
+ analyzer = TrainDETRTrafficImageAnalyzer()
215
+ try:
216
+ try:
217
+ initial_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
218
+ print("\nInitial Dataset Info:")
219
+ for split in initial_dataset:
220
+ print(f"Number of {split} records: {len(initial_dataset[split])}")
221
+ except Exception as e:
222
+ print("\nNo existing dataset found. Will create new dataset with fb-detr-res-50 splits.")
223
+
224
+ analyzer.run()
225
+
226
+ try:
227
+ final_dataset = load_dataset("slliac/isom5240-td-traffic-analysis")
228
+ print("\nFinal Dataset Info:")
229
+ for split in final_dataset:
230
+ print(f"Number of {split} records: {len(final_dataset[split])}")
231
+ except Exception as e:
232
+ print("\nError accessing final dataset:", str(e))
233
+
234
+ except Exception as e:
235
+ logging.error(f"Main execution error: {str(e)}")
236
+ finally:
237
+ print("\nProgram terminated")
238
+
239
+
240
+ if __name__ == "__main__":
241
+ main()
HKUSTBNBVisualiser.py → visualiser/hkust_bnb_visualiser.py RENAMED
@@ -6,8 +6,8 @@ from sentence_transformers import SentenceTransformer, util
 from geopy.distance import geodesic
 import logging
 
-from TDTrafficSpot import TrafficSpotManager
-from HKUSTBNBConstant import (
+from td_traffic_spot_visualiser import TrafficSpotManager
+from constant.hkust_bnb_constant import (
 GET_ALL_NEIGHBORHOODS,
 GET_NEIGHBORHOOD_LISTINGS,
 GET_LISTING_REVIEWS,
TDTrafficSpot.py → visualiser/td_traffic_spot_visualiser.py RENAMED
@@ -5,7 +5,7 @@ import base64
 import numpy as np
 from html import escape
 from datasets import load_dataset
-from HKUSTBNBConstant import (
+from constant.hkust_bnb_constant import (
 GET_TRAFFIC_CAMERA_LOCATIONS,
 TRAFFIC_DISCOUNT_DISPLAY,
 TRAFFIC_POPUP_BASE,