Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

VisionScout / spatial_analyzer.py

DawnC

Upload 31 files

4d1f920 verified 7 days ago

raw

history blame

80 kB


	import os
	import numpy as np
	from typing import Dict, List, Tuple, Any, Optional

	from scene_type import SCENE_TYPES
	from enhance_scene_describer import EnhancedSceneDescriber

	class SpatialAnalyzer:
	"""
	Analyzes spatial relationships between objects in an image.
	Handles region assignment, object positioning, and functional zone identification.
	"""

	def __init__(self, class_names: Dict[int, str] = None, object_categories=None):
	    """
	    Set up the 3x3 region grid, the class lookups and the scene describer.

	    Args:
	        class_names: Mapping from detector class id to a readable name
	        object_categories: Optional mapping of category name to class ids;
	            an empty dict is stored when it is omitted or falsy
	    """
	    # Build the nine named cells row by row. Each value is
	    # (x_min, y_min, x_max, y_max) in normalized image coordinates.
	    edges = (0, 1/3, 2/3, 1)
	    row_labels = ("top", "middle", "bottom")
	    col_labels = ("left", "center", "right")
	    self.regions = {}
	    for row_idx, row in enumerate(row_labels):
	        for col_idx, col in enumerate(col_labels):
	            self.regions[f"{row}_{col}"] = (
	                edges[col_idx], edges[row_idx], edges[col_idx + 1], edges[row_idx + 1]
	            )

	    self.class_names = class_names
	    self.OBJECT_CATEGORIES = object_categories if object_categories else {}
	    self.enhance_descriptor = EnhancedSceneDescriber(scene_types=SCENE_TYPES)

	    # Distance threshold for proximity analysis (normalized units)
	    self.proximity_threshold = 0.2


	def _determine_region(self, x: float, y: float) -> str:
	"""
	Determine which region a point falls into.

	Args:
	x: Normalized x-coordinate (0-1)
	y: Normalized y-coordinate (0-1)

	Returns:
	Region name
	"""
	for region_name, (x1, y1, x2, y2) in self.regions.items():
	if x1 <= x < x2 and y1 <= y < y2:
	return region_name

	return "unknown"

	def _analyze_regions(self, detected_objects: List[Dict]) -> Dict:
	"""
	Analyze object distribution across image regions.

	Args:
	detected_objects: List of detected objects with position information

	Returns:
	Dictionary with region analysis
	"""
	# Count objects in each region
	region_counts = {region: 0 for region in self.regions.keys()}
	region_objects = {region: [] for region in self.regions.keys()}

	for obj in detected_objects:
	region = obj["region"]
	if region in region_counts:
	region_counts[region] += 1
	region_objects[region].append({
	"class_id": obj["class_id"],
	"class_name": obj["class_name"]
	})

	# Determine main focus regions (top 1-2 regions by object count)
	sorted_regions = sorted(region_counts.items(), key=lambda x: x[1], reverse=True)
	main_regions = [region for region, count in sorted_regions if count > 0][:2]

	return {
	"counts": region_counts,
	"main_focus": main_regions,
	"objects_by_region": region_objects
	}

	def _extract_detected_objects(self, detection_result: Any, confidence_threshold: float = 0.25) -> List[Dict]:
	"""
	Extract detected objects from detection result with position information.

	Args:
	detection_result: Detection result from YOLOv8
	confidence_threshold: Minimum confidence threshold

	Returns:
	List of dictionaries with detected object information
	"""
	boxes = detection_result.boxes.xyxy.cpu().numpy()
	classes = detection_result.boxes.cls.cpu().numpy().astype(int)
	confidences = detection_result.boxes.conf.cpu().numpy()

	# Image dimensions
	img_height, img_width = detection_result.orig_shape[:2]

	detected_objects = []
	for box, class_id, confidence in zip(boxes, classes, confidences):
	# Skip objects with confidence below threshold
	if confidence < confidence_threshold:
	continue

	x1, y1, x2, y2 = box
	width = x2 - x1
	height = y2 - y1

	# Center point
	center_x = (x1 + x2) / 2
	center_y = (y1 + y2) / 2

	# Normalized positions (0-1)
	norm_x = center_x / img_width
	norm_y = center_y / img_height
	norm_width = width / img_width
	norm_height = height / img_height

	# Area calculation
	area = width * height
	norm_area = area / (img_width * img_height)

	# Region determination
	object_region = self._determine_region(norm_x, norm_y)

	detected_objects.append({
	"class_id": int(class_id),
	"class_name": self.class_names[int(class_id)],
	"confidence": float(confidence),
	"box": [float(x1), float(y1), float(x2), float(y2)],
	"center": [float(center_x), float(center_y)],
	"normalized_center": [float(norm_x), float(norm_y)],
	"size": [float(width), float(height)],
	"normalized_size": [float(norm_width), float(norm_height)],
	"area": float(area),
	"normalized_area": float(norm_area),
	"region": object_region
	})

	return detected_objects


	def _detect_scene_viewpoint(self, detected_objects: List[Dict]) -> Dict:
	"""
	檢測場景視角並識別特殊場景模式。

	Args:
	detected_objects: 檢測到的物體列表

	Returns:
	Dict: 包含視角和場景模式信息的字典
	"""
	if not detected_objects:
	return {"viewpoint": "eye_level", "patterns": []}

	# 從物體位置中提取信息
	patterns = []

	# 檢測行人位置模式
	pedestrian_objs = [obj for obj in detected_objects if obj["class_id"] == 0]

	# 檢查是否有足夠的行人來識別模式
	if len(pedestrian_objs) >= 4:
	pedestrian_positions = [obj["normalized_center"] for obj in pedestrian_objs]

	# 檢測十字交叉模式
	if self._detect_cross_pattern(pedestrian_positions):
	patterns.append("crosswalk_intersection")

	# 檢測多方向行人流
	directions = self._analyze_movement_directions(pedestrian_positions)
	if len(directions) >= 2:
	patterns.append("multi_directional_movement")

	# 檢查物體的大小一致性 - 在空中俯視圖中，物體大小通常更一致
	if len(detected_objects) >= 5:
	sizes = [obj.get("normalized_area", 0) for obj in detected_objects]
	size_variance = np.var(sizes) / (np.mean(sizes) ** 2) # 標準化變異數，不會受到平均值影響

	if size_variance < 0.3: # 低變異表示大小一致
	patterns.append("consistent_object_size")

	# 基本視角檢測
	viewpoint = self.enhance_descriptor._detect_viewpoint(detected_objects)

	# 根據檢測到的模式增強視角判斷
	if "crosswalk_intersection" in patterns and viewpoint != "aerial":
	# 如果檢測到斑馬線交叉但視角判斷不是空中視角，優先採用模式判斷
	viewpoint = "aerial"

	return {
	"viewpoint": viewpoint,
	"patterns": patterns
	}

	def _detect_cross_pattern(self, positions):
	"""
	檢測位置中的十字交叉模式

	Args:
	positions: 位置列表 [[x1, y1], [x2, y2], ...]

	Returns:
	bool: 是否檢測到十字交叉模式
	"""
	if len(positions) < 8: # 需要足夠多的點
	return False

	# 提取 x 和 y 坐標
	x_coords = [pos[0] for pos in positions]
	y_coords = [pos[1] for pos in positions]

	# 檢測 x 和 y 方向的聚類
	x_clusters = []
	y_clusters = []

	# 簡化的聚類分析
	x_mean = np.mean(x_coords)
	y_mean = np.mean(y_coords)

	# 計算在中心線附近的點
	near_x_center = sum(1 for x in x_coords if abs(x - x_mean) < 0.1)
	near_y_center = sum(1 for y in y_coords if abs(y - y_mean) < 0.1)

	# 如果有足夠的點在中心線附近，可能是十字交叉
	return near_x_center >= 3 and near_y_center >= 3

	def _analyze_movement_directions(self, positions):
	"""
	分析位置中的移動方向

	Args:
	positions: 位置列表 [[x1, y1], [x2, y2], ...]

	Returns:
	list: 檢測到的主要方向
	"""
	if len(positions) < 6:
	return []

	# extract x 和 y 坐標
	x_coords = [pos[0] for pos in positions]
	y_coords = [pos[1] for pos in positions]

	directions = []

	# horizontal move (left --> right)
	x_std = np.std(x_coords)
	x_range = max(x_coords) - min(x_coords)

	# vertical move(up --> down)
	y_std = np.std(y_coords)
	y_range = max(y_coords) - min(y_coords)

	# 足夠大的範圍表示該方向有運動
	if x_range > 0.4:
	directions.append("horizontal")
	if y_range > 0.4:
	directions.append("vertical")

	return directions

	def _identify_functional_zones(self, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	Identify functional zones within the scene with improved detection for different viewpoints
	and cultural contexts.

	Args:
	detected_objects: List of detected objects
	scene_type: Identified scene type

	Returns:
	Dictionary of functional zones with their descriptions
	"""
	# Group objects by category and region
	category_regions = {}

	if not getattr(self, 'enable_landmark', True):
	detected_objects = [obj for obj in detected_objects if not obj.get("is_landmark", False)]

	# 過濾地標相關場景類型
	if scene_type in ["tourist_landmark", "natural_landmark", "historical_monument"]:
	scene_type = "city_street"

	# MODIFIED: Smart threshold evaluation instead of fixed values
	should_identify = self._evaluate_zone_identification_feasibility(detected_objects, scene_type)

	if not should_identify:
	return {}

	# MODIFIED: Build category_regions mapping (was missing in original)
	for obj in detected_objects:
	category = self._categorize_object(obj)
	if not category:
	continue

	if category not in category_regions:
	category_regions[category] = {}

	region = obj.get("region", "center")
	if region not in category_regions[category]:
	category_regions[category][region] = []

	category_regions[category][region].append(obj)

	# Identify zones based on object groupings
	zones = {}

	# Detect viewpoint to adjust zone identification strategy
	viewpoint = self._detect_scene_viewpoint(detected_objects)

	# Choose appropriate zone identification strategy based on scene type and viewpoint
	if scene_type in ["living_room", "bedroom", "dining_area", "kitchen", "office_workspace", "meeting_room"]:
	# Indoor scenes
	zones.update(self._identify_indoor_zones(category_regions, detected_objects, scene_type))
	elif scene_type in ["city_street", "parking_lot", "park_area"]:
	# Outdoor general scenes
	zones.update(self._identify_outdoor_general_zones(category_regions, detected_objects, scene_type))
	elif "aerial" in scene_type or viewpoint == "aerial":
	# Aerial viewpoint scenes
	zones.update(self._identify_aerial_view_zones(category_regions, detected_objects, scene_type))
	elif "asian" in scene_type:
	# Asian cultural context scenes
	zones.update(self._identify_asian_cultural_zones(category_regions, detected_objects, scene_type))
	elif scene_type == "urban_intersection":
	# Specific urban intersection logic
	zones.update(self._identify_intersection_zones(category_regions, detected_objects, viewpoint))
	elif scene_type == "financial_district":
	# Financial district specific logic
	zones.update(self._identify_financial_district_zones(category_regions, detected_objects))
	elif scene_type == "upscale_dining":
	# Upscale dining specific logic
	zones.update(self._identify_upscale_dining_zones(category_regions, detected_objects))
	elif scene_type == "tourist_landmark" or "landmark" in scene_type:
	# 處理地標場景類型
	landmark_objects = [obj for obj in detected_objects if obj.get("is_landmark", False)]
	if landmark_objects:
	landmark_zones = self._identify_landmark_zones(landmark_objects)
	zones.update(landmark_zones)
	else:
	# Default zone identification for other scene types
	zones.update(self._identify_default_zones(category_regions, detected_objects))

	# 檢查是否有地標物體但場景類型不是地標類型
	if scene_type != "tourist_landmark" and "landmark" not in scene_type:
	landmark_objects = [obj for obj in detected_objects if obj.get("is_landmark", False)]
	if landmark_objects:
	# 添加地標功能區，但不覆蓋已有的功能區
	landmark_zones = self._identify_landmark_zones(landmark_objects)
	# 確保地標區域不會覆蓋已識別的其他重要功能區
	for zone_id, zone_info in landmark_zones.items():
	if zone_id not in zones:
	zones[zone_id] = zone_info

	# MODIFIED: Enhanced fallback strategy - try simplified identification if no zones found
	if not zones:
	zones.update(self._identify_default_zones(category_regions, detected_objects))

	# Final fallback: create basic zones from high-confidence objects
	if not zones:
	zones.update(self._create_basic_zones_from_objects(detected_objects, scene_type))

	return zones

	def _identify_core_objects_for_scene(self, detected_objects: List[Dict], scene_type: str) -> List[Dict]:
	"""
	Identify core objects that define a particular scene type.

	Args:
	detected_objects: List of detected objects
	scene_type: Scene type

	Returns:
	List of core objects for the scene
	"""
	core_objects = []

	scene_core_mapping = {
	"bedroom": [59], # bed
	"kitchen": [68, 69, 71, 72], # microwave, oven, sink, refrigerator
	"living_room": [57, 58, 62], # sofa, chair, tv
	"dining_area": [60, 46, 47], # dining table, fork, knife
	"office_workspace": [63, 64, 66, 73] # laptop, mouse, keyboard, book
	}

	if scene_type in scene_core_mapping:
	core_class_ids = scene_core_mapping[scene_type]
	for obj in detected_objects:
	if obj["class_id"] in core_class_ids and obj.get("confidence", 0) >= 0.4:
	core_objects.append(obj)

	return core_objects

	def _get_object_categories(self, detected_objects: List[Dict]) -> set:
	"""Get unique object categories from detected objects."""
	object_categories = set()
	for obj in detected_objects:
	category = self._categorize_object(obj)
	if category:
	object_categories.add(category)
	return object_categories

	def _create_basic_zones_from_objects(self, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	Create basic functional zones from individual high-confidence objects.
	This is a fallback when standard zone identification fails.

	Args:
	detected_objects: List of detected objects
	scene_type: Scene type

	Returns:
	Dictionary of basic zones
	"""
	zones = {}

	# Focus on high-confidence objects
	high_conf_objects = [obj for obj in detected_objects if obj.get("confidence", 0) >= 0.6]

	if not high_conf_objects:
	high_conf_objects = detected_objects # Fallback to all objects

	# Create zones based on individual important objects
	for i, obj in enumerate(high_conf_objects[:3]): # Limit to top 3 objects
	class_name = obj["class_name"]
	region = obj.get("region", "center")

	# Create descriptive zone based on object type
	zone_description = self._get_basic_zone_description(class_name, scene_type)

	if zone_description:
	zones[f"functional_area_{i+1}"] = {
	"region": region,
	"objects": [class_name],
	"description": zone_description
	}

	return zones

	def _get_basic_zone_description(self, class_name: str, scene_type: str) -> str:
	"""Generate basic zone description based on object and scene type."""

	# Object-specific descriptions
	descriptions = {
	"bed": "Sleeping and rest area",
	"sofa": "Seating and relaxation area",
	"chair": "Seating area",
	"dining table": "Dining and meal area",
	"tv": "Entertainment and media area",
	"laptop": "Work and computing area",
	"potted plant": "Decorative and green space area",
	"refrigerator": "Food storage and kitchen area",
	"car": "Vehicle and transportation area",
	"person": "Activity and social area"
	}

	return descriptions.get(class_name, f"Functional area with {class_name}")

	def _categorize_object(self, obj: Dict) -> str:
	"""
	Categorize detected objects into functional categories for zone identification.
	"""
	class_id = obj.get("class_id", -1)
	class_name = obj.get("class_name", "").lower()

	# Use existing category mapping if available
	if hasattr(self, 'OBJECT_CATEGORIES') and self.OBJECT_CATEGORIES:
	for category, ids in self.OBJECT_CATEGORIES.items():
	if class_id in ids:
	return category

	# Fallback categorization based on class names for common COCO classes
	furniture_items = ["chair", "couch", "bed", "dining table", "toilet"]
	plant_items = ["potted plant"]
	electronic_items = ["tv", "laptop", "mouse", "remote", "keyboard", "cell phone"]
	vehicle_items = ["bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat"]
	person_items = ["person"]
	kitchen_items = ["bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
	"banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
	"pizza", "donut", "cake", "refrigerator", "oven", "toaster", "sink", "microwave"]
	sports_items = ["frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
	"baseball glove", "skateboard", "surfboard", "tennis racket"]
	personal_items = ["handbag", "tie", "suitcase", "umbrella", "backpack"]

	if any(item in class_name for item in furniture_items):
	return "furniture"
	elif any(item in class_name for item in plant_items):
	return "plant"
	elif any(item in class_name for item in electronic_items):
	return "electronics"
	elif any(item in class_name for item in vehicle_items):
	return "vehicle"
	elif any(item in class_name for item in person_items):
	return "person"
	elif any(item in class_name for item in kitchen_items):
	return "kitchen_items"
	elif any(item in class_name for item in sports_items):
	return "sports"
	elif any(item in class_name for item in personal_items):
	return "personal_items"
	else:
	return "misc"

	def _evaluate_zone_identification_feasibility(self, detected_objects: List[Dict], scene_type: str) -> bool:
	"""
	基於物件關聯性和分布特徵的彈性可行性評估
	"""
	if len(detected_objects) < 2:
	return False

	# 計算不同置信度層級的物件分布
	high_conf_objects = [obj for obj in detected_objects if obj.get("confidence", 0) >= 0.6]
	medium_conf_objects = [obj for obj in detected_objects if obj.get("confidence", 0) >= 0.4]

	# 基礎條件：至少需要一定數量的可信物件
	if len(medium_conf_objects) < 2:
	return False

	# evalure relationships
	functional_relationships = self._calculate_functional_relationships(detected_objects)

	# 評估space的分布多樣性
	spatial_diversity = self._calculate_spatial_diversity(detected_objects)

	# 綜合評分機制
	feasibility_score = 0

	# 物件數量的貢獻值（權重30%）
	object_count_score = min(len(detected_objects) / 5.0, 1.0) * 0.3

	# 信心度質量貢獻（權重25%）
	confidence_score = len(high_conf_objects) / max(len(detected_objects), 1) * 0.25

	# 功能關聯性貢獻（權重25%）
	relationship_score = functional_relationships * 0.25

	# space多樣性貢獻（權重20%）
	diversity_score = spatial_diversity * 0.20

	feasibility_score = object_count_score + confidence_score + relationship_score + diversity_score

	# 動態閾值：基於場景複雜度調整
	complexity_threshold = self._get_complexity_threshold(scene_type)

	return feasibility_score >= complexity_threshold

	def _calculate_functional_relationships(self, detected_objects: List[Dict]) -> float:
	"""
	計算物件間的功能關聯性評分
	基於常見的物件組合模式評估功能相關性
	"""
	relationship_pairs = {
	# 家具組合關係
	frozenset([56, 60]): 1.0, # 椅子+桌子 (dining/work area)
	frozenset([57, 62]): 0.9, # 沙發+電視 (living area)
	frozenset([59, 58]): 0.7, # 床+植物 (bedroom decor)

	# 工作相關組合
	frozenset([63, 66]): 0.9, # 筆電+鍵盤 (workspace)
	frozenset([63, 64]): 0.8, # 筆電+滑鼠 (workspace)
	frozenset([60, 63]): 0.8, # 桌子+筆電 (workspace)

	# 廚房相關組合
	frozenset([68, 72]): 0.9, # 微波爐+冰箱 (kitchen)
	frozenset([69, 71]): 0.8, # 烤箱+水槽 (kitchen)

	# 用餐相關組合
	frozenset([60, 40]): 0.8, # 桌子+酒杯 (dining)
	frozenset([60, 41]): 0.8, # 桌子+杯子 (dining)
	frozenset([56, 40]): 0.7, # 椅子+酒杯 (dining)

	# 交通相關組合
	frozenset([2, 9]): 0.8, # 汽車+交通燈 (traffic)
	frozenset([0, 9]): 0.7, # 行人+交通燈 (crosswalk)
	}

	detected_class_ids = set(obj["class_id"] for obj in detected_objects)
	max_possible_score = 0
	actual_score = 0

	for pair, score in relationship_pairs.items():
	max_possible_score += score
	if pair.issubset(detected_class_ids):
	actual_score += score

	return actual_score / max_possible_score if max_possible_score > 0 else 0

	def _calculate_spatial_diversity(self, detected_objects: List[Dict]) -> float:
	"""
	計算物件空間分布的多樣性
	評估物件是否分散在不同區域，避免所有物件集中在單一區域
	"""
	regions = set(obj.get("region", "center") for obj in detected_objects)
	unique_regions = len(regions)

	return min(unique_regions / 2.0, 1.0)

	def _get_complexity_threshold(self, scene_type: str) -> float:
	"""
	可根據場景類型返回適當的複雜度閾值
	平衡不同場景的區域劃分需求
	"""
	# 較簡單場景需要較高分數才進行區域劃分
	simple_scenes = ["bedroom", "bathroom", "closet"]
	# 較複雜場景可以較低分數進行區域劃分
	complex_scenes = ["living_room", "kitchen", "office_workspace", "dining_area"]

	if scene_type in simple_scenes:
	return 0.65 # 較高閾值，避免過度細分
	elif scene_type in complex_scenes:
	return 0.45 # 較低閾值，允許合理劃分
	else:
	return 0.55 # 中等閾值，平衡策略

	def _identify_indoor_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	平衡化的室內功能區域識別
	採用通用的物件關聯性分析，避免場景特定的硬編碼
	"""
	zones = {}

	# 辨識到主要功能區域（基於物件關聯性而非場景類型）
	primary_zone = self._identify_primary_functional_area(detected_objects)
	if primary_zone:
	zones["primary_area"] = primary_zone

	# 只有明確證據且物件數量足夠時創建次要功能區域
	if len(zones) >= 1 and len(detected_objects) >= 6:
	secondary_zone = self._identify_secondary_functional_area(detected_objects, zones)
	if secondary_zone:
	zones["secondary_area"] = secondary_zone

	return zones

	def _identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
	"""
	辨識主要功能區域，基於最強的物件關聯性組合
	採用通用邏輯處理各種室內場景
	"""
	# 用餐區域檢測（桌椅組合）
	dining_area = self._detect_functional_combination(
	detected_objects,
	primary_objects=[60], # dining table
	supporting_objects=[56, 40, 41, 42, 43], # chair, wine glass, cup, fork, knife
	min_supporting=2,
	description_template="Dining area with table and seating arrangement"
	)
	if dining_area:
	return dining_area

	# 休息區域檢測（沙發電視組合或床）
	seating_area = self._detect_functional_combination(
	detected_objects,
	primary_objects=[57, 59], # sofa, bed
	supporting_objects=[62, 58, 56], # tv, potted plant, chair
	min_supporting=1,
	description_template="Seating and relaxation area"
	)
	if seating_area:
	return seating_area

	# 工作區域檢測（電子設備與家具組合）
	work_area = self._detect_functional_combination(
	detected_objects,
	primary_objects=[63, 66], # laptop, keyboard
	supporting_objects=[60, 56, 64], # dining table, chair, mouse
	min_supporting=2,
	description_template="Workspace area with electronics and furniture"
	)
	if work_area:
	return work_area

	return None

	def _identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
	"""
	識別次要功能區域，避免與主要區域重疊
	"""
	# 獲取已使用的區域
	used_regions = set(zone["region"] for zone in existing_zones.values())

	# 裝飾區域檢測（植物集中區域）
	decorative_area = self._detect_functional_combination(
	detected_objects,
	primary_objects=[58], # potted plant
	supporting_objects=[75], # vase
	min_supporting=0,
	min_primary=3, # 至少需要3個植物
	description_template="Decorative area with plants and ornamental items",
	exclude_regions=used_regions
	)
	if decorative_area:
	return decorative_area

	# 儲存區域檢測（廚房電器組合）
	storage_area = self._detect_functional_combination(
	detected_objects,
	primary_objects=[72, 68, 69], # refrigerator, microwave, oven
	supporting_objects=[71], # sink
	min_supporting=0,
	min_primary=2,
	description_template="Kitchen appliance and storage area",
	exclude_regions=used_regions
	)
	if storage_area:
	return storage_area

	return None

	def _detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
	supporting_objects: List[int], min_supporting: int,
	description_template: str, min_primary: int = 1,
	exclude_regions: set = None) -> Dict:
	"""
	通用的功能組合檢測方法
	基於主要物件和支持物件的組合判斷功能區域

	Args:
	detected_objects: 檢測到的物件列表
	primary_objects: 主要物件的class_id列表
	supporting_objects: 支持物件的class_id列表
	min_supporting: 最少需要的支持物件數量
	description_template: 描述模板
	min_primary: 最少需要的主要物件數量
	exclude_regions: 需要排除的區域集合

	Returns:
	Dict: 功能區域資訊，如果不符合條件則返回None
	"""
	if exclude_regions is None:
	exclude_regions = set()

	# 收集主要物件
	primary_objs = [obj for obj in detected_objects
	if obj["class_id"] in primary_objects and obj.get("confidence", 0) >= 0.4]

	# 收集支持物件
	supporting_objs = [obj for obj in detected_objects
	if obj["class_id"] in supporting_objects and obj.get("confidence", 0) >= 0.4]

	# 檢查是否滿足最少數量要求
	if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
	return None

	# 按區域組織物件
	region_combinations = {}
	all_relevant_objs = primary_objs + supporting_objs

	for obj in all_relevant_objs:
	region = obj["region"]

	# 排除指定區域
	if region in exclude_regions:
	continue

	if region not in region_combinations:
	region_combinations[region] = {"primary": [], "supporting": [], "all": []}

	region_combinations[region]["all"].append(obj)

	if obj["class_id"] in primary_objects:
	region_combinations[region]["primary"].append(obj)
	else:
	region_combinations[region]["supporting"].append(obj)

	# 找到最佳區域組合
	best_region = None
	best_score = 0

	for region, objs in region_combinations.items():
	# 計算該區域的評分
	primary_count = len(objs["primary"])
	supporting_count = len(objs["supporting"])

	# 必須滿足最低要求
	if primary_count < min_primary or supporting_count < min_supporting:
	continue

	# 計算組合評分（主要物件權重較高）
	score = primary_count * 2 + supporting_count

	if score > best_score:
	best_score = score
	best_region = region

	if best_region is None:
	return None

	best_combination = region_combinations[best_region]
	all_objects = [obj["class_name"] for obj in best_combination["all"]]

	return {
	"region": best_region,
	"objects": all_objects,
	"description": description_template
	}

	def _identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
	"""
	Identify functional zones for urban intersections with enhanced spatial awareness.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects
	viewpoint: Detected viewpoint

	Returns:
	Dict: Refined intersection functional zones
	"""
	zones = {}

	# Get pedestrians, vehicles and traffic signals
	pedestrian_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
	vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 7]] # bicycle, car, motorcycle, bus, truck
	traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]

	# Create distribution maps for better spatial understanding
	regions_distribution = self._create_distribution_map(detected_objects)

	# Analyze pedestrian crossing patterns
	crossing_zones = self._analyze_crossing_patterns(pedestrian_objs, traffic_light_objs, regions_distribution)
	zones.update(crossing_zones)

	# Analyze vehicle traffic zones with directional awareness
	traffic_zones = self._analyze_traffic_zones(vehicle_objs, regions_distribution)
	zones.update(traffic_zones)

	# Identify traffic control zones based on signal placement
	if traffic_light_objs:
	# Group traffic lights by region for better organization
	signal_regions = {}
	for obj in traffic_light_objs:
	region = obj["region"]
	if region not in signal_regions:
	signal_regions[region] = []
	signal_regions[region].append(obj)

	# Create traffic control zones for each region with signals
	for idx, (region, signals) in enumerate(signal_regions.items()):
	# Check if this region has a directional name
	direction = self._get_directional_description(region)

	zones[f"traffic_control_zone_{idx+1}"] = {
	"region": region,
	"objects": ["traffic light"] * len(signals),
	"description": f"Traffic control area with {len(signals)} traffic signals" +
	(f" in {direction} area" if direction else "")
	}

	return zones

	def _identify_landmark_zones(self, landmark_objects: List[Dict]) -> Dict:
	"""
	識別與地標相關的功能區域

	Args:
	landmark_objects: 被識別為地標的物體列表

	Returns:
	Dict: 地標相關的功能區域
	"""
	landmark_zones = {}

	if not landmark_objects:
	print("Warning: No landmark objects provided to _identify_landmark_zones")
	return landmark_zones

	try:
	for i, landmark in enumerate(landmark_objects):
	if not isinstance(landmark, dict):
	print(f"Warning: Landmark object at index {i} is not a dictionary: {type(landmark)}")
	continue

	landmark_id = landmark.get("landmark_id")
	if not landmark_id:
	print(f"Warning: Missing landmark_id for landmark at index {i}")
	landmark_id = f"unknown_landmark_{i}"

	landmark_name = landmark.get("class_name", "Landmark")
	landmark_type = landmark.get("landmark_type", "architectural")
	landmark_region = landmark.get("region", "middle_center")

	# 為地標創建主要觀景區
	zone_id = f"landmark_zone_{i+1}"
	zone_name = f"{landmark_name} Viewing Area"

	# 根據地標類型調整描述
	if landmark_type == "natural":
	zone_description = f"Scenic viewpoint for observing {landmark_name}, a notable natural landmark in {landmark.get('location', 'this area')}."
	primary_function = "Nature observation and photography"
	elif landmark_type == "monument":
	zone_description = f"Viewing area around {landmark_name}, a significant monument in {landmark.get('location', 'this area')}."
	primary_function = "Historical appreciation and cultural tourism"
	else: # architectural
	zone_description = f"Area centered around {landmark_name}, where visitors can observe and appreciate this iconic structure in {landmark.get('location', 'this area')}."
	primary_function = "Architectural tourism and photography"

	# 確定與地標相關的物體
	related_objects = ["person", "camera", "cell phone", "backpack"]

	# 創建功能區域
	landmark_zones[zone_id] = {
	"name": zone_name,
	"description": zone_description,
	"objects": ["landmark"] + [obj for obj in related_objects if obj in [o.get("class_name") for o in landmark_objects]],
	"region": landmark_region,
	"primary_function": primary_function
	}

	# 如果有建造年份信息，加到描述中
	if "year_built" in landmark:
	landmark_zones[zone_id]["description"] += f" Built in {landmark['year_built']}."

	# 如果有建築風格信息，加到描述中
	if "architectural_style" in landmark:
	landmark_zones[zone_id]["description"] += f" Features {landmark['architectural_style']} architectural style."

	# 如果有重要性信息，加到描述中
	if "significance" in landmark:
	landmark_zones[zone_id]["description"] += f" {landmark['significance']}."

	try:
	# 創建照相區
	photo_region = landmark_region # 默認與地標在同一區域

	# 根據地標位置調整照相區位置（地標前方通常是照相區）
	region_mapping = {
	"top_left": "bottom_right",
	"top_center": "bottom_center",
	"top_right": "bottom_left",
	"middle_left": "middle_right",
	"middle_center": "bottom_center",
	"middle_right": "middle_left",
	"bottom_left": "top_right",
	"bottom_center": "top_center",
	"bottom_right": "top_left"
	}

	if landmark_region in region_mapping:
	photo_region = region_mapping[landmark_region]

	landmark_zones[f"photo_spot_{i+1}"] = {
	"name": f"{landmark_name} Photography Spot",
	"description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
	"objects": ["camera", "person", "cell phone"],
	"region": photo_region,
	"primary_function": "Tourist photography"
	}
	except Exception as e:
	print(f"Error creating photo spot zone: {e}")

	try:
	# 如果是著名地標，可能有紀念品販售區
	if landmark.get("confidence", 0) > 0.7: # 高置信度地標更可能有紀念品區
	# 根據地標位置找到適合的紀念品區位置（通常在地標附近但不直接在地標上）
	adjacent_regions = {
	"top_left": ["top_center", "middle_left"],
	"top_center": ["top_left", "top_right"],
	"top_right": ["top_center", "middle_right"],
	"middle_left": ["top_left", "bottom_left"],
	"middle_center": ["middle_left", "middle_right"],
	"middle_right": ["top_right", "bottom_right"],
	"bottom_left": ["middle_left", "bottom_center"],
	"bottom_center": ["bottom_left", "bottom_right"],
	"bottom_right": ["bottom_center", "middle_right"]
	}

	if landmark_region in adjacent_regions:
	souvenir_region = adjacent_regions[landmark_region][0] # 選擇第一個相鄰區域

	landmark_zones[f"souvenir_area_{i+1}"] = {
	"name": f"{landmark_name} Souvenir Area",
	"description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
	"objects": ["person", "handbag", "backpack"],
	"region": souvenir_region,
	"primary_function": "Tourism commerce"
	}
	except Exception as e:
	print(f"Error creating souvenir area zone: {e}")

	except Exception as e:
	print(f"Error in _identify_landmark_zones: {e}")
	import traceback
	traceback.print_exc()

	return landmark_zones

	def _analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict],
	region_distribution: Dict) -> Dict:
	"""
	Analyze pedestrian crossing patterns to identify crosswalk zones.

	Args:
	pedestrians: List of pedestrian objects
	traffic_lights: List of traffic light objects
	region_distribution: Distribution of objects by region

	Returns:
	Dict: Identified crossing zones
	"""
	crossing_zones = {}

	if not pedestrians:
	return crossing_zones

	# Group pedestrians by region
	pedestrian_regions = {}
	for p in pedestrians:
	region = p["region"]
	if region not in pedestrian_regions:
	pedestrian_regions[region] = []
	pedestrian_regions[region].append(p)

	# Sort regions by pedestrian count to find main crossing areas
	sorted_regions = sorted(pedestrian_regions.items(), key=lambda x: len(x[1]), reverse=True)

	# Create crossing zones for regions with pedestrians
	for idx, (region, peds) in enumerate(sorted_regions[:2]): # Focus on top 2 regions
	# Check if there are traffic lights nearby to indicate a crosswalk
	has_nearby_signals = any(t["region"] == region for t in traffic_lights)

	# Create crossing zone with descriptive naming
	zone_name = f"crossing_zone_{idx+1}"
	direction = self._get_directional_description(region)

	description = f"Pedestrian crossing area with {len(peds)} "
	description += "person" if len(peds) == 1 else "people"
	if direction:
	description += f" in {direction} direction"
	if has_nearby_signals:
	description += " near traffic signals"

	crossing_zones[zone_name] = {
	"region": region,
	"objects": ["pedestrian"] * len(peds),
	"description": description
	}

	return crossing_zones

	def _analyze_traffic_zones(self, vehicles: List[Dict], region_distribution: Dict) -> Dict:
	"""
	Analyze vehicle distribution to identify traffic zones with directional awareness.

	Args:
	vehicles: List of vehicle objects
	region_distribution: Distribution of objects by region

	Returns:
	Dict: Identified traffic zones
	"""
	traffic_zones = {}

	if not vehicles:
	return traffic_zones

	# 把運輸工具歸成一區
	vehicle_regions = {}
	for v in vehicles:
	region = v["region"]
	if region not in vehicle_regions:
	vehicle_regions[region] = []
	vehicle_regions[region].append(v)

	# Create traffic zones for regions with vehicles
	main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))

	if main_traffic_region[0] is not None:
	region = main_traffic_region[0]
	vehicles_in_region = main_traffic_region[1]

	# Get a list of vehicle types for description
	vehicle_types = [v["class_name"] for v in vehicles_in_region]
	unique_types = list(set(vehicle_types))

	# Get directional description
	direction = self._get_directional_description(region)

	# Create descriptive zone
	traffic_zones["vehicle_zone"] = {
	"region": region,
	"objects": vehicle_types,
	"description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
	(f" in {direction} area" if direction else "")
	}

	# If vehicles are distributed across multiple regions, create secondary zones
	if len(vehicle_regions) > 1:
	# Get second most populated region
	sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
	if len(sorted_regions) > 1:
	second_region, second_vehicles = sorted_regions[1]
	direction = self._get_directional_description(second_region)
	vehicle_types = [v["class_name"] for v in second_vehicles]
	unique_types = list(set(vehicle_types))

	traffic_zones["secondary_vehicle_zone"] = {
	"region": second_region,
	"objects": vehicle_types,
	"description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
	(f" in {direction} direction" if direction else "")
	}

	return traffic_zones

	def _get_directional_description(self, region: str) -> str:
	"""
	把方向轉換成方位(東西南北)

	Args:
	region: Region name from the grid

	Returns:
	str: Directional description
	"""
	if "top" in region and "left" in region:
	return "northwest"
	elif "top" in region and "right" in region:
	return "northeast"
	elif "bottom" in region and "left" in region:
	return "southwest"
	elif "bottom" in region and "right" in region:
	return "southeast"
	elif "top" in region:
	return "north"
	elif "bottom" in region:
	return "south"
	elif "left" in region:
	return "west"
	elif "right" in region:
	return "east"
	else:
	return "central"

	def _create_distribution_map(self, detected_objects: List[Dict]) -> Dict:
	"""
	Create a distribution map of objects across regions for spatial analysis.

	Args:
	detected_objects: List of detected objects

	Returns:
	Dict: Distribution map of objects by region and class
	"""
	distribution = {}

	# Initialize all regions
	for region in self.regions.keys():
	distribution[region] = {
	"total": 0,
	"objects": {},
	"density": 0
	}

	# Populate the distribution
	for obj in detected_objects:
	region = obj["region"]
	class_id = obj["class_id"]
	class_name = obj["class_name"]

	distribution[region]["total"] += 1

	if class_id not in distribution[region]["objects"]:
	distribution[region]["objects"][class_id] = {
	"name": class_name,
	"count": 0,
	"positions": []
	}

	distribution[region]["objects"][class_id]["count"] += 1

	# Store position for spatial relationship analysis
	if "normalized_center" in obj:
	distribution[region]["objects"][class_id]["positions"].append(obj["normalized_center"])

	# Calculate object density for each region
	for region, data in distribution.items():
	# Assuming all regions are equal size in the grid
	data["density"] = data["total"] / 1

	return distribution

	def _identify_asian_cultural_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	Identify functional zones for scenes with Asian cultural context.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects
	scene_type: Specific scene type

	Returns:
	Dict: Asian cultural functional zones
	"""
	zones = {}

	# Identify storefront zone
	storefront_items = []
	storefront_regions = {}

	# Since storefronts aren't directly detectable, infer from context
	# For example, look for regions with signs, people, and smaller objects
	sign_regions = set()
	for obj in detected_objects:
	if obj["class_id"] == 0: # Person
	region = obj["region"]
	if region not in storefront_regions:
	storefront_regions[region] = []
	storefront_regions[region].append(obj)

	# Add regions with people as potential storefront areas
	sign_regions.add(region)

	# Use the areas with most people as storefront zones
	if storefront_regions:
	main_storefront_regions = sorted(storefront_regions.items(),
	key=lambda x: len(x[1]),
	reverse=True)[:2] # Top 2 regions

	for idx, (region, objs) in enumerate(main_storefront_regions):
	zones[f"commercial_zone_{idx+1}"] = {
	"region": region,
	"objects": [obj["class_name"] for obj in objs],
	"description": f"Asian commercial storefront with pedestrian activity"
	}

	# Identify pedestrian pathway - enhanced to better detect linear pathways
	pathway_items = []
	pathway_regions = {}

	# Extract people for pathway analysis
	people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]

	# Analyze if people form a line (typical of shopping streets)
	people_positions = [obj["normalized_center"] for obj in people_objs]

	structured_path = False
	if len(people_positions) >= 3:
	# Check if people are arranged along a similar y-coordinate (horizontal path)
	y_coords = [pos[1] for pos in people_positions]
	y_mean = sum(y_coords) / len(y_coords)
	y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)

	horizontal_path = y_variance < 0.05 # Low variance indicates horizontal alignment

	# Check if people are arranged along a similar x-coordinate (vertical path)
	x_coords = [pos[0] for pos in people_positions]
	x_mean = sum(x_coords) / len(x_coords)
	x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)

	vertical_path = x_variance < 0.05 # Low variance indicates vertical alignment

	structured_path = horizontal_path or vertical_path
	path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"

	# Collect pathway objects (people, bicycles, motorcycles in middle area)
	for obj in detected_objects:
	if obj["class_id"] in [0, 1, 3]: # Person, bicycle, motorcycle
	y_pos = obj["normalized_center"][1]
	# Group by vertical position (middle of image likely pathway)
	if 0.25 <= y_pos <= 0.75:
	region = obj["region"]
	if region not in pathway_regions:
	pathway_regions[region] = []
	pathway_regions[region].append(obj)
	pathway_items.append(obj["class_name"])

	if pathway_items:
	path_desc = "Pedestrian walkway with people moving through the commercial area"
	if structured_path:
	path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"

	zones["pedestrian_pathway"] = {
	"region": "middle_center", # Assumption: pathway often in middle
	"objects": list(set(pathway_items)),
	"description": path_desc
	}

	# Identify vendor zone (small stalls/shops - inferred from context)
	has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects) # bags, bottles, cups
	has_people = any(obj["class_id"] == 0 for obj in detected_objects)

	if has_small_objects and has_people:
	# Likely vendor areas are where people and small objects cluster
	small_obj_regions = {}

	for obj in detected_objects:
	if obj["class_id"] in [24, 26, 39, 41, 67]: # bags, bottles, cups, phones
	region = obj["region"]
	if region not in small_obj_regions:
	small_obj_regions[region] = []
	small_obj_regions[region].append(obj)

	if small_obj_regions:
	main_vendor_region = max(small_obj_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_vendor_region[0] is not None:
	vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
	zones["vendor_zone"] = {
	"region": main_vendor_region[0],
	"objects": list(set(vendor_items)),
	"description": "Vendor or market stall area with small merchandise"
	}

	# For night markets, identify illuminated zones
	if scene_type == "asian_night_market":
	# Night markets typically have bright spots for food stalls
	# This would be enhanced with lighting analysis integration
	zones["food_stall_zone"] = {
	"region": "middle_center",
	"objects": ["inferred food stalls"],
	"description": "Food stall area typical of Asian night markets"
	}

	return zones

	def _identify_upscale_dining_zones(self, category_regions: Dict, detected_objects: List[Dict]) -> Dict:
	"""
	Identify functional zones for upscale dining settings.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects

	Returns:
	Dict: Upscale dining functional zones
	"""
	zones = {}

	# Identify dining table zone
	dining_items = []
	dining_regions = {}

	for obj in detected_objects:
	if obj["class_id"] in [40, 41, 42, 43, 44, 45, 60]: # Wine glass, cup, fork, knife, spoon, bowl, table
	region = obj["region"]
	if region not in dining_regions:
	dining_regions[region] = []
	dining_regions[region].append(obj)
	dining_items.append(obj["class_name"])

	if dining_items:
	main_dining_region = max(dining_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_dining_region[0] is not None:
	zones["formal_dining_zone"] = {
	"region": main_dining_region[0],
	"objects": list(set(dining_items)),
	"description": f"Formal dining area with {', '.join(list(set(dining_items))[:3])}"
	}

	# Identify decorative zone with enhanced detection
	decor_items = []
	decor_regions = {}

	# Look for decorative elements (vases, wine glasses, unused dishes)
	for obj in detected_objects:
	if obj["class_id"] in [75, 40]: # Vase, wine glass
	region = obj["region"]
	if region not in decor_regions:
	decor_regions[region] = []
	decor_regions[region].append(obj)
	decor_items.append(obj["class_name"])

	if decor_items:
	main_decor_region = max(decor_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_decor_region[0] is not None:
	zones["decorative_zone"] = {
	"region": main_decor_region[0],
	"objects": list(set(decor_items)),
	"description": f"Decorative area with {', '.join(list(set(decor_items)))}"
	}

	# Identify seating arrangement zone
	chairs = [obj for obj in detected_objects if obj["class_id"] == 56] # chairs
	if len(chairs) >= 2:
	chair_regions = {}
	for obj in chairs:
	region = obj["region"]
	if region not in chair_regions:
	chair_regions[region] = []
	chair_regions[region].append(obj)

	if chair_regions:
	main_seating_region = max(chair_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_seating_region[0] is not None:
	zones["dining_seating_zone"] = {
	"region": main_seating_region[0],
	"objects": ["chair"] * len(main_seating_region[1]),
	"description": f"Formal dining seating arrangement with {len(main_seating_region[1])} chairs"
	}

	# Identify serving area (if different from dining area)
	serving_items = []
	serving_regions = {}

	# Serving areas might have bottles, bowls, containers
	for obj in detected_objects:
	if obj["class_id"] in [39, 45]: # Bottle, bowl
	# Check if it's in a different region from the main dining table
	if "formal_dining_zone" in zones and obj["region"] != zones["formal_dining_zone"]["region"]:
	region = obj["region"]
	if region not in serving_regions:
	serving_regions[region] = []
	serving_regions[region].append(obj)
	serving_items.append(obj["class_name"])

	if serving_items:
	main_serving_region = max(serving_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_serving_region[0] is not None:
	zones["serving_zone"] = {
	"region": main_serving_region[0],
	"objects": list(set(serving_items)),
	"description": f"Serving or sideboard area with {', '.join(list(set(serving_items)))}"
	}

	return zones

	def _identify_financial_district_zones(self, category_regions: Dict, detected_objects: List[Dict]) -> Dict:
	"""
	Identify functional zones for financial district scenes.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects

	Returns:
	Dict: Financial district functional zones
	"""
	zones = {}

	# Identify traffic zone
	traffic_items = []
	traffic_regions = {}

	for obj in detected_objects:
	if obj["class_id"] in [1, 2, 3, 5, 6, 7, 9]: # Various vehicles and traffic lights
	region = obj["region"]
	if region not in traffic_regions:
	traffic_regions[region] = []
	traffic_regions[region].append(obj)
	traffic_items.append(obj["class_name"])

	if traffic_items:
	main_traffic_region = max(traffic_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_traffic_region[0] is not None:
	zones["traffic_zone"] = {
	"region": main_traffic_region[0],
	"objects": list(set(traffic_items)),
	"description": f"Urban traffic area with {', '.join(list(set(traffic_items))[:3])}"
	}

	# Building zones on the sides (inferred from scene context)
	# Enhanced to check if there are actual regions that might contain buildings
	# Check for regions without vehicles or pedestrians - likely building areas
	left_side_regions = ["top_left", "middle_left", "bottom_left"]
	right_side_regions = ["top_right", "middle_right", "bottom_right"]

	# Check left side
	left_building_evidence = True
	for region in left_side_regions:
	# If many vehicles or people in this region, less likely to be buildings
	vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
	for obj in detected_objects)
	people_in_region = any(obj["region"] == region and obj["class_id"] == 0
	for obj in detected_objects)

	if vehicle_in_region or people_in_region:
	left_building_evidence = False
	break

	# Check right side
	right_building_evidence = True
	for region in right_side_regions:
	# If many vehicles or people in this region, less likely to be buildings
	vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
	for obj in detected_objects)
	people_in_region = any(obj["region"] == region and obj["class_id"] == 0
	for obj in detected_objects)

	if vehicle_in_region or people_in_region:
	right_building_evidence = False
	break

	# Add building zones if evidence supports them
	if left_building_evidence:
	zones["building_zone_left"] = {
	"region": "middle_left",
	"objects": ["building"], # Inferred
	"description": "Tall buildings line the left side of the street"
	}

	if right_building_evidence:
	zones["building_zone_right"] = {
	"region": "middle_right",
	"objects": ["building"], # Inferred
	"description": "Tall buildings line the right side of the street"
	}

	# Identify pedestrian zone if people are present
	people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
	if people_objs:
	people_regions = {}
	for obj in people_objs:
	region = obj["region"]
	if region not in people_regions:
	people_regions[region] = []
	people_regions[region].append(obj)

	if people_regions:
	main_pedestrian_region = max(people_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_pedestrian_region[0] is not None:
	zones["pedestrian_zone"] = {
	"region": main_pedestrian_region[0],
	"objects": ["person"] * len(main_pedestrian_region[1]),
	"description": f"Pedestrian area with {len(main_pedestrian_region[1])} people navigating the financial district"
	}

	return zones

	def _identify_aerial_view_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	Identify functional zones for scenes viewed from an aerial perspective.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects
	scene_type: Specific scene type

	Returns:
	Dict: Aerial view functional zones
	"""
	zones = {}

	# For aerial views, we focus on patterns and flows rather than specific zones

	# Identify pedestrian patterns
	people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
	if people_objs:
	# Convert positions to arrays for pattern analysis
	positions = np.array([obj["normalized_center"] for obj in people_objs])

	if len(positions) >= 3:
	# Calculate distribution metrics
	x_coords = positions[:, 0]
	y_coords = positions[:, 1]

	x_mean = np.mean(x_coords)
	y_mean = np.mean(y_coords)
	x_std = np.std(x_coords)
	y_std = np.std(y_coords)

	# Determine if people are organized in a linear pattern
	if x_std < 0.1 or y_std < 0.1:
	# Linear distribution along one axis
	pattern_direction = "vertical" if x_std < y_std else "horizontal"

	zones["pedestrian_pattern"] = {
	"region": "central",
	"objects": ["person"] * len(people_objs),
	"description": f"Aerial view shows a {pattern_direction} pedestrian movement pattern"
	}
	else:
	# More dispersed pattern
	zones["pedestrian_distribution"] = {
	"region": "wide",
	"objects": ["person"] * len(people_objs),
	"description": f"Aerial view shows pedestrians distributed across the area"
	}

	# Identify vehicle patterns for traffic analysis
	vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
	if vehicle_objs:
	# Convert positions to arrays for pattern analysis
	positions = np.array([obj["normalized_center"] for obj in vehicle_objs])

	if len(positions) >= 2:
	# Calculate distribution metrics
	x_coords = positions[:, 0]
	y_coords = positions[:, 1]

	x_mean = np.mean(x_coords)
	y_mean = np.mean(y_coords)
	x_std = np.std(x_coords)
	y_std = np.std(y_coords)

	# Determine if vehicles are organized in lanes
	if x_std < y_std * 0.5:
	# Vehicles aligned vertically - indicates north-south traffic
	zones["vertical_traffic_flow"] = {
	"region": "central_vertical",
	"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
	"description": "North-south traffic flow visible from aerial view"
	}
	elif y_std < x_std * 0.5:
	# Vehicles aligned horizontally - indicates east-west traffic
	zones["horizontal_traffic_flow"] = {
	"region": "central_horizontal",
	"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
	"description": "East-west traffic flow visible from aerial view"
	}
	else:
	# Vehicles in multiple directions - indicates intersection
	zones["intersection_traffic"] = {
	"region": "central",
	"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
	"description": "Multi-directional traffic at intersection visible from aerial view"
	}

	# For intersection specific aerial views, identify crossing patterns
	if "intersection" in scene_type:
	# Check for traffic signals
	traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
	if traffic_light_objs:
	zones["traffic_control_pattern"] = {
	"region": "intersection",
	"objects": ["traffic light"] * len(traffic_light_objs),
	"description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
	}

	# Crosswalks are inferred from context in aerial views
	zones["crossing_pattern"] = {
	"region": "central",
	"objects": ["inferred crosswalk"],
	"description": "Crossing pattern visible from aerial perspective"
	}

	# For plaza aerial views, identify gathering patterns
	if "plaza" in scene_type:
	# Plazas typically have central open area with people
	if people_objs:
	# Check if people are clustered in central region
	central_people = [obj for obj in people_objs
	if "middle" in obj["region"]]

	if central_people:
	zones["central_gathering"] = {
	"region": "middle_center",
	"objects": ["person"] * len(central_people),
	"description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
	}

	return zones

	def _identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
	"""
	Identify functional zones for general outdoor scenes.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects
	scene_type: Specific outdoor scene type

	Returns:
	Dict: Outdoor functional zones
	"""
	zones = {}

	# Identify pedestrian zones
	people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
	if people_objs:
	people_regions = {}
	for obj in people_objs:
	region = obj["region"]
	if region not in people_regions:
	people_regions[region] = []
	people_regions[region].append(obj)

	if people_regions:
	# Find main pedestrian areas
	main_people_regions = sorted(people_regions.items(),
	key=lambda x: len(x[1]),
	reverse=True)[:2] # Top 2 regions

	for idx, (region, objs) in enumerate(main_people_regions):
	if len(objs) > 0:
	zones[f"pedestrian_zone_{idx+1}"] = {
	"region": region,
	"objects": ["person"] * len(objs),
	"description": f"Pedestrian area with {len(objs)} {'people' if len(objs) > 1 else 'person'}"
	}

	# Identify vehicle zones for streets and parking lots
	vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
	if vehicle_objs:
	vehicle_regions = {}
	for obj in vehicle_objs:
	region = obj["region"]
	if region not in vehicle_regions:
	vehicle_regions[region] = []
	vehicle_regions[region].append(obj)

	if vehicle_regions:
	main_vehicle_region = max(vehicle_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_vehicle_region[0] is not None:
	vehicle_types = [obj["class_name"] for obj in main_vehicle_region[1]]
	zones["vehicle_zone"] = {
	"region": main_vehicle_region[0],
	"objects": vehicle_types,
	"description": f"Traffic area with {', '.join(list(set(vehicle_types))[:3])}"
	}

	# For park areas, identify recreational zones
	if scene_type == "park_area":
	# Look for recreational objects (sports balls, kites, etc.)
	rec_items = []
	rec_regions = {}

	for obj in detected_objects:
	if obj["class_id"] in [32, 33, 34, 35, 38]: # sports ball, kite, baseball bat, glove, tennis racket
	region = obj["region"]
	if region not in rec_regions:
	rec_regions[region] = []
	rec_regions[region].append(obj)
	rec_items.append(obj["class_name"])

	if rec_items:
	main_rec_region = max(rec_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_rec_region[0] is not None:
	zones["recreational_zone"] = {
	"region": main_rec_region[0],
	"objects": list(set(rec_items)),
	"description": f"Recreational area with {', '.join(list(set(rec_items)))}"
	}

	# For parking lots, identify parking zones
	if scene_type == "parking_lot":
	# Look for parked cars with consistent spacing
	car_objs = [obj for obj in detected_objects if obj["class_id"] == 2] # cars

	if len(car_objs) >= 3:
	# Check if cars are arranged in patterns (simplified)
	car_positions = [obj["normalized_center"] for obj in car_objs]

	# Check for row patterns by analyzing vertical positions
	y_coords = [pos[1] for pos in car_positions]
	y_clusters = {}

	# Simplified clustering - group cars by similar y-coordinates
	for i, y in enumerate(y_coords):
	assigned = False
	for cluster_y in y_clusters.keys():
	if abs(y - cluster_y) < 0.1: # Within 10% of image height
	y_clusters[cluster_y].append(i)
	assigned = True
	break

	if not assigned:
	y_clusters[y] = [i]

	# If we have row patterns
	if max(len(indices) for indices in y_clusters.values()) >= 2:
	zones["parking_row"] = {
	"region": "central",
	"objects": ["car"] * len(car_objs),
	"description": f"Organized parking area with vehicles arranged in rows"
	}
	else:
	zones["parking_area"] = {
	"region": "wide",
	"objects": ["car"] * len(car_objs),
	"description": f"Parking area with {len(car_objs)} vehicles"
	}

	return zones

	def _identify_default_zones(self, category_regions: Dict, detected_objects: List[Dict]) -> Dict:
	"""
	Identify general functional zones when no specific scene type is matched.

	Args:
	category_regions: Objects grouped by category and region
	detected_objects: List of detected objects

	Returns:
	Dict: Default functional zones
	"""
	zones = {}

	# Group objects by category and find main concentrations
	for category, regions in category_regions.items():
	if not regions:
	continue

	# Find region with most objects in this category
	main_region = max(regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_region[0] is None or len(main_region[1]) < 2:
	continue

	# Create zone based on object category
	zone_objects = [obj["class_name"] for obj in main_region[1]]

	# Skip if too few objects
	if len(zone_objects) < 2:
	continue

	# Create appropriate zone name and description based on category
	if category == "furniture":
	zones["furniture_zone"] = {
	"region": main_region[0],
	"objects": zone_objects,
	"description": f"Area with furniture including {', '.join(zone_objects[:3])}"
	}
	elif category == "electronics":
	zones["electronics_zone"] = {
	"region": main_region[0],
	"objects": zone_objects,
	"description": f"Area with electronic devices including {', '.join(zone_objects[:3])}"
	}
	elif category == "kitchen_items":
	zones["dining_zone"] = {
	"region": main_region[0],
	"objects": zone_objects,
	"description": f"Dining or food area with {', '.join(zone_objects[:3])}"
	}
	elif category == "vehicles":
	zones["vehicle_zone"] = {
	"region": main_region[0],
	"objects": zone_objects,
	"description": f"Area with vehicles including {', '.join(zone_objects[:3])}"
	}
	elif category == "personal_items":
	zones["personal_items_zone"] = {
	"region": main_region[0],
	"objects": zone_objects,
	"description": f"Area with personal items including {', '.join(zone_objects[:3])}"
	}

	# Check for people groups
	people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
	if len(people_objs) >= 2:
	people_regions = {}
	for obj in people_objs:
	region = obj["region"]
	if region not in people_regions:
	people_regions[region] = []
	people_regions[region].append(obj)

	if people_regions:
	main_people_region = max(people_regions.items(),
	key=lambda x: len(x[1]),
	default=(None, []))

	if main_people_region[0] is not None:
	zones["people_zone"] = {
	"region": main_people_region[0],
	"objects": ["person"] * len(main_people_region[1]),
	"description": f"Area with {len(main_people_region[1])} people"
	}

	return zones

	def _find_main_region(self, region_objects_dict: Dict) -> str:
	"""Find the main region with the most objects"""
	if not region_objects_dict:
	return "unknown"

	return max(region_objects_dict.items(),
	key=lambda x: len(x[1]),
	default=("unknown", []))[0]