import logging import traceback import numpy as np from typing import Dict, List, Optional, Any, Tuple class SpatialLocationHandler: """ 空間位置處理器 - 專門處理空間描述生成和排列模式分析 負責生成物件的空間位置描述、分析排列模式以及與 RegionAnalyzer 的整合 """ def __init__(self, region_analyzer: Optional[Any] = None): """ 初始化空間位置處理器 Args: region_analyzer: RegionAnalyzer實例 """ self.logger = logging.getLogger(self.__class__.__name__) self.region_analyzer = region_analyzer def set_region_analyzer(self, region_analyzer: Any) -> None: """ 設置RegionAnalyzer,用於標準化空間描述生成 Args: region_analyzer: RegionAnalyzer實例 """ try: self.region_analyzer = region_analyzer self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler") except Exception as e: self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}") def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None, image_height: Optional[int] = None, region_analyzer: Optional[Any] = None) -> str: """ 為物件生成空間位置描述 Args: obj: 物件字典 image_width: 可選的圖像寬度 image_height: 可選的圖像高度 region_analyzer: 可選的RegionAnalyzer實例,用於生成標準化描述 Returns: str: 空間描述字符串,空值region時返回空字串 """ try: region = obj.get("region") or "" object_type = obj.get("class_name", "") # 處理空值或無效region,直接返回空字串避免不完整描述 if not region.strip() or region == "unknown": # 根據物件類型提供合適的預設位置描述 if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]): return "positioned in the scene" elif object_type and "person" in object_type.lower(): return "present in the area" else: return "located in the scene" # 如果提供了RegionAnalyzer實例,使用其標準化方法 if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'): if hasattr(region_analyzer, 'get_contextual_spatial_description'): spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type) else: spatial_desc = region_analyzer.get_spatial_description_phrase(region) if spatial_desc: return spatial_desc # 備用邏輯:使用改進的內建映射 clean_region = region.replace('_', ' ').strip().lower() region_map = { "top left": "in the upper left area", "top center": "in the upper area", "top right": "in the upper right area", "middle left": "on the left side", "middle center": "in the center", "center": "in the center", "middle right": "on the right side", "bottom left": "in the lower left area", "bottom center": "in the lower area", "bottom right": "in the lower right area" } # 直接映射匹配 if clean_region in region_map: return region_map[clean_region] # 比較模糊籠統的方位匹配 if "top" in clean_region and "left" in clean_region: return "in the upper left area" elif "top" in clean_region and "right" in clean_region: return "in the upper right area" elif "bottom" in clean_region and "left" in clean_region: return "in the lower left area" elif "bottom" in clean_region and "right" in clean_region: return "in the lower right area" elif "top" in clean_region: return "in the upper area" elif "bottom" in clean_region: return "in the lower area" elif "left" in clean_region: return "on the left side" elif "right" in clean_region: return "on the right side" elif "center" in clean_region or "middle" in clean_region: return "in the center" # 如果region無法辨識,使用normalized_center作為備用 norm_center = obj.get("normalized_center") if norm_center and image_width and image_height: x_norm, y_norm = norm_center h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center" v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center" if h_pos == "center" and v_pos == "center": return "in the center" return f"in the {v_pos} {h_pos} area" # 如果所有方法都失敗,返回空字串 return "" except Exception as e: self.logger.warning(f"Error generating spatial description: {str(e)}") return "" def get_standardized_spatial_description(self, obj: Dict) -> str: """ 使用RegionAnalyzer生成標準化空間描述的內部方法 Args: obj: 物件字典 Returns: str: 標準化空間描述,失敗時返回空字串 """ try: if hasattr(self, 'region_analyzer') and self.region_analyzer: region = obj.get("region", "") object_type = obj.get("class_name", "") if hasattr(self.region_analyzer, 'get_contextual_spatial_description'): return self.region_analyzer.get_contextual_spatial_description(region, object_type) elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'): return self.region_analyzer.get_spatial_description_phrase(region) return "" except Exception as e: self.logger.warning(f"Error getting standardized spatial description: {str(e)}") object_type = obj.get("class_name", "") if object_type: return "visible in the scene" return "present in the view" def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str], detected_objects: Optional[List[Dict]], count: int) -> Optional[str]: """ 分析物件的空間排列模式並生成相應描述 Args: class_name: 物件類別名稱 scene_type: 場景類型 detected_objects: 該類型的所有檢測物件 count: 物件數量 Returns: Optional[str]: 空間排列描述,如果無法分析則返回None """ if not detected_objects or len(detected_objects) < 2: return None try: # 提取物件的標準化位置 positions = [] for obj in detected_objects: center = obj.get("normalized_center", [0.5, 0.5]) if isinstance(center, (list, tuple)) and len(center) >= 2: positions.append(center) if len(positions) < 2: return None # 分析排列模式 arrangement_pattern = self._analyze_arrangement_pattern(positions) # 根據物件類型和場景生成描述 return self._generate_arrangement_description(class_name, scene_type, arrangement_pattern, count) except Exception as e: self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}") return None def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str: """ 分析位置點的排列模式 Args: positions: 標準化的位置座標列表 Returns: str: 排列模式類型(linear, clustered, scattered, circular等) """ if len(positions) < 2: return "single" # 轉換為numpy陣列便於計算 pos_array = np.array(positions) # 計算位置的分布特徵 x_coords = pos_array[:, 0] y_coords = pos_array[:, 1] # 分析x和y方向的變異程度 x_variance = np.var(x_coords) y_variance = np.var(y_coords) # 計算物件間的平均距離 distances = [] for i in range(len(positions)): for j in range(i + 1, len(positions)): dist = np.sqrt((positions[i][0] - positions[j][0])**2 + (positions[i][1] - positions[j][1])**2) distances.append(dist) avg_distance = np.mean(distances) if distances else 0 distance_variance = np.var(distances) if distances else 0 # 判斷排列模式 if len(positions) >= 4 and self._is_circular_pattern(positions): return "circular" elif x_variance < 0.05 or y_variance < 0.05: # 一個方向變異很小 return "linear" elif avg_distance < 0.3 and distance_variance < 0.02: # 物件聚集且距離相近 return "clustered" elif avg_distance > 0.6: # 物件分散 return "scattered" elif distance_variance < 0.03: # 距離一致,可能是規則排列 return "regular" else: return "distributed" def _is_circular_pattern(self, positions: List[List[float]]) -> bool: """ 檢查位置是否形成圓形或環形排列 Args: positions: 位置座標列表 Returns: bool: 是否為圓形排列 """ if len(positions) < 4: return False try: pos_array = np.array(positions) # 計算中心點 center_x = np.mean(pos_array[:, 0]) center_y = np.mean(pos_array[:, 1]) # 計算每個點到中心的距離 distances_to_center = [] for pos in positions: dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2) distances_to_center.append(dist) # 如果所有距離都相近,可能是圓形排列 distance_variance = np.var(distances_to_center) return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2 except: return False def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str], arrangement_pattern: str, count: int) -> Optional[str]: """ 根據物件類型、場景和排列模式生成空間描述 Args: class_name: 物件類別名稱 scene_type: 場景類型 arrangement_pattern: 排列模式 count: 物件數量 Returns: Optional[str]: 生成的空間排列描述 """ # 基於物件類型的描述模板 arrangement_templates = { "chair": { "linear": "arranged in a row", "clustered": "grouped together for conversation", "circular": "arranged around the table", "scattered": "positioned throughout the space", "regular": "evenly spaced", "distributed": "thoughtfully positioned" }, "dining table": { "linear": "aligned to create a unified dining space", "clustered": "grouped to form intimate dining areas", "scattered": "distributed to optimize space flow", "regular": "systematically positioned", "distributed": "strategically placed" }, "car": { "linear": "parked in sequence", "clustered": "grouped in the parking area", "scattered": "distributed throughout the lot", "regular": "neatly parked", "distributed": "positioned across the area" }, "person": { "linear": "moving in a line", "clustered": "gathered together", "circular": "forming a circle", "scattered": "spread across the area", "distributed": "positioned throughout the scene" } } # 獲取對應的描述模板 if class_name in arrangement_templates: template_dict = arrangement_templates[class_name] base_description = template_dict.get(arrangement_pattern, "positioned in the scene") else: # 通用的排列描述 generic_templates = { "linear": "arranged in a line", "clustered": "grouped together", "circular": "arranged in a circular pattern", "scattered": "distributed across the space", "regular": "evenly positioned", "distributed": "thoughtfully placed" } base_description = generic_templates.get(arrangement_pattern, "positioned in the scene") return base_description