VisionScout / spatial_location_handler.py
DawnC's picture
Upload 14 files
12d9ea9 verified
raw
history blame
13.8 kB
import logging
import traceback
import numpy as np
from typing import Dict, List, Optional, Any, Tuple
class SpatialLocationHandler:
"""
空間位置處理器 - 專門處理空間描述生成和排列模式分析
負責生成物件的空間位置描述、分析排列模式以及與 RegionAnalyzer 的整合
"""
def __init__(self, region_analyzer: Optional[Any] = None):
"""
初始化空間位置處理器
Args:
region_analyzer: RegionAnalyzer實例
"""
self.logger = logging.getLogger(self.__class__.__name__)
self.region_analyzer = region_analyzer
def set_region_analyzer(self, region_analyzer: Any) -> None:
"""
設置RegionAnalyzer,用於標準化空間描述生成
Args:
region_analyzer: RegionAnalyzer實例
"""
try:
self.region_analyzer = region_analyzer
self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler")
except Exception as e:
self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
image_height: Optional[int] = None,
region_analyzer: Optional[Any] = None) -> str:
"""
為物件生成空間位置描述
Args:
obj: 物件字典
image_width: 可選的圖像寬度
image_height: 可選的圖像高度
region_analyzer: 可選的RegionAnalyzer實例,用於生成標準化描述
Returns:
str: 空間描述字符串,空值region時返回空字串
"""
try:
region = obj.get("region") or ""
object_type = obj.get("class_name", "")
# 處理空值或無效region,直接返回空字串避免不完整描述
if not region.strip() or region == "unknown":
# 根據物件類型提供合適的預設位置描述
if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
return "positioned in the scene"
elif object_type and "person" in object_type.lower():
return "present in the area"
else:
return "located in the scene"
# 如果提供了RegionAnalyzer實例,使用其標準化方法
if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
if hasattr(region_analyzer, 'get_contextual_spatial_description'):
spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
else:
spatial_desc = region_analyzer.get_spatial_description_phrase(region)
if spatial_desc:
return spatial_desc
# 備用邏輯:使用改進的內建映射
clean_region = region.replace('_', ' ').strip().lower()
region_map = {
"top left": "in the upper left area",
"top center": "in the upper area",
"top right": "in the upper right area",
"middle left": "on the left side",
"middle center": "in the center",
"center": "in the center",
"middle right": "on the right side",
"bottom left": "in the lower left area",
"bottom center": "in the lower area",
"bottom right": "in the lower right area"
}
# 直接映射匹配
if clean_region in region_map:
return region_map[clean_region]
# 比較模糊籠統的方位匹配
if "top" in clean_region and "left" in clean_region:
return "in the upper left area"
elif "top" in clean_region and "right" in clean_region:
return "in the upper right area"
elif "bottom" in clean_region and "left" in clean_region:
return "in the lower left area"
elif "bottom" in clean_region and "right" in clean_region:
return "in the lower right area"
elif "top" in clean_region:
return "in the upper area"
elif "bottom" in clean_region:
return "in the lower area"
elif "left" in clean_region:
return "on the left side"
elif "right" in clean_region:
return "on the right side"
elif "center" in clean_region or "middle" in clean_region:
return "in the center"
# 如果region無法辨識,使用normalized_center作為備用
norm_center = obj.get("normalized_center")
if norm_center and image_width and image_height:
x_norm, y_norm = norm_center
h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
if h_pos == "center" and v_pos == "center":
return "in the center"
return f"in the {v_pos} {h_pos} area"
# 如果所有方法都失敗,返回空字串
return ""
except Exception as e:
self.logger.warning(f"Error generating spatial description: {str(e)}")
return ""
def get_standardized_spatial_description(self, obj: Dict) -> str:
"""
使用RegionAnalyzer生成標準化空間描述的內部方法
Args:
obj: 物件字典
Returns:
str: 標準化空間描述,失敗時返回空字串
"""
try:
if hasattr(self, 'region_analyzer') and self.region_analyzer:
region = obj.get("region", "")
object_type = obj.get("class_name", "")
if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
return self.region_analyzer.get_contextual_spatial_description(region, object_type)
elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
return self.region_analyzer.get_spatial_description_phrase(region)
return ""
except Exception as e:
self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
object_type = obj.get("class_name", "")
if object_type:
return "visible in the scene"
return "present in the view"
def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str],
detected_objects: Optional[List[Dict]],
count: int) -> Optional[str]:
"""
分析物件的空間排列模式並生成相應描述
Args:
class_name: 物件類別名稱
scene_type: 場景類型
detected_objects: 該類型的所有檢測物件
count: 物件數量
Returns:
Optional[str]: 空間排列描述,如果無法分析則返回None
"""
if not detected_objects or len(detected_objects) < 2:
return None
try:
# 提取物件的標準化位置
positions = []
for obj in detected_objects:
center = obj.get("normalized_center", [0.5, 0.5])
if isinstance(center, (list, tuple)) and len(center) >= 2:
positions.append(center)
if len(positions) < 2:
return None
# 分析排列模式
arrangement_pattern = self._analyze_arrangement_pattern(positions)
# 根據物件類型和場景生成描述
return self._generate_arrangement_description(class_name, scene_type,
arrangement_pattern, count)
except Exception as e:
self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
return None
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
"""
分析位置點的排列模式
Args:
positions: 標準化的位置座標列表
Returns:
str: 排列模式類型(linear, clustered, scattered, circular等)
"""
if len(positions) < 2:
return "single"
# 轉換為numpy陣列便於計算
pos_array = np.array(positions)
# 計算位置的分布特徵
x_coords = pos_array[:, 0]
y_coords = pos_array[:, 1]
# 分析x和y方向的變異程度
x_variance = np.var(x_coords)
y_variance = np.var(y_coords)
# 計算物件間的平均距離
distances = []
for i in range(len(positions)):
for j in range(i + 1, len(positions)):
dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
(positions[i][1] - positions[j][1])**2)
distances.append(dist)
avg_distance = np.mean(distances) if distances else 0
distance_variance = np.var(distances) if distances else 0
# 判斷排列模式
if len(positions) >= 4 and self._is_circular_pattern(positions):
return "circular"
elif x_variance < 0.05 or y_variance < 0.05: # 一個方向變異很小
return "linear"
elif avg_distance < 0.3 and distance_variance < 0.02: # 物件聚集且距離相近
return "clustered"
elif avg_distance > 0.6: # 物件分散
return "scattered"
elif distance_variance < 0.03: # 距離一致,可能是規則排列
return "regular"
else:
return "distributed"
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
"""
檢查位置是否形成圓形或環形排列
Args:
positions: 位置座標列表
Returns:
bool: 是否為圓形排列
"""
if len(positions) < 4:
return False
try:
pos_array = np.array(positions)
# 計算中心點
center_x = np.mean(pos_array[:, 0])
center_y = np.mean(pos_array[:, 1])
# 計算每個點到中心的距離
distances_to_center = []
for pos in positions:
dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
distances_to_center.append(dist)
# 如果所有距離都相近,可能是圓形排列
distance_variance = np.var(distances_to_center)
return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
except:
return False
def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
arrangement_pattern: str, count: int) -> Optional[str]:
"""
根據物件類型、場景和排列模式生成空間描述
Args:
class_name: 物件類別名稱
scene_type: 場景類型
arrangement_pattern: 排列模式
count: 物件數量
Returns:
Optional[str]: 生成的空間排列描述
"""
# 基於物件類型的描述模板
arrangement_templates = {
"chair": {
"linear": "arranged in a row",
"clustered": "grouped together for conversation",
"circular": "arranged around the table",
"scattered": "positioned throughout the space",
"regular": "evenly spaced",
"distributed": "thoughtfully positioned"
},
"dining table": {
"linear": "aligned to create a unified dining space",
"clustered": "grouped to form intimate dining areas",
"scattered": "distributed to optimize space flow",
"regular": "systematically positioned",
"distributed": "strategically placed"
},
"car": {
"linear": "parked in sequence",
"clustered": "grouped in the parking area",
"scattered": "distributed throughout the lot",
"regular": "neatly parked",
"distributed": "positioned across the area"
},
"person": {
"linear": "moving in a line",
"clustered": "gathered together",
"circular": "forming a circle",
"scattered": "spread across the area",
"distributed": "positioned throughout the scene"
}
}
# 獲取對應的描述模板
if class_name in arrangement_templates:
template_dict = arrangement_templates[class_name]
base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
else:
# 通用的排列描述
generic_templates = {
"linear": "arranged in a line",
"clustered": "grouped together",
"circular": "arranged in a circular pattern",
"scattered": "distributed across the space",
"regular": "evenly positioned",
"distributed": "thoughtfully placed"
}
base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
return base_description