Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

VisionScout / text_optimizer.py

DawnC

Upload 14 files

12d9ea9 verified about 1 month ago

raw

history blame

22.9 kB

	import re
	import logging
	from typing import Dict, List, Optional, Any, Tuple

	class TextOptimizer:
	"""
	文本優化器 - 專門處理文本格式化、清理和優化
	負責物件列表格式化、重複移除、複數形式處理以及描述文本的優化
	"""

	def __init__(self):
	"""初始化文本優化器"""
	self.logger = logging.getLogger(self.__class__.__name__)

	def format_object_list_for_description(self,
	objects: List[Dict],
	use_indefinite_article_for_one: bool = False,
	count_threshold_for_generalization: int = -1,
	max_types_to_list: int = 5) -> str:
	"""
	將物件列表格式化為人類可讀的字符串，包含總計數字

	Args:
	objects: 物件字典列表，每個應包含 'class_name'
	use_indefinite_article_for_one: 單個物件是否使用 "a/an"，否則使用 "one"
	count_threshold_for_generalization: 超過此計數時使用通用術語，-1表示精確計數
	max_types_to_list: 列表中包含的不同物件類型最大數量

	Returns:
	str: 格式化的物件描述字符串
	"""
	try:
	if not objects:
	return "no specific objects clearly identified"

	counts: Dict[str, int] = {}
	for obj in objects:
	name = obj.get("class_name", "unknown object")
	if name == "unknown object" or not name:
	continue
	counts[name] = counts.get(name, 0) + 1

	if not counts:
	return "no specific objects clearly identified"

	descriptions = []
	# 按計數降序然後按名稱升序排序，限制物件類型數量
	sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]

	for name, count in sorted_counts:
	if count == 1:
	if use_indefinite_article_for_one:
	if name[0].lower() in 'aeiou':
	descriptions.append(f"an {name}")
	else:
	descriptions.append(f"a {name}")
	else:
	descriptions.append(f"one {name}")
	else:
	# 處理複數形式
	plural_name = self._get_plural_form(name)

	if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
	if count <= count_threshold_for_generalization + 3:
	descriptions.append(f"several {plural_name}")
	else:
	descriptions.append(f"many {plural_name}")
	else:
	descriptions.append(f"{count} {plural_name}")

	if not descriptions:
	return "no specific objects clearly identified"

	if len(descriptions) == 1:
	return descriptions[0]
	elif len(descriptions) == 2:
	return f"{descriptions[0]} and {descriptions[1]}"
	else:
	# 使用牛津逗號格式
	return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"

	except Exception as e:
	self.logger.warning(f"Error formatting object list: {str(e)}")
	return "various objects"

	def optimize_object_description(self, description: str) -> str:
	"""
	優化物件描述文本，消除多餘重複並改善表達流暢度

	這個函數是後處理階段的關鍵組件，負責清理和精簡自然語言生成系統
	產出的描述文字。它專門處理常見的重複問題，如相同物件的重複
	列舉和冗餘的空間描述，讓最終的描述更簡潔自然。

	Args:
	description: 原始的場景描述文本，可能包含重複或冗餘的表達

	Returns:
	str: 經過優化清理的描述文本，如果處理失敗則返回原始文本
	"""
	try:
	# 1. 處理多餘的空間限定表達
	# 使用通用模式來識別和移除不必要的空間描述
	description = self._remove_redundant_spatial_qualifiers(description)

	# 2. 辨識並處理物件列表的重複問題
	# 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
	object_lists = re.findall(r'with ([^.]+?)(?=\.\|$)', description)

	# 遍歷每個找到的物件列表進行重複檢測和優化
	for obj_list in object_lists:
	# 3. 解析單個物件列表中的項目
	all_items = self._parse_object_list_items(obj_list)

	# 4. 統計物件出現頻率
	item_counts = self._count_object_items(all_items)

	# 5. 生成優化後的物件列表
	if item_counts:
	new_items = self._generate_optimized_item_list(item_counts)
	new_list = self._format_item_list(new_items)
	description = description.replace(obj_list, new_list)

	return description

	except Exception as e:
	self.logger.warning(f"Error optimizing object description: {str(e)}")
	return description

	def remove_repetitive_descriptors(self, description: str) -> str:
	"""
	移除描述中的重複性和不適當的描述詞彙，特別是 "identical" 等詞彙

	Args:
	description: 原始描述文本

	Returns:
	str: 清理後的描述文本
	"""
	try:
	# 定義需要移除或替換的模式
	cleanup_patterns = [
	# 移除 "identical" 描述模式
	(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
	(r'\b(two\|three\|four\|five\|six\|seven\|eight\|nine\|ten\|eleven\|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
	(r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),

	# 改善 "comprehensive arrangement" 等過於技術性的表達
	(r'\bcomprehensive arrangement of\b', 'arrangement of'),
	(r'\bcomprehensive view featuring\b', 'scene featuring'),
	(r'\bcomprehensive display of\b', 'display of'),

	# 簡化過度描述性的短語
	(r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
	(r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
	]

	processed_description = description
	for pattern, replacement in cleanup_patterns:
	processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)

	# 進一步清理可能的多餘空格
	processed_description = re.sub(r'\s+', ' ', processed_description).strip()

	self.logger.debug(f"Cleaned description: removed repetitive descriptors")
	return processed_description

	except Exception as e:
	self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
	return description

	def format_object_count_description(self, class_name: str, count: int,
	scene_type: Optional[str] = None,
	detected_objects: Optional[List[Dict]] = None,
	avg_confidence: float = 0.0) -> str:
	"""
	格式化物件數量描述的核心方法，整合空間排列、材質推斷和場景語境

	Args:
	class_name: 標準化後的類別名稱
	count: 物件數量
	scene_type: 場景類型，用於語境化描述
	detected_objects: 該類型的所有檢測物件，用於空間分析
	avg_confidence: 平均檢測置信度，影響材質推斷的可信度

	Returns:
	str: 完整的格式化數量描述
	"""
	try:
	if count <= 0:
	return ""

	# 獲取基礎的複數形式
	plural_form = self._get_plural_form(class_name)

	# 單數情況的處理
	if count == 1:
	return self._format_single_object_description(class_name, scene_type,
	detected_objects, avg_confidence)

	# 複數情況的處理
	return self._format_multiple_objects_description(class_name, count, plural_form,
	scene_type, detected_objects, avg_confidence)

	except Exception as e:
	self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
	return f"{count} {class_name}s" if count > 1 else class_name

	def normalize_object_class_name(self, class_name: str) -> str:
	"""
	標準化物件類別名稱，確保輸出自然語言格式

	Args:
	class_name: 原始類別名稱

	Returns:
	str: 標準化後的類別名稱
	"""
	try:
	if not class_name or not isinstance(class_name, str):
	return "object"

	# 移除可能的技術性前綴或後綴
	normalized = re.sub(r'^(class_\|id_\|type_)', '', class_name.lower())
	normalized = re.sub(r'(_class\|_id\|_type)$', '', normalized)

	# 將下劃線和連字符替換為空格
	normalized = normalized.replace('_', ' ').replace('-', ' ')

	# 移除多餘空格
	normalized = ' '.join(normalized.split())

	# 特殊類別名稱的標準化映射
	class_name_mapping = {
	'traffic light': 'traffic light',
	'stop sign': 'stop sign',
	'fire hydrant': 'fire hydrant',
	'dining table': 'dining table',
	'potted plant': 'potted plant',
	'tv monitor': 'television',
	'cell phone': 'mobile phone',
	'wine glass': 'wine glass',
	'hot dog': 'hot dog',
	'teddy bear': 'teddy bear',
	'hair drier': 'hair dryer',
	'toothbrush': 'toothbrush'
	}

	return class_name_mapping.get(normalized, normalized)

	except Exception as e:
	self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
	return class_name if isinstance(class_name, str) else "object"

	def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
	"""
	移除描述中冗餘的空間限定詞

	Args:
	description: 包含可能多餘空間描述的文本

	Returns:
	str: 移除多餘空間限定詞後的文本
	"""
	# 定義常見的多餘空間表達模式
	redundant_patterns = [
	# 室內物件的多餘房間描述
	(r'\b(bed\|sofa\|couch\|chair\|table\|desk\|dresser\|nightstand)\s+in\s+the\s+(room\|bedroom\|living\s+room)', r'\1'),
	# 廚房物件的多餘描述
	(r'\b(refrigerator\|stove\|oven\|sink\|microwave)\s+in\s+the\s+kitchen', r'\1'),
	# 浴室物件的多餘描述
	(r'\b(toilet\|shower\|bathtub\|sink)\s+in\s+the\s+(bathroom\|restroom)', r'\1'),
	# 一般性的多餘表達：「在場景中」、「在圖片中」等
	(r'\b([\w\s]+)\s+in\s+the\s+(scene\|image\|picture\|frame)', r'\1'),
	]

	for pattern, replacement in redundant_patterns:
	description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)

	return description

	def _parse_object_list_items(self, obj_list: str) -> List[str]:
	"""
	解析物件列表中的項目

	Args:
	obj_list: 物件列表字符串

	Returns:
	List[str]: 解析後的項目列表
	"""
	# 先處理逗號格式 "A, B, and C"
	if ", and " in obj_list:
	before_last_and = obj_list.rsplit(", and ", 1)[0]
	last_item = obj_list.rsplit(", and ", 1)[1]
	front_items = [item.strip() for item in before_last_and.split(",")]
	all_items = front_items + [last_item.strip()]
	elif " and " in obj_list:
	all_items = [item.strip() for item in obj_list.split(" and ")]
	else:
	all_items = [item.strip() for item in obj_list.split(",")]

	return all_items

	def _count_object_items(self, all_items: List[str]) -> Dict[str, int]:
	"""
	統計物件項目的出現次數

	Args:
	all_items: 所有項目列表

	Returns:
	Dict[str, int]: 項目計數字典
	"""
	item_counts = {}

	for item in all_items:
	item = item.strip()
	if item and item not in ["and", "with", ""]:
	clean_item = self._normalize_item_for_counting(item)
	if clean_item not in item_counts:
	item_counts[clean_item] = 0
	item_counts[clean_item] += 1

	return item_counts

	def _generate_optimized_item_list(self, item_counts: Dict[str, int]) -> List[str]:
	"""
	生成優化後的項目列表

	Args:
	item_counts: 項目計數字典

	Returns:
	List[str]: 優化後的項目列表
	"""
	new_items = []

	for item, count in item_counts.items():
	if count > 1:
	plural_item = self._make_plural(item)
	new_items.append(f"{count} {plural_item}")
	else:
	new_items.append(item)

	return new_items

	def _format_item_list(self, new_items: List[str]) -> str:
	"""
	格式化項目列表為字符串

	Args:
	new_items: 新項目列表

	Returns:
	str: 格式化後的字符串
	"""
	if len(new_items) == 1:
	return new_items[0]
	elif len(new_items) == 2:
	return f"{new_items[0]} and {new_items[1]}"
	else:
	return ", ".join(new_items[:-1]) + f", and {new_items[-1]}"

	def _normalize_item_for_counting(self, item: str) -> str:
	"""
	正規化物件項目以便準確計數

	Args:
	item: 原始物件項目字串

	Returns:
	str: 正規化後的物件項目
	"""
	item = re.sub(r'^(a\|an\|the)\s+', '', item.lower())
	return item.strip()

	def _make_plural(self, item: str) -> str:
	"""
	將單數名詞轉換為複數形式

	Args:
	item: 單數形式的名詞

	Returns:
	str: 複數形式的名詞
	"""
	if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
	return item[:-1] + "ies"
	elif item.endswith(("s", "sh", "ch", "x", "z")):
	return item + "es"
	elif not item.endswith("s"):
	return item + "s"
	else:
	return item

	def _get_plural_form(self, word: str) -> str:
	"""
	獲取詞彙的複數形式

	Args:
	word: 單數詞彙

	Returns:
	str: 複數形式
	"""
	try:
	# 特殊複數形式
	irregular_plurals = {
	'person': 'people',
	'child': 'children',
	'foot': 'feet',
	'tooth': 'teeth',
	'mouse': 'mice',
	'man': 'men',
	'woman': 'women'
	}

	if word.lower() in irregular_plurals:
	return irregular_plurals[word.lower()]

	# 規則複數形式
	if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
	return word + 'es'
	elif word.endswith('y') and word[-2] not in 'aeiou':
	return word[:-1] + 'ies'
	elif word.endswith('f'):
	return word[:-1] + 'ves'
	elif word.endswith('fe'):
	return word[:-2] + 'ves'
	else:
	return word + 's'

	except Exception as e:
	self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
	return word + 's'

	def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
	detected_objects: Optional[List[Dict]],
	avg_confidence: float) -> str:
	"""
	處理單個物件的描述生成

	Args:
	class_name: 物件類別名稱
	scene_type: 場景類型
	detected_objects: 檢測物件列表
	avg_confidence: 平均置信度

	Returns:
	str: 單個物件的完整描述
	"""
	article = "an" if class_name[0].lower() in 'aeiou' else "a"

	# 獲取材質描述符
	material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)

	# 獲取位置或特徵描述符
	feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)

	# 組合描述
	descriptors = []
	if material_descriptor:
	descriptors.append(material_descriptor)
	if feature_descriptor:
	descriptors.append(feature_descriptor)

	if descriptors:
	return f"{article} {' '.join(descriptors)} {class_name}"
	else:
	return f"{article} {class_name}"

	def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
	scene_type: Optional[str], detected_objects: Optional[List[Dict]],
	avg_confidence: float) -> str:
	"""
	處理多個物件的描述生成

	Args:
	class_name: 物件類別名稱
	count: 物件數量
	plural_form: 複數形式
	scene_type: 場景類型
	detected_objects: 檢測物件列表
	avg_confidence: 平均置信度

	Returns:
	str: 多個物件的完整描述
	"""
	# 數字到文字的轉換映射
	number_words = {
	2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
	7: "seven", 8: "eight", 9: "nine", 10: "ten",
	11: "eleven", 12: "twelve"
	}

	# 確定基礎數量表達
	if count in number_words:
	count_expression = number_words[count]
	elif count <= 20:
	count_expression = "several"
	else:
	count_expression = "numerous"

	# 獲取材質或功能描述符
	material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)

	# 構建基礎描述
	descriptors = []
	if material_descriptor:
	descriptors.append(material_descriptor)

	base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
	return base_description

	def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
	avg_confidence: float) -> Optional[str]:
	"""
	基於場景語境和置信度進行材質推斷

	Args:
	class_name: 物件類別名稱
	scene_type: 場景類型
	avg_confidence: 檢測置信度

	Returns:
	Optional[str]: 材質描述符
	"""
	# 只有在置信度足夠高時才進行材質推斷
	if avg_confidence < 0.5:
	return None

	# 餐廳和用餐相關場景
	if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
	material_mapping = {
	"chair": "wooden" if avg_confidence > 0.7 else None,
	"dining table": "wooden",
	"couch": "upholstered",
	"vase": "decorative"
	}
	return material_mapping.get(class_name)

	# 辦公場景
	elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
	material_mapping = {
	"chair": "office",
	"dining table": "conference",
	"laptop": "modern",
	"book": "reference"
	}
	return material_mapping.get(class_name)

	# 客廳場景
	elif scene_type and scene_type in ["living_room"]:
	material_mapping = {
	"couch": "comfortable",
	"chair": "accent",
	"tv": "large",
	"vase": "decorative"
	}
	return material_mapping.get(class_name)

	# 室外場景
	elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
	material_mapping = {
	"car": "parked",
	"person": "walking",
	"bicycle": "stationed"
	}
	return material_mapping.get(class_name)

	# 如果沒有特定的場景映射，返回通用描述符
	generic_mapping = {
	"chair": "comfortable",
	"dining table": "sturdy",
	"car": "parked",
	"person": "present"
	}

	return generic_mapping.get(class_name)

	def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
	detected_objects: Optional[List[Dict]]) -> Optional[str]:
	"""
	為單個物件生成特徵描述符

	Args:
	class_name: 物件類別名稱
	scene_type: 場景類型
	detected_objects: 檢測物件

	Returns:
	Optional[str]: 特徵描述符
	"""
	if not detected_objects or len(detected_objects) != 1:
	return None

	obj = detected_objects[0]
	region = obj.get("region", "").lower()

	# 基於位置的描述
	if "center" in region:
	if class_name == "dining table":
	return "central"
	elif class_name == "chair":
	return "centrally placed"
	elif "corner" in region or "left" in region or "right" in region:
	return "positioned"

	# 基於場景的功能描述
	if scene_type and scene_type in ["dining_area", "restaurant"]:
	if class_name == "chair":
	return "dining"
	elif class_name == "vase":
	return "decorative"

	return None