Spaces:
Running
on
Zero
Running
on
Zero
Upload 4 files
Browse files
fixed placeholder, region issues
- content_generator.py +147 -25
- functional_zone_detector.py +45 -6
- scene_zone_identifier.py +5 -6
- template_processor.py +58 -1
content_generator.py
CHANGED
@@ -15,7 +15,7 @@ class ContentGenerator:
|
|
15 |
"""初始化內容生成器"""
|
16 |
self.logger = logging.getLogger(self.__class__.__name__)
|
17 |
|
18 |
-
# 預載入默認替換內容
|
19 |
self.default_replacements = self._generate_default_replacements()
|
20 |
|
21 |
self.logger.debug("ContentGenerator initialized successfully")
|
@@ -238,7 +238,7 @@ class ContentGenerator:
|
|
238 |
if not detected_objects:
|
239 |
return "various elements"
|
240 |
|
241 |
-
# 計算物件統計
|
242 |
object_counts = {}
|
243 |
total_confidence = 0
|
244 |
|
@@ -277,21 +277,40 @@ class ContentGenerator:
|
|
277 |
else:
|
278 |
descriptions.append(f"{count} {clean_name}s")
|
279 |
|
280 |
-
#
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
285 |
else:
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
except Exception as e:
|
289 |
self.logger.warning(f"Error generating objects summary: {str(e)}")
|
290 |
return "various elements"
|
291 |
|
292 |
def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
|
293 |
-
|
294 |
-
|
295 |
"""
|
296 |
獲取特定佔位符的替換內容,確保永遠不返回空值
|
297 |
|
@@ -305,18 +324,36 @@ class ContentGenerator:
|
|
305 |
Returns:
|
306 |
str: 替換內容
|
307 |
"""
|
308 |
-
try:
|
309 |
-
#
|
310 |
-
|
311 |
-
'primary_objects'
|
312 |
-
'
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
# 檢查預定義替換內容
|
321 |
if placeholder in all_replacements:
|
322 |
replacement = all_replacements[placeholder]
|
@@ -346,7 +383,7 @@ class ContentGenerator:
|
|
346 |
if scene_specific_replacement and scene_specific_replacement.strip():
|
347 |
return scene_specific_replacement.strip()
|
348 |
|
349 |
-
# 通用備用字典
|
350 |
fallback_replacements = {
|
351 |
# 交通和城市相關
|
352 |
"crossing_pattern": "pedestrian crosswalks",
|
@@ -405,7 +442,7 @@ class ContentGenerator:
|
|
405 |
# 最終備用:將下劃線轉換為有意義的短語
|
406 |
cleaned_placeholder = placeholder.replace('_', ' ')
|
407 |
|
408 |
-
#
|
409 |
if placeholder.endswith('_pattern'):
|
410 |
return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
|
411 |
elif placeholder.endswith('_behavior'):
|
@@ -421,9 +458,94 @@ class ContentGenerator:
|
|
421 |
|
422 |
except Exception as e:
|
423 |
self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
|
424 |
-
# 確保即使在異常情況下也返回有意義的內容
|
425 |
return placeholder.replace('_', ' ') if placeholder else "scene elements"
|
426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
427 |
def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
|
428 |
"""
|
429 |
基於場景類型提供智能默認值
|
|
|
15 |
"""初始化內容生成器"""
|
16 |
self.logger = logging.getLogger(self.__class__.__name__)
|
17 |
|
18 |
+
# 預載入默認替換內容
|
19 |
self.default_replacements = self._generate_default_replacements()
|
20 |
|
21 |
self.logger.debug("ContentGenerator initialized successfully")
|
|
|
238 |
if not detected_objects:
|
239 |
return "various elements"
|
240 |
|
241 |
+
# 計算物件統計
|
242 |
object_counts = {}
|
243 |
total_confidence = 0
|
244 |
|
|
|
277 |
else:
|
278 |
descriptions.append(f"{count} {clean_name}s")
|
279 |
|
280 |
+
# === 修正組合描述邏輯,增加驗證機制 ===
|
281 |
+
# 過濾掉空的或無效的描述
|
282 |
+
valid_descriptions = [desc.strip() for desc in descriptions if desc and desc.strip()]
|
283 |
+
|
284 |
+
# 確保有有效的描述項目
|
285 |
+
if not valid_descriptions:
|
286 |
+
return "various elements"
|
287 |
+
|
288 |
+
# 組合描述 - 修正邏輯以避免不完整的結尾
|
289 |
+
if len(valid_descriptions) == 1:
|
290 |
+
return valid_descriptions[0]
|
291 |
+
elif len(valid_descriptions) == 2:
|
292 |
+
return f"{valid_descriptions[0]} and {valid_descriptions[1]}"
|
293 |
else:
|
294 |
+
# 對於3個或以上的項目,確保正確的語法結構
|
295 |
+
main_items = ", ".join(valid_descriptions[:-1])
|
296 |
+
last_item = valid_descriptions[-1]
|
297 |
+
# 確保 main_items 和 last_item 都不為空
|
298 |
+
if main_items and last_item:
|
299 |
+
return f"{main_items}, and {last_item}"
|
300 |
+
elif main_items:
|
301 |
+
return main_items
|
302 |
+
elif last_item:
|
303 |
+
return last_item
|
304 |
+
else:
|
305 |
+
return "various elements"
|
306 |
|
307 |
except Exception as e:
|
308 |
self.logger.warning(f"Error generating objects summary: {str(e)}")
|
309 |
return "various elements"
|
310 |
|
311 |
def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
|
312 |
+
all_replacements: Dict, detected_objects: List[Dict],
|
313 |
+
scene_type: str) -> str:
|
314 |
"""
|
315 |
獲取特定佔位符的替換內容,確保永遠不返回空值
|
316 |
|
|
|
324 |
Returns:
|
325 |
str: 替換內容
|
326 |
"""
|
327 |
+
try:
|
328 |
+
# 動態佔位符的差異化處理策略
|
329 |
+
dynamic_placeholders_mapping = {
|
330 |
+
'primary_objects': 'full_summary',
|
331 |
+
'detected_objects_summary': 'full_summary',
|
332 |
+
'main_objects': 'simple_summary',
|
333 |
+
'functional_area': 'area_focus',
|
334 |
+
'functional_zones_description': 'zones_focus',
|
335 |
+
'scene_elements': 'elements_focus'
|
336 |
+
}
|
337 |
+
|
338 |
+
if placeholder in dynamic_placeholders_mapping:
|
339 |
+
content_type = dynamic_placeholders_mapping[placeholder]
|
340 |
+
|
341 |
+
# 根據內容類型和當前檢測物件生成不同的描述
|
342 |
+
if content_type == 'full_summary':
|
343 |
+
return self.generate_objects_summary(detected_objects)
|
344 |
+
elif content_type == 'simple_summary':
|
345 |
+
# 避免重複敘述
|
346 |
+
return self._generate_simplified_objects_summary(detected_objects)
|
347 |
+
elif content_type == 'area_focus':
|
348 |
+
# 以圖片中的area 作為重點描述
|
349 |
+
return self._generate_area_focused_summary(detected_objects)
|
350 |
+
elif content_type == 'zones_focus':
|
351 |
+
# 以圖片中的zones 作為重點描述
|
352 |
+
return self._generate_zones_summary(detected_objects)
|
353 |
+
elif content_type == 'elements_focus':
|
354 |
+
# 以圖片中物品作為重點描述
|
355 |
+
return self._generate_elements_summary(detected_objects)
|
356 |
+
|
357 |
# 檢查預定義替換內容
|
358 |
if placeholder in all_replacements:
|
359 |
replacement = all_replacements[placeholder]
|
|
|
383 |
if scene_specific_replacement and scene_specific_replacement.strip():
|
384 |
return scene_specific_replacement.strip()
|
385 |
|
386 |
+
# 通用備用字典
|
387 |
fallback_replacements = {
|
388 |
# 交通和城市相關
|
389 |
"crossing_pattern": "pedestrian crosswalks",
|
|
|
442 |
# 最終備用:將下劃線轉換為有意義的短語
|
443 |
cleaned_placeholder = placeholder.replace('_', ' ')
|
444 |
|
445 |
+
# 對常見模式提供更全面的 default value
|
446 |
if placeholder.endswith('_pattern'):
|
447 |
return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
|
448 |
elif placeholder.endswith('_behavior'):
|
|
|
458 |
|
459 |
except Exception as e:
|
460 |
self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
|
|
|
461 |
return placeholder.replace('_', ' ') if placeholder else "scene elements"
|
462 |
|
463 |
+
def _generate_simplified_objects_summary(self, detected_objects: List[Dict]) -> str:
|
464 |
+
"""生成簡化的物件摘要,避免與詳細摘要重複"""
|
465 |
+
try:
|
466 |
+
if not detected_objects:
|
467 |
+
return "scene elements"
|
468 |
+
|
469 |
+
# 只取最重要的前3個物件
|
470 |
+
object_counts = {}
|
471 |
+
for obj in detected_objects:
|
472 |
+
class_name = obj.get("class_name", "unknown")
|
473 |
+
confidence = obj.get("confidence", 0.5)
|
474 |
+
|
475 |
+
if class_name not in object_counts:
|
476 |
+
object_counts[class_name] = {"count": 0, "total_confidence": 0}
|
477 |
+
|
478 |
+
object_counts[class_name]["count"] += 1
|
479 |
+
object_counts[class_name]["total_confidence"] += confidence
|
480 |
+
|
481 |
+
# 排序並取前3個
|
482 |
+
sorted_objects = []
|
483 |
+
for class_name, stats in object_counts.items():
|
484 |
+
count = stats["count"]
|
485 |
+
avg_confidence = stats["total_confidence"] / count
|
486 |
+
importance = count * 0.6 + avg_confidence * 0.4
|
487 |
+
sorted_objects.append((class_name, count, importance))
|
488 |
+
|
489 |
+
sorted_objects.sort(key=lambda x: x[2], reverse=True)
|
490 |
+
top_objects = sorted_objects[:3]
|
491 |
+
|
492 |
+
if top_objects:
|
493 |
+
primary_object = top_objects[0]
|
494 |
+
clean_name = primary_object[0].replace('_', ' ')
|
495 |
+
count = primary_object[1]
|
496 |
+
|
497 |
+
if count == 1:
|
498 |
+
article = "an" if clean_name[0].lower() in 'aeiou' else "a"
|
499 |
+
return f"{article} {clean_name}"
|
500 |
+
else:
|
501 |
+
return f"{count} {clean_name}s"
|
502 |
+
|
503 |
+
return "scene elements"
|
504 |
+
|
505 |
+
except Exception as e:
|
506 |
+
self.logger.warning(f"Error generating simplified summary: {str(e)}")
|
507 |
+
return "scene elements"
|
508 |
+
|
509 |
+
def _generate_area_focused_summary(self, detected_objects: List[Dict]) -> str:
|
510 |
+
"""生成區域導向的摘要"""
|
511 |
+
try:
|
512 |
+
# 根據檢測到的物件推斷主要功能區域
|
513 |
+
furniture_objects = [obj for obj in detected_objects if obj.get("class_name") in ["chair", "dining table", "sofa", "bed"]]
|
514 |
+
|
515 |
+
if any(obj.get("class_name") == "dining table" for obj in furniture_objects):
|
516 |
+
return "dining area"
|
517 |
+
elif any(obj.get("class_name") == "sofa" for obj in furniture_objects):
|
518 |
+
return "seating area"
|
519 |
+
elif any(obj.get("class_name") == "bed" for obj in furniture_objects):
|
520 |
+
return "sleeping area"
|
521 |
+
elif furniture_objects:
|
522 |
+
return "furnished area"
|
523 |
+
else:
|
524 |
+
return "activity area"
|
525 |
+
|
526 |
+
except Exception as e:
|
527 |
+
self.logger.warning(f"Error generating area-focused summary: {str(e)}")
|
528 |
+
return "functional area"
|
529 |
+
|
530 |
+
def _generate_zones_summary(self, detected_objects: List[Dict]) -> str:
|
531 |
+
"""生成區域描述摘要"""
|
532 |
+
try:
|
533 |
+
return "organized areas of activity"
|
534 |
+
except Exception as e:
|
535 |
+
return "functional zones"
|
536 |
+
|
537 |
+
def _generate_elements_summary(self, detected_objects: List[Dict]) -> str:
|
538 |
+
"""生成元素導向的摘要"""
|
539 |
+
try:
|
540 |
+
if len(detected_objects) > 5:
|
541 |
+
return "diverse elements"
|
542 |
+
elif len(detected_objects) > 2:
|
543 |
+
return "multiple elements"
|
544 |
+
else:
|
545 |
+
return "key elements"
|
546 |
+
except Exception as e:
|
547 |
+
return "scene elements"
|
548 |
+
|
549 |
def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
|
550 |
"""
|
551 |
基於場景類型提供智能默認值
|
functional_zone_detector.py
CHANGED
@@ -230,7 +230,14 @@ class FunctionalZoneDetector:
|
|
230 |
region = zone_data.get("region", "")
|
231 |
description = zone_data.get("description", "")
|
232 |
|
233 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
235 |
base_name = "dining area"
|
236 |
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
@@ -241,20 +248,52 @@ class FunctionalZoneDetector:
|
|
241 |
base_name = "workspace area"
|
242 |
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
243 |
base_name = "decorative area"
|
244 |
-
elif
|
|
|
245 |
base_name = "kitchen area"
|
246 |
else:
|
247 |
-
#
|
248 |
-
if "dining" in description.lower():
|
|
|
249 |
base_name = "dining area"
|
250 |
elif "seating" in description.lower() or "relaxation" in description.lower():
|
251 |
base_name = "seating area"
|
252 |
-
elif "work" in description.lower():
|
|
|
253 |
base_name = "workspace area"
|
254 |
elif "decorative" in description.lower():
|
255 |
base_name = "decorative area"
|
256 |
else:
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
# 為次要區域添加位置標識以區分
|
260 |
if priority_level == "secondary" and region:
|
|
|
230 |
region = zone_data.get("region", "")
|
231 |
description = zone_data.get("description", "")
|
232 |
|
233 |
+
# 確保只有在明確檢測到廚房設備時才產生 kitchen area
|
234 |
+
kitchen_objects = ["refrigerator", "microwave", "oven", "sink", "dishwasher", "stove"]
|
235 |
+
explicit_kitchen_detected = any(
|
236 |
+
any(kitchen_item in obj.lower() for kitchen_item in kitchen_objects)
|
237 |
+
for obj in objects
|
238 |
+
)
|
239 |
+
|
240 |
+
# 基於物件內容確定功能類型(保持原有順序,但加強廚房確認, 因為與dining room混淆)
|
241 |
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
242 |
base_name = "dining area"
|
243 |
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
|
|
248 |
base_name = "workspace area"
|
249 |
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
250 |
base_name = "decorative area"
|
251 |
+
elif explicit_kitchen_detected:
|
252 |
+
# 只有在明確檢測到廚房設備時才使用 kitchen area
|
253 |
base_name = "kitchen area"
|
254 |
else:
|
255 |
+
# 基於描述內容推斷,但避免不當的 kitchen area 判斷
|
256 |
+
if "dining" in description.lower() and any("table" in obj.lower() for obj in objects):
|
257 |
+
# 只有當描述中提到 dining 且確實有桌子時才使用 dining area
|
258 |
base_name = "dining area"
|
259 |
elif "seating" in description.lower() or "relaxation" in description.lower():
|
260 |
base_name = "seating area"
|
261 |
+
elif "work" in description.lower() and any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
|
262 |
+
# 只有當描述中提到 work 且確實有工作設備時才使用 workspace area
|
263 |
base_name = "workspace area"
|
264 |
elif "decorative" in description.lower():
|
265 |
base_name = "decorative area"
|
266 |
else:
|
267 |
+
# 根據主要物件類型決定預設區域類型,避免使用 kitchen area
|
268 |
+
if objects:
|
269 |
+
# 根據最常見的物件類型決定區域名稱
|
270 |
+
object_counts = {}
|
271 |
+
for obj in objects:
|
272 |
+
obj_lower = obj.lower()
|
273 |
+
if "chair" in obj_lower:
|
274 |
+
object_counts["seating"] = object_counts.get("seating", 0) + 1
|
275 |
+
elif "table" in obj_lower:
|
276 |
+
object_counts["dining"] = object_counts.get("dining", 0) + 1
|
277 |
+
elif "person" in obj_lower:
|
278 |
+
object_counts["activity"] = object_counts.get("activity", 0) + 1
|
279 |
+
else:
|
280 |
+
object_counts["general"] = object_counts.get("general", 0) + 1
|
281 |
+
|
282 |
+
# 選擇最常見的類型
|
283 |
+
if object_counts:
|
284 |
+
most_common = max(object_counts, key=object_counts.get)
|
285 |
+
if most_common == "seating":
|
286 |
+
base_name = "seating area"
|
287 |
+
elif most_common == "dining":
|
288 |
+
base_name = "dining area"
|
289 |
+
elif most_common == "activity":
|
290 |
+
base_name = "activity area"
|
291 |
+
else:
|
292 |
+
base_name = "functional area"
|
293 |
+
else:
|
294 |
+
base_name = "functional area"
|
295 |
+
else:
|
296 |
+
base_name = "functional area"
|
297 |
|
298 |
# 為次要區域添加位置標識以區分
|
299 |
if priority_level == "secondary" and region:
|
scene_zone_identifier.py
CHANGED
@@ -47,10 +47,10 @@ class SceneZoneIdentifier:
|
|
47 |
try:
|
48 |
zones = {}
|
49 |
|
50 |
-
#
|
51 |
primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
|
52 |
if primary_zone:
|
53 |
-
#
|
54 |
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
|
55 |
zones[descriptive_key] = primary_zone
|
56 |
|
@@ -116,7 +116,7 @@ class SceneZoneIdentifier:
|
|
116 |
"description": f"Pedestrian area with {len(objs)} {'people' if len(objs) > 1 else 'person'}"
|
117 |
}
|
118 |
|
119 |
-
#
|
120 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
121 |
if vehicle_objs:
|
122 |
vehicle_regions = {}
|
@@ -250,7 +250,6 @@ class SceneZoneIdentifier:
|
|
250 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
251 |
if vehicle_objs:
|
252 |
traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
|
253 |
-
# analyze_traffic_zones 內部已用英文 debug,直接更新
|
254 |
for zone_key, zone_info in traffic_zones.items():
|
255 |
if zone_key in zones:
|
256 |
suffix = 1
|
@@ -355,7 +354,7 @@ class SceneZoneIdentifier:
|
|
355 |
try:
|
356 |
zones = {}
|
357 |
|
358 |
-
#
|
359 |
# 由於店面不能直接檢測,從情境推斷
|
360 |
# 例如,尋找有標誌、行人和小物件的區域
|
361 |
storefront_regions = {}
|
@@ -373,7 +372,7 @@ class SceneZoneIdentifier:
|
|
373 |
reverse=True)[:2] # 前2個區域
|
374 |
|
375 |
for idx, (region, objs) in enumerate(main_storefront_regions):
|
376 |
-
#
|
377 |
spatial_desc = self._get_directional_description(region)
|
378 |
if spatial_desc and spatial_desc != "central":
|
379 |
zone_key = f"{spatial_desc} commercial area"
|
|
|
47 |
try:
|
48 |
zones = {}
|
49 |
|
50 |
+
# 主要功能區域(根據物件關聯性而非場景類型)
|
51 |
primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
|
52 |
if primary_zone:
|
53 |
+
# 根據區域內容生成描述性鍵名
|
54 |
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
|
55 |
zones[descriptive_key] = primary_zone
|
56 |
|
|
|
116 |
"description": f"Pedestrian area with {len(objs)} {'people' if len(objs) > 1 else 'person'}"
|
117 |
}
|
118 |
|
119 |
+
# 辨識車輛區域,適用於街道和停車場
|
120 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
121 |
if vehicle_objs:
|
122 |
vehicle_regions = {}
|
|
|
250 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
251 |
if vehicle_objs:
|
252 |
traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
|
|
|
253 |
for zone_key, zone_info in traffic_zones.items():
|
254 |
if zone_key in zones:
|
255 |
suffix = 1
|
|
|
354 |
try:
|
355 |
zones = {}
|
356 |
|
357 |
+
# 辨識店面區域
|
358 |
# 由於店面不能直接檢測,從情境推斷
|
359 |
# 例如,尋找有標誌、行人和小物件的區域
|
360 |
storefront_regions = {}
|
|
|
372 |
reverse=True)[:2] # 前2個區域
|
373 |
|
374 |
for idx, (region, objs) in enumerate(main_storefront_regions):
|
375 |
+
# 根據位置生成描述性的 key name
|
376 |
spatial_desc = self._get_directional_description(region)
|
377 |
if spatial_desc and spatial_desc != "central":
|
378 |
zone_key = f"{spatial_desc} commercial area"
|
template_processor.py
CHANGED
@@ -50,6 +50,7 @@ class TemplateProcessor:
|
|
50 |
str: 修復後的模板字符串
|
51 |
"""
|
52 |
try:
|
|
|
53 |
# 修復 "In , " 模式
|
54 |
filled_template = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
|
55 |
filled_template = re.sub(r'\bAt\s*,\s*', 'At this location, ', filled_template)
|
@@ -60,7 +61,63 @@ class TemplateProcessor:
|
|
60 |
|
61 |
# 修復開頭的逗號
|
62 |
filled_template = re.sub(r'^[,\s]*', '', filled_template)
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# 確保首字母大寫
|
65 |
if filled_template and not filled_template[0].isupper():
|
66 |
filled_template = filled_template[0].upper() + filled_template[1:]
|
|
|
50 |
str: 修復後的模板字符串
|
51 |
"""
|
52 |
try:
|
53 |
+
# 原有的語法修復邏輯
|
54 |
# 修復 "In , " 模式
|
55 |
filled_template = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
|
56 |
filled_template = re.sub(r'\bAt\s*,\s*', 'At this location, ', filled_template)
|
|
|
61 |
|
62 |
# 修復開頭的逗號
|
63 |
filled_template = re.sub(r'^[,\s]*', '', filled_template)
|
64 |
+
|
65 |
+
# 1. 修復不完整的 "and." 結尾問題
|
66 |
+
# 處理 "物件列表, and." 的模式,將其修正為完整的句子
|
67 |
+
filled_template = re.sub(r',\s*and\s*\.\s*', '. ', filled_template)
|
68 |
+
filled_template = re.sub(r'\s+and\s*\.\s*', '. ', filled_template)
|
69 |
+
|
70 |
+
# 2. 處理重複的物件列表模式
|
71 |
+
# 識別並移除重複的完整物件描述片段
|
72 |
+
# 針對 "數字 + 物件名稱" 的重複模式
|
73 |
+
object_pattern = r'(\b\d+\s+\w+(?:\s+\w+)*(?:,\s*\d+\s+\w+(?:\s+\w+)*)*(?:,\s*(?:a|an)\s+\w+(?:\s+\w+)*)*)'
|
74 |
+
|
75 |
+
# 找到所有物件列表片段
|
76 |
+
object_matches = re.findall(object_pattern, filled_template)
|
77 |
+
if object_matches:
|
78 |
+
# 移除重複的物件列表
|
79 |
+
seen_objects = set()
|
80 |
+
for obj_desc in object_matches:
|
81 |
+
# 標準化物件描述用於比較(移除多餘空格)
|
82 |
+
normalized_desc = re.sub(r'\s+', ' ', obj_desc.strip().lower())
|
83 |
+
if normalized_desc in seen_objects:
|
84 |
+
|
85 |
+
# 找到重複的物件描述,移除後續出現的實例
|
86 |
+
escaped_desc = re.escape(obj_desc)
|
87 |
+
pattern = r'\.\s*' + escaped_desc + r'(?=\s*\.|\s*$)'
|
88 |
+
filled_template = re.sub(pattern, '', filled_template, count=1)
|
89 |
+
else:
|
90 |
+
seen_objects.add(normalized_desc)
|
91 |
+
|
92 |
+
# 3. 處理重複的句子片段
|
93 |
+
# 將文本分割為句子,檢查是否有完整句子的重複
|
94 |
+
sentences = re.split(r'(?<=[.!?])\s+', filled_template)
|
95 |
+
unique_sentences = []
|
96 |
+
seen_sentences = set()
|
97 |
+
|
98 |
+
for sentence in sentences:
|
99 |
+
if sentence.strip(): # 忽略空句子
|
100 |
+
# 標準化句子用於比較(移除標點符號和多餘空格)
|
101 |
+
normalized_sentence = re.sub(r'[^\w\s]', '', sentence.lower().strip())
|
102 |
+
normalized_sentence = re.sub(r'\s+', ' ', normalized_sentence)
|
103 |
+
|
104 |
+
# 只有當句子足夠長且確實重複時才移除
|
105 |
+
if len(normalized_sentence) > 10 and normalized_sentence not in seen_sentences:
|
106 |
+
unique_sentences.append(sentence.strip())
|
107 |
+
seen_sentences.add(normalized_sentence)
|
108 |
+
elif len(normalized_sentence) <= 10:
|
109 |
+
# 短句子直接保留,避免過度清理
|
110 |
+
unique_sentences.append(sentence.strip())
|
111 |
+
|
112 |
+
# 重新組合句子
|
113 |
+
if unique_sentences:
|
114 |
+
filled_template = ' '.join(unique_sentences)
|
115 |
+
|
116 |
+
# 4. 清理可能產生的多餘空格和標點符號
|
117 |
+
filled_template = re.sub(r'\s+', ' ', filled_template)
|
118 |
+
filled_template = re.sub(r'\s*\.\s*\.\s*', '. ', filled_template) # 移除連續句號
|
119 |
+
filled_template = re.sub(r'\s*,\s*\.\s*', '. ', filled_template) # 修正 ", ."
|
120 |
+
|
121 |
# 確保首字母大寫
|
122 |
if filled_template and not filled_template[0].isupper():
|
123 |
filled_template = filled_template[0].upper() + filled_template[1:]
|