DawnC committed on
Commit
0347f2d
·
verified ·
1 Parent(s): 248637a

Upload 4 files

Browse files

fixed placeholder, region issues

content_generator.py CHANGED
@@ -15,7 +15,7 @@ class ContentGenerator:
15
  """初始化內容生成器"""
16
  self.logger = logging.getLogger(self.__class__.__name__)
17
 
18
- # 預載入默認替換內容
19
  self.default_replacements = self._generate_default_replacements()
20
 
21
  self.logger.debug("ContentGenerator initialized successfully")
@@ -238,7 +238,7 @@ class ContentGenerator:
238
  if not detected_objects:
239
  return "various elements"
240
 
241
- # 計算物件統計
242
  object_counts = {}
243
  total_confidence = 0
244
 
@@ -277,21 +277,40 @@ class ContentGenerator:
277
  else:
278
  descriptions.append(f"{count} {clean_name}s")
279
 
280
- # 組合描述
281
- if len(descriptions) == 1:
282
- return descriptions[0]
283
- elif len(descriptions) == 2:
284
- return f"{descriptions[0]} and {descriptions[1]}"
 
 
 
 
 
 
 
 
285
  else:
286
- return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
 
 
 
 
 
 
 
 
 
 
 
287
 
288
  except Exception as e:
289
  self.logger.warning(f"Error generating objects summary: {str(e)}")
290
  return "various elements"
291
 
292
  def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
293
- all_replacements: Dict, detected_objects: List[Dict],
294
- scene_type: str) -> str:
295
  """
296
  獲取特定佔位符的替換內容,確保永遠不返回空值
297
 
@@ -305,18 +324,36 @@ class ContentGenerator:
305
  Returns:
306
  str: 替換內容
307
  """
308
- try:
309
- # 優先處理動態內容生成的佔位符
310
- dynamic_placeholders = [
311
- 'primary_objects', 'detected_objects_summary', 'main_objects',
312
- 'functional_area', 'functional_zones_description', 'scene_elements'
313
- ]
314
-
315
- if placeholder in dynamic_placeholders:
316
- dynamic_content = self.generate_objects_summary(detected_objects)
317
- if dynamic_content and dynamic_content.strip():
318
- return dynamic_content.strip()
319
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  # 檢查預定義替換內容
321
  if placeholder in all_replacements:
322
  replacement = all_replacements[placeholder]
@@ -346,7 +383,7 @@ class ContentGenerator:
346
  if scene_specific_replacement and scene_specific_replacement.strip():
347
  return scene_specific_replacement.strip()
348
 
349
- # 通用備用字典
350
  fallback_replacements = {
351
  # 交通和城市相關
352
  "crossing_pattern": "pedestrian crosswalks",
@@ -405,7 +442,7 @@ class ContentGenerator:
405
  # 最終備用:將下劃線轉換為有意義的短語
406
  cleaned_placeholder = placeholder.replace('_', ' ')
407
 
408
- # 對常見模式提供更好的默認值
409
  if placeholder.endswith('_pattern'):
410
  return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
411
  elif placeholder.endswith('_behavior'):
@@ -421,9 +458,94 @@ class ContentGenerator:
421
 
422
  except Exception as e:
423
  self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
424
- # 確保即使在異常情況下也返回有意義的內容
425
  return placeholder.replace('_', ' ') if placeholder else "scene elements"
426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
428
  """
429
  基於場景類型提供智能默認值
 
15
  """初始化內容生成器"""
16
  self.logger = logging.getLogger(self.__class__.__name__)
17
 
18
+ # 預載入默認替換內容
19
  self.default_replacements = self._generate_default_replacements()
20
 
21
  self.logger.debug("ContentGenerator initialized successfully")
 
238
  if not detected_objects:
239
  return "various elements"
240
 
241
+ # 計算物件統計
242
  object_counts = {}
243
  total_confidence = 0
244
 
 
277
  else:
278
  descriptions.append(f"{count} {clean_name}s")
279
 
280
+ # === 修正組合描述邏輯,增加驗證機制 ===
281
+ # 過濾掉空的或無效的描述
282
+ valid_descriptions = [desc.strip() for desc in descriptions if desc and desc.strip()]
283
+
284
+ # 確保有有效的描述項目
285
+ if not valid_descriptions:
286
+ return "various elements"
287
+
288
+ # 組合描述 - 修正邏輯以避免不完整的結尾
289
+ if len(valid_descriptions) == 1:
290
+ return valid_descriptions[0]
291
+ elif len(valid_descriptions) == 2:
292
+ return f"{valid_descriptions[0]} and {valid_descriptions[1]}"
293
  else:
294
+ # 對於3個或以上的項目,確保正確的語法結構
295
+ main_items = ", ".join(valid_descriptions[:-1])
296
+ last_item = valid_descriptions[-1]
297
+ # 確保 main_items 和 last_item 都不為空
298
+ if main_items and last_item:
299
+ return f"{main_items}, and {last_item}"
300
+ elif main_items:
301
+ return main_items
302
+ elif last_item:
303
+ return last_item
304
+ else:
305
+ return "various elements"
306
 
307
  except Exception as e:
308
  self.logger.warning(f"Error generating objects summary: {str(e)}")
309
  return "various elements"
310
 
311
  def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
312
+ all_replacements: Dict, detected_objects: List[Dict],
313
+ scene_type: str) -> str:
314
  """
315
  獲取特定佔位符的替換內容,確保永遠不返回空值
316
 
 
324
  Returns:
325
  str: 替換內容
326
  """
327
+ try:
328
+ # 動態佔位符的差異化處理策略
329
+ dynamic_placeholders_mapping = {
330
+ 'primary_objects': 'full_summary',
331
+ 'detected_objects_summary': 'full_summary',
332
+ 'main_objects': 'simple_summary',
333
+ 'functional_area': 'area_focus',
334
+ 'functional_zones_description': 'zones_focus',
335
+ 'scene_elements': 'elements_focus'
336
+ }
337
+
338
+ if placeholder in dynamic_placeholders_mapping:
339
+ content_type = dynamic_placeholders_mapping[placeholder]
340
+
341
+ # 根據內容類型和當前檢測物件生成不同的描述
342
+ if content_type == 'full_summary':
343
+ return self.generate_objects_summary(detected_objects)
344
+ elif content_type == 'simple_summary':
345
+ # 避免重複敘述
346
+ return self._generate_simplified_objects_summary(detected_objects)
347
+ elif content_type == 'area_focus':
348
+ # 以圖片中的area 作為重點描述
349
+ return self._generate_area_focused_summary(detected_objects)
350
+ elif content_type == 'zones_focus':
351
+ # 以圖片中的zones 作為重點描述
352
+ return self._generate_zones_summary(detected_objects)
353
+ elif content_type == 'elements_focus':
354
+ # 以圖片中物品作為重點描述
355
+ return self._generate_elements_summary(detected_objects)
356
+
357
  # 檢查預定義替換內容
358
  if placeholder in all_replacements:
359
  replacement = all_replacements[placeholder]
 
383
  if scene_specific_replacement and scene_specific_replacement.strip():
384
  return scene_specific_replacement.strip()
385
 
386
+ # 通用備用字典
387
  fallback_replacements = {
388
  # 交通和城市相關
389
  "crossing_pattern": "pedestrian crosswalks",
 
442
  # 最終備用:將下劃線轉換為有意義的短語
443
  cleaned_placeholder = placeholder.replace('_', ' ')
444
 
445
+ # 對常見模式提供更全面的 default value
446
  if placeholder.endswith('_pattern'):
447
  return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
448
  elif placeholder.endswith('_behavior'):
 
458
 
459
  except Exception as e:
460
  self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
 
461
  return placeholder.replace('_', ' ') if placeholder else "scene elements"
462
 
463
+ def _generate_simplified_objects_summary(self, detected_objects: List[Dict]) -> str:
464
+ """生成簡化的物件摘要,避免與詳細摘要重複"""
465
+ try:
466
+ if not detected_objects:
467
+ return "scene elements"
468
+
469
+ # 只取最重要的前3個物件
470
+ object_counts = {}
471
+ for obj in detected_objects:
472
+ class_name = obj.get("class_name", "unknown")
473
+ confidence = obj.get("confidence", 0.5)
474
+
475
+ if class_name not in object_counts:
476
+ object_counts[class_name] = {"count": 0, "total_confidence": 0}
477
+
478
+ object_counts[class_name]["count"] += 1
479
+ object_counts[class_name]["total_confidence"] += confidence
480
+
481
+ # 排序並取前3個
482
+ sorted_objects = []
483
+ for class_name, stats in object_counts.items():
484
+ count = stats["count"]
485
+ avg_confidence = stats["total_confidence"] / count
486
+ importance = count * 0.6 + avg_confidence * 0.4
487
+ sorted_objects.append((class_name, count, importance))
488
+
489
+ sorted_objects.sort(key=lambda x: x[2], reverse=True)
490
+ top_objects = sorted_objects[:3]
491
+
492
+ if top_objects:
493
+ primary_object = top_objects[0]
494
+ clean_name = primary_object[0].replace('_', ' ')
495
+ count = primary_object[1]
496
+
497
+ if count == 1:
498
+ article = "an" if clean_name[0].lower() in 'aeiou' else "a"
499
+ return f"{article} {clean_name}"
500
+ else:
501
+ return f"{count} {clean_name}s"
502
+
503
+ return "scene elements"
504
+
505
+ except Exception as e:
506
+ self.logger.warning(f"Error generating simplified summary: {str(e)}")
507
+ return "scene elements"
508
+
509
+ def _generate_area_focused_summary(self, detected_objects: List[Dict]) -> str:
510
+ """生成區域導向的摘要"""
511
+ try:
512
+ # 根據檢測到的物件推斷主要功能區域
513
+ furniture_objects = [obj for obj in detected_objects if obj.get("class_name") in ["chair", "dining table", "sofa", "bed"]]
514
+
515
+ if any(obj.get("class_name") == "dining table" for obj in furniture_objects):
516
+ return "dining area"
517
+ elif any(obj.get("class_name") == "sofa" for obj in furniture_objects):
518
+ return "seating area"
519
+ elif any(obj.get("class_name") == "bed" for obj in furniture_objects):
520
+ return "sleeping area"
521
+ elif furniture_objects:
522
+ return "furnished area"
523
+ else:
524
+ return "activity area"
525
+
526
+ except Exception as e:
527
+ self.logger.warning(f"Error generating area-focused summary: {str(e)}")
528
+ return "functional area"
529
+
530
+ def _generate_zones_summary(self, detected_objects: List[Dict]) -> str:
531
+ """生成區域描述摘要"""
532
+ try:
533
+ return "organized areas of activity"
534
+ except Exception as e:
535
+ return "functional zones"
536
+
537
+ def _generate_elements_summary(self, detected_objects: List[Dict]) -> str:
538
+ """生成元素導向的摘要"""
539
+ try:
540
+ if len(detected_objects) > 5:
541
+ return "diverse elements"
542
+ elif len(detected_objects) > 2:
543
+ return "multiple elements"
544
+ else:
545
+ return "key elements"
546
+ except Exception as e:
547
+ return "scene elements"
548
+
549
  def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
550
  """
551
  基於場景類型提供智能默認值
functional_zone_detector.py CHANGED
@@ -230,7 +230,14 @@ class FunctionalZoneDetector:
230
  region = zone_data.get("region", "")
231
  description = zone_data.get("description", "")
232
 
233
- # 基於物件內容確定功能類型
 
 
 
 
 
 
 
234
  if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
235
  base_name = "dining area"
236
  elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
@@ -241,20 +248,52 @@ class FunctionalZoneDetector:
241
  base_name = "workspace area"
242
  elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
243
  base_name = "decorative area"
244
- elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
 
245
  base_name = "kitchen area"
246
  else:
247
- # 基於描述內容推斷
248
- if "dining" in description.lower():
 
249
  base_name = "dining area"
250
  elif "seating" in description.lower() or "relaxation" in description.lower():
251
  base_name = "seating area"
252
- elif "work" in description.lower():
 
253
  base_name = "workspace area"
254
  elif "decorative" in description.lower():
255
  base_name = "decorative area"
256
  else:
257
- base_name = "functional area"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  # 為次要區域添加位置標識以區分
260
  if priority_level == "secondary" and region:
 
230
  region = zone_data.get("region", "")
231
  description = zone_data.get("description", "")
232
 
233
+ # 確保只有在明確檢測到廚房設備時才產生 kitchen area
234
+ kitchen_objects = ["refrigerator", "microwave", "oven", "sink", "dishwasher", "stove"]
235
+ explicit_kitchen_detected = any(
236
+ any(kitchen_item in obj.lower() for kitchen_item in kitchen_objects)
237
+ for obj in objects
238
+ )
239
+
240
+ # 基於物件內容確定功能類型(保持原有順序,但加強廚房確認, 因為與dining room混淆)
241
  if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
242
  base_name = "dining area"
243
  elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
 
248
  base_name = "workspace area"
249
  elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
250
  base_name = "decorative area"
251
+ elif explicit_kitchen_detected:
252
+ # 只有在明確檢測到廚房設備時才使用 kitchen area
253
  base_name = "kitchen area"
254
  else:
255
+ # 基於描述內容推斷,但避免不當的 kitchen area 判斷
256
+ if "dining" in description.lower() and any("table" in obj.lower() for obj in objects):
257
+ # 只有當描述中提到 dining 且確實有桌子時才使用 dining area
258
  base_name = "dining area"
259
  elif "seating" in description.lower() or "relaxation" in description.lower():
260
  base_name = "seating area"
261
+ elif "work" in description.lower() and any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
262
+ # 只有當描述中提到 work 且確實有工作設備時才使用 workspace area
263
  base_name = "workspace area"
264
  elif "decorative" in description.lower():
265
  base_name = "decorative area"
266
  else:
267
+ # 根據主要物件類型決定預設區域類型,避免使用 kitchen area
268
+ if objects:
269
+ # 根據最常見的物件類型決定區域名稱
270
+ object_counts = {}
271
+ for obj in objects:
272
+ obj_lower = obj.lower()
273
+ if "chair" in obj_lower:
274
+ object_counts["seating"] = object_counts.get("seating", 0) + 1
275
+ elif "table" in obj_lower:
276
+ object_counts["dining"] = object_counts.get("dining", 0) + 1
277
+ elif "person" in obj_lower:
278
+ object_counts["activity"] = object_counts.get("activity", 0) + 1
279
+ else:
280
+ object_counts["general"] = object_counts.get("general", 0) + 1
281
+
282
+ # 選擇最常見的類型
283
+ if object_counts:
284
+ most_common = max(object_counts, key=object_counts.get)
285
+ if most_common == "seating":
286
+ base_name = "seating area"
287
+ elif most_common == "dining":
288
+ base_name = "dining area"
289
+ elif most_common == "activity":
290
+ base_name = "activity area"
291
+ else:
292
+ base_name = "functional area"
293
+ else:
294
+ base_name = "functional area"
295
+ else:
296
+ base_name = "functional area"
297
 
298
  # 為次要區域添加位置標識以區分
299
  if priority_level == "secondary" and region:
scene_zone_identifier.py CHANGED
@@ -47,10 +47,10 @@ class SceneZoneIdentifier:
47
  try:
48
  zones = {}
49
 
50
- # 主要功能區域(基於物件關聯性而非場景類型)
51
  primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
52
  if primary_zone:
53
- # 基於區域內容生成描述性鍵名
54
  descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
55
  zones[descriptive_key] = primary_zone
56
 
@@ -116,7 +116,7 @@ class SceneZoneIdentifier:
116
  "description": f"Pedestrian area with {len(objs)} {'people' if len(objs) > 1 else 'person'}"
117
  }
118
 
119
- # 識別車輛區域,適用於街道和停車場
120
  vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
121
  if vehicle_objs:
122
  vehicle_regions = {}
@@ -250,7 +250,6 @@ class SceneZoneIdentifier:
250
  # 5. Step D: 分析車輛交通區域(Vehicle Zones)
251
  if vehicle_objs:
252
  traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
253
- # analyze_traffic_zones 內部已用英文 debug,直接更新
254
  for zone_key, zone_info in traffic_zones.items():
255
  if zone_key in zones:
256
  suffix = 1
@@ -355,7 +354,7 @@ class SceneZoneIdentifier:
355
  try:
356
  zones = {}
357
 
358
- # 識別店面區域
359
  # 由於店面不能直接檢測,從情境推斷
360
  # 例如,尋找有標誌、行人和小物件的區域
361
  storefront_regions = {}
@@ -373,7 +372,7 @@ class SceneZoneIdentifier:
373
  reverse=True)[:2] # 前2個區域
374
 
375
  for idx, (region, objs) in enumerate(main_storefront_regions):
376
- # 生成基於位置的描述性鍵名
377
  spatial_desc = self._get_directional_description(region)
378
  if spatial_desc and spatial_desc != "central":
379
  zone_key = f"{spatial_desc} commercial area"
 
47
  try:
48
  zones = {}
49
 
50
+ # 主要功能區域(根據物件關聯性而非場景類型)
51
  primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
52
  if primary_zone:
53
+ # 根據區域內容生成描述性鍵名
54
  descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
55
  zones[descriptive_key] = primary_zone
56
 
 
116
  "description": f"Pedestrian area with {len(objs)} {'people' if len(objs) > 1 else 'person'}"
117
  }
118
 
119
+ # 辨識車輛區域,適用於街道和停車場
120
  vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
121
  if vehicle_objs:
122
  vehicle_regions = {}
 
250
  # 5. Step D: 分析車輛交通區域(Vehicle Zones)
251
  if vehicle_objs:
252
  traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
 
253
  for zone_key, zone_info in traffic_zones.items():
254
  if zone_key in zones:
255
  suffix = 1
 
354
  try:
355
  zones = {}
356
 
357
+ # 辨識店面區域
358
  # 由於店面不能直接檢測,從情境推斷
359
  # 例如,尋找有標誌、行人和小物件的區域
360
  storefront_regions = {}
 
372
  reverse=True)[:2] # 前2個區域
373
 
374
  for idx, (region, objs) in enumerate(main_storefront_regions):
375
+ # 根據位置生成描述性的 key name
376
  spatial_desc = self._get_directional_description(region)
377
  if spatial_desc and spatial_desc != "central":
378
  zone_key = f"{spatial_desc} commercial area"
template_processor.py CHANGED
@@ -50,6 +50,7 @@ class TemplateProcessor:
50
  str: 修復後的模板字符串
51
  """
52
  try:
 
53
  # 修復 "In , " 模式
54
  filled_template = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
55
  filled_template = re.sub(r'\bAt\s*,\s*', 'At this location, ', filled_template)
@@ -60,7 +61,63 @@ class TemplateProcessor:
60
 
61
  # 修復開頭的逗號
62
  filled_template = re.sub(r'^[,\s]*', '', filled_template)
63
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # 確保首字母大寫
65
  if filled_template and not filled_template[0].isupper():
66
  filled_template = filled_template[0].upper() + filled_template[1:]
 
50
  str: 修復後的模板字符串
51
  """
52
  try:
53
+ # 原有的語法修復邏輯
54
  # 修復 "In , " 模式
55
  filled_template = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
56
  filled_template = re.sub(r'\bAt\s*,\s*', 'At this location, ', filled_template)
 
61
 
62
  # 修復開頭的逗號
63
  filled_template = re.sub(r'^[,\s]*', '', filled_template)
64
+
65
+ # 1. 修復不完整的 "and." 結尾問題
66
+ # 處理 "物件列表, and." 的模式,將其修正為完整的句子
67
+ filled_template = re.sub(r',\s*and\s*\.\s*', '. ', filled_template)
68
+ filled_template = re.sub(r'\s+and\s*\.\s*', '. ', filled_template)
69
+
70
+ # 2. 處理重複的物件列表模式
71
+ # 識別並移除重複的完整物件描述片段
72
+ # 針對 "數字 + 物件名稱" 的重複模式
73
+ object_pattern = r'(\b\d+\s+\w+(?:\s+\w+)*(?:,\s*\d+\s+\w+(?:\s+\w+)*)*(?:,\s*(?:a|an)\s+\w+(?:\s+\w+)*)*)'
74
+
75
+ # 找到所有物件列表片段
76
+ object_matches = re.findall(object_pattern, filled_template)
77
+ if object_matches:
78
+ # 移除重複的物件列表
79
+ seen_objects = set()
80
+ for obj_desc in object_matches:
81
+ # 標準化物件描述用於比較(移除多餘空格)
82
+ normalized_desc = re.sub(r'\s+', ' ', obj_desc.strip().lower())
83
+ if normalized_desc in seen_objects:
84
+
85
+ # 找到重複的物件描述,移除後續出現的實例
86
+ escaped_desc = re.escape(obj_desc)
87
+ pattern = r'\.\s*' + escaped_desc + r'(?=\s*\.|\s*$)'
88
+ filled_template = re.sub(pattern, '', filled_template, count=1)
89
+ else:
90
+ seen_objects.add(normalized_desc)
91
+
92
+ # 3. 處理重複的句子片段
93
+ # 將文本分割為句子,檢查是否有完整句子的重複
94
+ sentences = re.split(r'(?<=[.!?])\s+', filled_template)
95
+ unique_sentences = []
96
+ seen_sentences = set()
97
+
98
+ for sentence in sentences:
99
+ if sentence.strip(): # 忽略空句子
100
+ # 標準化句子用於比較(移除標點符號和多餘空格)
101
+ normalized_sentence = re.sub(r'[^\w\s]', '', sentence.lower().strip())
102
+ normalized_sentence = re.sub(r'\s+', ' ', normalized_sentence)
103
+
104
+ # 只有當句子足夠長且確實重複時才移除
105
+ if len(normalized_sentence) > 10 and normalized_sentence not in seen_sentences:
106
+ unique_sentences.append(sentence.strip())
107
+ seen_sentences.add(normalized_sentence)
108
+ elif len(normalized_sentence) <= 10:
109
+ # 短句子直接保留,避免過度清理
110
+ unique_sentences.append(sentence.strip())
111
+
112
+ # 重新組合句子
113
+ if unique_sentences:
114
+ filled_template = ' '.join(unique_sentences)
115
+
116
+ # 4. 清理可能產生的多餘空格和標點符號
117
+ filled_template = re.sub(r'\s+', ' ', filled_template)
118
+ filled_template = re.sub(r'\s*\.\s*\.\s*', '. ', filled_template) # 移除連續句號
119
+ filled_template = re.sub(r'\s*,\s*\.\s*', '. ', filled_template) # 修正 ", ."
120
+
121
  # 確保首字母大寫
122
  if filled_template and not filled_template[0].isupper():
123
  filled_template = filled_template[0].upper() + filled_template[1:]