Spaces:

DawnC
/

VisionScout

Running on Zero

File size: 60,747 Bytes

import re
import logging
import traceback
from typing import Dict, List, Any, Optional, Set


class ResponseProcessingError(Exception):
    """回應處理相關錯誤的自定義異常"""
    pass


class ResponseProcessor:
    """
    負責處理和清理LLM模型輸出的回應。
    包含格式清理、重複內容檢測、語法完整性確保等功能。
    """

    def __init__(self):
        """初始化回應處理器"""
        # set the logger
        self.logger = logging.getLogger(self.__class__.__name__)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.INFO)

        # 初始化清理規則和替換字典
        self._initialize_cleaning_rules()
        self.logger.info("ResponseProcessor initialized successfully")


    def _initialize_cleaning_rules(self):
        """初始化各種清理規則和替換字典，把常見有問題情況優化"""
        try:
            # 設置重複詞彙的替換字典
            self.replacement_alternatives = {
                'visible': ['present', 'evident', 'apparent', 'observable'],
                'positioned': ['arranged', 'placed', 'set', 'organized'],
                'located': ['found', 'placed', 'situated', 'established'],
                'situated': ['placed', 'positioned', 'arranged', 'set'],
                'appears': ['seems', 'looks', 'presents', 'exhibits'],
                'features': ['includes', 'contains', 'displays', 'showcases'],
                'shows': ['reveals', 'presents', 'exhibits', 'demonstrates'],
                'displays': ['presents', 'exhibits', 'shows', 'reveals']
            }

            # 設置需要移除的前綴短語
            self.prefixes_to_remove = [
                "Here's the enhanced description:",
                "Enhanced description:",
                "Here is the enhanced scene description:",
                "I've enhanced the description while preserving all factual details:",
                "Enhanced Description:",
                "Scene Description:",
                "Description:",
                "Here is the enhanced description:",
                "Here's the enhanced description:",
                "Here is a rewritten scene description that adheres to the provided critical rules:",
                "Here is the rewritten scene description:",
                "Here's a rewritten scene description:",
                "The rewritten scene description is as follows:",
                "indoor,",
                "outdoor,",
                "indoor ",
                "outdoor "
            ]

            # 設置需要移除的後綴短語
            self.suffixes_to_remove = [
                "I've maintained all the key factual elements",
                "I've preserved all the factual details",
                "All factual elements have been maintained"
            ]

            # 設置重複檢測模式
            self.repetitive_patterns = [
                (r'\b(visible)\b.*?\b(visible)\b', 'Multiple uses of "visible" detected'),
                (r'\b(positioned)\b.*?\b(positioned)\b', 'Multiple uses of "positioned" detected'),
                (r'\b(located)\b.*?\b(located)\b', 'Multiple uses of "located" detected'),
                (r'\b(situated)\b.*?\b(situated)\b', 'Multiple uses of "situated" detected'),
                (r'\b(appears)\b.*?\b(appears)\b', 'Multiple uses of "appears" detected'),
                (r'\b(features)\b.*?\b(features)\b', 'Multiple uses of "features" detected'),
                (r'\bThis\s+(\w+)\s+.*?\bThis\s+\1\b', 'Repetitive sentence structure detected')
            ]

            # 斜線組合的形容詞替換字典(有時會有斜線格式問題)
            self.slash_replacements = {
                'sunrise/sunset': 'warm lighting',
                'sunset/sunrise': 'warm lighting',
                'day/night': 'ambient lighting',
                'night/day': 'ambient lighting',
                'morning/evening': 'soft lighting',
                'evening/morning': 'soft lighting',
                'dawn/dusk': 'gentle lighting',
                'dusk/dawn': 'gentle lighting',
                'sunny/cloudy': 'natural lighting',
                'cloudy/sunny': 'natural lighting',
                'bright/dark': 'varied lighting',
                'dark/bright': 'varied lighting',
                'light/shadow': 'contrasting illumination',
                'shadow/light': 'contrasting illumination',
                'indoor/outdoor': 'mixed environment',
                'outdoor/indoor': 'mixed environment',
                'inside/outside': 'transitional space',
                'outside/inside': 'transitional space',
                'urban/rural': 'diverse landscape',
                'rural/urban': 'diverse landscape',
                'modern/traditional': 'architectural blend',
                'traditional/modern': 'architectural blend',
                'old/new': 'varied architecture',
                'new/old': 'varied architecture',
                'busy/quiet': 'dynamic atmosphere',
                'quiet/busy': 'dynamic atmosphere',
                'crowded/empty': 'varying occupancy',
                'empty/crowded': 'varying occupancy',
                'hot/cold': 'comfortable temperature',
                'cold/hot': 'comfortable temperature',
                'wet/dry': 'mixed conditions',
                'dry/wet': 'mixed conditions',
                'summer/winter': 'seasonal atmosphere',
                'winter/summer': 'seasonal atmosphere',
                'spring/autumn': 'transitional season',
                'autumn/spring': 'transitional season',
                'left/right': 'balanced composition',
                'right/left': 'balanced composition',
                'near/far': 'layered perspective',
                'far/near': 'layered perspective',
                'high/low': 'varied elevation',
                'low/high': 'varied elevation',
                'big/small': 'diverse scale',
                'small/big': 'diverse scale',
                'wide/narrow': 'varied width',
                'narrow/wide': 'varied width',
                'open/closed': 'flexible space',
                'closed/open': 'flexible space',
                'public/private': 'community space',
                'private/public': 'community space',
                'formal/informal': 'relaxed setting',
                'informal/formal': 'relaxed setting',
                'commercial/residential': 'mixed-use area',
                'residential/commercial': 'mixed-use area'
            }

            # 新增：擴展的底線替換字典
            self.underscore_replacements = {
                'urban_intersection': 'urban intersection',
                'tourist_landmark': 'tourist landmark',
                'historical_site': 'historical site',
                'religious_building': 'religious building',
                'natural_landmark': 'natural landmark',
                'commercial_area': 'commercial area',
                'residential_area': 'residential area',
                'public_space': 'public space',
                'outdoor_scene': 'outdoor scene',
                'indoor_scene': 'indoor scene',
                'street_scene': 'street scene',
                'city_center': 'city center',
                'shopping_district': 'shopping district',
                'business_district': 'business district',
                'traffic_light': 'traffic light',
                'street_lamp': 'street lamp',
                'parking_meter': 'parking meter',
                'fire_hydrant': 'fire hydrant',
                'bus_stop': 'bus stop',
                'train_station': 'train station',
                'police_car': 'police car',
                'fire_truck': 'fire truck',
                'school_bus': 'school bus',
                'time_of_day': 'time of day',
                'weather_condition': 'weather condition',
                'lighting_condition': 'lighting condition',
                'atmospheric_condition': 'atmospheric condition',
                'human_activity': 'human activity',
                'pedestrian_traffic': 'pedestrian traffic',
                'vehicle_traffic': 'vehicle traffic',
                'social_gathering': 'social gathering',
                'object_detection': 'object detection',
                'scene_analysis': 'scene analysis',
                'image_classification': 'image classification',
                'computer_vision': 'computer vision'
            }

            self.logger.info("Cleaning rules initialized successfully")

        except Exception as e:
            error_msg = f"Failed to initialize cleaning rules: {str(e)}"
            self.logger.error(error_msg)
            self.logger.error(traceback.format_exc())
            raise ResponseProcessingError(error_msg) from e

    def clean_response(self, response: str, model_type: str = "general") -> str:
        if not response:
            raise ResponseProcessingError("Empty response provided for cleaning")

        try:
            # 調試：記錄清理前的原始回應
            self.logger.info(f"DEBUG: Response before cleaning: {response}")

            self.logger.debug(f"Starting response cleaning (original length: {len(response)})")

            # 保存原始回應作為備份
            original_response = response

            # 根據模型類型選擇清理策略
            if "llama" in model_type.lower():
                cleaned_response = self._clean_llama_response(response)
            else:
                cleaned_response = self._clean_general_response(response)

            # 調試：記錄清理後的回應
            self.logger.info(f"DEBUG: Response after cleaning: {cleaned_response}")

            # 如果清理後內容過短，嘗試從原始回應中恢復
            if len(cleaned_response.strip()) < 40:
                self.logger.warning("Cleaned response too short, attempting recovery")
                cleaned_response = self._recover_from_overcleaning(original_response)

            # 最終驗證
            self._validate_cleaned_response(cleaned_response)

            self.logger.debug(f"Response cleaning completed (final length: {len(cleaned_response)})")
            return cleaned_response

        except Exception as e:
            error_msg = f"Response cleaning failed: {str(e)}"
            self.logger.error(error_msg)
            self.logger.error(traceback.format_exc())
            raise ResponseProcessingError(error_msg) from e

    def _clean_llama_response(self, response: str) -> str:
        """
        專門處理Llama模型的回應清理

        Args:
            response: 原始Llama回應

        Returns:
            str: 清理後的回應
        """
        # 首先應用通用清理
        response = self._clean_general_response(response)

        # Llama特有的前綴清理
        llama_prefixes = [
            "Here's the enhanced description:",
            "Enhanced description:",
            "Here is the enhanced scene description:",
            "I've enhanced the description while preserving all factual details:"
        ]

        for prefix in llama_prefixes:
            if response.lower().startswith(prefix.lower()):
                response = response[len(prefix):].strip()

        # Llama特有的後綴清理
        llama_suffixes = [
            "I've maintained all the key factual elements",
            "I've preserved all the factual details",
            "All factual elements have been maintained"
        ]

        for suffix in llama_suffixes:
            if response.lower().endswith(suffix.lower()):
                response = response[:response.rfind(suffix)].strip()

        return response

    def _clean_general_response(self, response: str) -> str:
        """
        通用回應清理方法

        Args:
            response: 原始回應

        Returns:
            str: 清理後的回應
        """
        response = self._critical_format_preprocess(response)

        # 1. 移除系統remark
        response = self._remove_system_markers(response)

        # 2. 移除介紹性prefix
        response = self._remove_introduction_prefixes(response)

        # 3. 移除格式標記和上下文標籤
        response = self._remove_format_markers(response)

        # 4. 清理場景類型引用
        response = self._clean_scene_type_references(response)

        # 5. 標準化標點符號
        response = self._normalize_punctuation(response)

        # 6. 移除重複句子
        response = self._remove_duplicate_sentences(response)

        # 7. 處理重複詞彙
        response = self._handle_repetitive_vocabulary(response)

        # 8. ensure completement
        response = self._ensure_grammatical_completeness(response)

        # 9. 控制字數長度
        response = self._control_word_length(response)

        # 10. 最終格式化
        response = self._final_formatting(response)

        return response


    def _critical_format_preprocess(self, response: str) -> str:
        """
        關鍵格式預處理，處理最常見的格式問題
    
        Args:
            response: 原始回應
    
        Returns:
            str: 預處理後的回應
        """
        if not response:
            return response
    
        try:
            import re
            # 移除各種形式的 confirmed
            confirmed_patterns = [
                r'\bconfirmed\s+',  # "confirmed cars" -> "cars"
                r'\b(\d+)\s+confirmed\s+([a-zA-Z\s]+)',  # "12 confirmed cars" -> "12 cars"
                r'\b(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+confirmed\s+([a-zA-Z\s]+)',  # "twelve confirmed cars" -> "twelve cars"
            ]
            
            for pattern in confirmed_patterns:
                if pattern == r'\bconfirmed\s+':
                    response = re.sub(pattern, '', response, flags=re.IGNORECASE)
                else:
                    response = re.sub(pattern, r'\1 \2', response, flags=re.IGNORECASE)
    
            # 數字轉文字的完整字典
            number_conversions = {
                '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five',
                '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
                '11': 'eleven', '12': 'twelve', '13': 'thirteen', '14': 'fourteen', '15': 'fifteen',
                '16': 'sixteen', '17': 'seventeen', '18': 'eighteen', '19': 'nineteen', '20': 'twenty'
            }
    
            # 強化數字替換邏輯 - 處理各種語法結構
            for digit, word in number_conversions.items():
                # 模式1: 數字 + 名詞 (如 "3 cars", "12 people")
                pattern1 = rf'\b{digit}\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\b'
                response = re.sub(pattern1, rf'{word} \1', response)
    
                # 模式2: 數字 + visible/present + 名詞 (如 "3 visible traffic lights")
                pattern2 = rf'\b{digit}\s+(visible|present|apparent|evident)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\b'
                response = re.sub(pattern2, rf'{word} \1 \2', response, flags=re.IGNORECASE)
    
                # 模式3: 介詞 + 數字 + 名詞 (如 "against a backdrop of 3 visible traffic lights")
                pattern3 = rf'\b(against|with|featuring|including|containing)\s+(?:a\s+backdrop\s+of\s+)?{digit}\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\b'
                response = re.sub(pattern3, rf'\1 {word} \2', response, flags=re.IGNORECASE)
    
                # 模式4: 複合描述中的數字 (如 "featuring twelve confirmed cars and 3 confirmed persons")
                pattern4 = rf'\b(and|,)\s+{digit}\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\b'
                response = re.sub(pattern4, rf'\1 {word} \2', response, flags=re.IGNORECASE)
                
            grammar_fixes = [
                # persons -> people 的全面修正
                (r'\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty)\s+persons\b', r'\1 people'),
                (r'\bmultiple\s+persons\b', 'multiple people'),
                (r'\bseveral\s+persons\b', 'several people'),
                (r'\bmany\s+persons\b', 'many people'),
                (r'\ba\s+few\s+persons\b', 'a few people'),
                (r'\bsome\s+persons\b', 'some people'),
                (r'\bvarious\s+persons\b', 'various people'),
                (r'\bnumerous\s+persons\b', 'numerous people'),
                
                # 修正語法結構問題
                (r'\bvisible\s+traffic\s+lights\b', 'traffic lights visible'),
                (r'\bpresent\s+traffic\s+lights\b', 'traffic lights present'),
                (r'\bapparent\s+traffic\s+lights\b', 'traffic lights apparent'),
                
                # 修正重複的形容詞結構
                (r'\b(visible|present|apparent|evident)\s+(visible|present|apparent|evident)\s+', r'\1 '),
            ]
    
            for pattern, replacement in grammar_fixes:
                response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)

            # 首先處理已知的斜線組合，使用形容詞替換
            for slash_combo, replacement in self.slash_replacements.items():
                if slash_combo.lower() in response.lower():
                    # 保持原始大小寫格式
                    if slash_combo.upper() in response:
                        replacement_formatted = replacement.upper()
                    elif slash_combo.title() in response:
                        replacement_formatted = replacement.title()
                    else:
                        replacement_formatted = replacement
    
                    # 執行替換（不區分大小寫）
                    response = re.sub(re.escape(slash_combo), replacement_formatted, response, flags=re.IGNORECASE)
    
            # 處理其他未預定義的斜線模式
            slash_pattern = r'\b([a-zA-Z]+)/([a-zA-Z]+)\b'
            matches = list(re.finditer(slash_pattern, response))
            for match in reversed(matches):  # 從後往前處理避免位置偏移
                word1, word2 = match.groups()
                # 選擇較短或更常見的詞作為替換
                if len(word1) <= len(word2):
                    replacement = word1
                else:
                    replacement = word2
                response = response[:match.start()] + replacement + response[match.end():]
    
            # 首先處理已知的底線組合
            for underscore_combo, replacement in self.underscore_replacements.items():
                if underscore_combo in response:
                    response = response.replace(underscore_combo, replacement)
    
            # 處理三個詞的底線組合：word_word_word → word word word
            response = re.sub(r'\b([a-z]+)_([a-z]+)_([a-z]+)\b', r'\1 \2 \3', response)
    
            # 處理任何剩餘的底線模式：word_word → word word
            response = re.sub(r'\b([a-zA-Z]+)_([a-zA-Z]+)\b', r'\1 \2', response)
    
            # 確保句子的完整性
            incomplete_sentence_fixes = [
                (r'\bIn\s*,\s*', 'Throughout the area, '),
                (r'\bOverall,\s+exudes\b', 'Overall, the scene exudes'),
                (r'\bThe overall atmosphere of\s+is\b', 'The overall atmosphere'),
                (r'\bwith its lights turned illuminating\b', 'with its lights illuminating'),
                (r'\bwhere it stands as\b', 'where it stands as'),
            ]
    
            for pattern, replacement in incomplete_sentence_fixes:
                response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)
    
            # 清理多餘空格並確保格式一致性 
            response = re.sub(r'\s+', ' ', response).strip()
    
            return response
    
        except Exception as e:
            self.logger.warning(f"Error in critical format preprocessing: {str(e)}")
            return response

    def _remove_system_markers(self, response: str) -> str:
        """移除系統樣式標記"""
        # 移除對話remark
        response = re.sub(r'<\|.*?\|>', '', response)

        # 移除輸出remark
        output_start = response.find("[OUTPUT_START]")
        output_end = response.find("[OUTPUT_END]")
        if output_start != -1 and output_end != -1 and output_end > output_start:
            response = response[output_start + len("[OUTPUT_START]"):output_end].strip()

        # 移除其他remark
        section_markers = [
            r'\[.*?\]',
            r'OUTPUT_START\s*:|OUTPUT_END\s*:',
            r'ENHANCED DESCRIPTION\s*:',
            r'Scene Type\s*:.*?(?=\n|$)',
            r'Original Description\s*:.*?(?=\n|$)',
            r'GOOD\s*:|BAD\s*:',
            r'PROBLEM\s*:.*?(?=\n|$)',
            r'</?\|(?:assistant|system|user)\|>',
            r'\(Note:.*?\)',
            r'\(.*?I\'ve.*?\)',
            r'\(.*?as per your request.*?\)'
        ]

        for marker in section_markers:
            response = re.sub(marker, '', response, flags=re.IGNORECASE)

        return response

    def _remove_introduction_prefixes(self, response: str) -> str:
        """
        移除介紹性前綴，強化對多種模式的處理。
        """
        if not response:
            return ""

        cleaned_response = response.strip()

        # 1. 將所有要移除的前綴模式合併成一個大的正則表達式
        #    - r'^(?: ... )' 表示從字串開頭匹配非捕獲分組
        #    - '|' 用於分隔不同的模式
        #    - re.escape() 用於安全地處理 self.prefixes_to_remove 中的特殊字符
        #    - `\\s*,?` 處理可選的逗號和空格
        #    - `\\s*` 處理結尾的任意空格
        all_prefix_patterns = [
            r'Here\s+is\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?scene\s+description.*?:',
            r'The\s+(?:rewritten\s+|enhanced\s+)?(?:scene\s+)?description\s+is.*?:',
            r'Here\'s\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?description.*?:',

            # 這個模式會匹配這些詞，無論後面是逗號還是空格
            r'(?:indoor|outdoor|inside|outside)\s*,?'
        ]

        # 將 self.prefixes_to_remove 中的字符串也轉換為正則表達式模式
        # 確保 self.prefixes_to_remove 存在，否則提供一個空列表
        prefixes_to_add = getattr(self, 'prefixes_to_remove', [])
        for prefix in prefixes_to_add:
            # 使用 re.escape 來確保前綴中的任何特殊字符被正確處理
            all_prefix_patterns.append(re.escape(prefix))

        cleaned_response = re.sub(r'^(?:indoor|outdoor|inside|outside)\s*,?\s*', '', cleaned_response, flags=re.IGNORECASE).strip()

        # 將所有模式用 '|' 連接起來，形成一個大的組合模式
        # 我們在模式的結尾加上 \\s* 來匹配並移除前綴後可能跟隨的空格
        combined_pattern = r'^(?:' + '|'.join(all_prefix_patterns) + r')\s*'

        # 2. 執行一次性的替換，並忽略大小寫
        # 這一行程式碼會移除所有匹配到的前綴
        cleaned_response = re.sub(combined_pattern, '', cleaned_response, flags=re.IGNORECASE).strip()

        # 3. 確保首字母大寫
        # 移除前綴後，新的句首可能變成小寫, 這邊得修正
        if cleaned_response:
            cleaned_response = cleaned_response[0].upper() + cleaned_response[1:]

        return cleaned_response

    def _remove_format_markers(self, response: str) -> str:
        """移除格式標記和上下文標籤（保留括號內的地理與細節資訊）"""
        # 移除上下文相關remark
        response = re.sub(r'<\s*Context:.*?>', '', response)
        response = re.sub(r'Context:.*?(?=\n|$)', '', response)
        response = re.sub(r'Note:.*?(?=\n|$)', '', response, flags=re.IGNORECASE)

        # 移除Markdown格式
        response = re.sub(r'\*\*|\*|__|\|', '', response)

        # 移除任何剩餘的特殊標記 (避開括號內容，以免剔除地理位置等有用資訊)
        response = re.sub(r'</?\|.*?\|>', '', response)
        # ※ 以下移除「刪除整個括號及其內文」的方式已註解，以保留地理位置資訊
        # response = re.sub(r'\(.*?\)', '', response)

        return response


    def _clean_scene_type_references(self, response: str) -> str:
        """清理不當的場景類型引用"""
        scene_type_pattern = r'This ([a-zA-Z_]+) (features|shows|displays|contains)'
        match = re.search(scene_type_pattern, response)
        if match and '_' in match.group(1):
            fixed_text = f"This scene {match.group(2)}"
            response = re.sub(scene_type_pattern, fixed_text, response)

        return response

    def _normalize_punctuation(self, response: str) -> str:
        """標準化標點符號"""
        # 減少破折號使用
        response = re.sub(r'—', ', ', response)
        response = re.sub(r' - ', ', ', response)

        # 處理連續標點符號
        response = re.sub(r'([.,;:!?])\1+', r'\1', response)

        # 修復不完整句子的標點
        response = re.sub(r',\s*$', '.', response)

        # 修復句號後缺少空格的問題
        response = re.sub(r'([.!?])([A-Z])', r'\1 \2', response)

        # 清理多餘空格和換行
        response = response.replace('\r', ' ')
        response = re.sub(r'\n+', ' ', response)
        response = re.sub(r'\s{2,}', ' ', response)

        return response


    def _remove_duplicate_sentences(self, response: str, similarity_threshold: float = 0.85) -> str:
        """
        移除重複或高度相似的句子，使用 Jaccard 相似度進行比較。
        Args:
            response: 原始回應文本。
            similarity_threshold: 認定句子重複的相似度閾值 (0.0 到 1.0)。
                                  較高的閾值表示句子需要非常相似才會被移除。
        Returns:
            str: 移除重複句子後的文本。
        """
        try:
            if not response or not response.strip():
                return ""

            # (?<=[.!?]) 會保留分隔符在句尾, \s+ 會消耗句尾的空格
            # 這樣用 ' ' join 回去時, 標點和下個句子間剛好一個空格
            sentences = re.split(r'(?<=[.!?])\s+', response.strip())

            unique_sentences_data = [] # Store tuples of (original_sentence, simplified_word_set)

            min_sentence_len_for_check = 8 # 簡化後詞彙數少於此值，除非完全相同否則不輕易判斷為重複

            for sentence in sentences:
                sentence = sentence.strip()
                if not sentence:
                    continue

                # 創建簡化版本用於比較 (小寫，移除標點，分割為詞彙集合)
                # 保留數字，因為數字可能是關鍵資訊
                simplified_text = re.sub(r'[^\w\s\d]', '', sentence.lower())
                current_sentence_words = set(simplified_text.split())

                if not current_sentence_words: # 如果處理後是空集合，跳過
                    continue

                is_duplicate = False
                # 與已保留的唯一句子比較
                for i, (kept_sentence_text, kept_sentence_words) in enumerate(unique_sentences_data):
                    # Jaccard Index
                    intersection_len = len(current_sentence_words.intersection(kept_sentence_words))
                    union_len = len(current_sentence_words.union(kept_sentence_words))

                    if union_len == 0: # 兩個都是空集合，代表相同句子
                        jaccard_similarity = 1.0
                    else:
                        jaccard_similarity = intersection_len / union_len

                    # 用Jaccard 相似度超過閾值，不是兩個都非常短的句子 (避免 "Yes." 和 "No." 被錯誤合併)
                    # 新句子完全被舊句子包含 (且舊句子更長)
                    # 舊句子完全被新句子包含 (且新句子更長) -> 這種情況就需要替換
                    if jaccard_similarity >= similarity_threshold:
                        # 如果當前句子比已保留的句子短，且高度相似，則認為是重複
                        if len(current_sentence_words) < len(kept_sentence_words):
                            is_duplicate = True
                            self.logger.debug(f"Sentence \"{sentence[:30]}...\" marked duplicate (shorter, similar to \"{kept_sentence_text[:30]}...\") Jaccard: {jaccard_similarity:.2f}")
                            break
                        # 如果當前句子比已保留的句子長，且高度相似，則替換掉已保留的
                        elif len(current_sentence_words) > len(kept_sentence_words):
                            self.logger.debug(f"Sentence \"{kept_sentence_text[:30]}...\" replaced by longer similar sentence \"{sentence[:30]}...\" Jaccard: {jaccard_similarity:.2f}")
                            unique_sentences_data.pop(i) # 移除舊的、較短的句子

                        # 如果長度差不多，但相似度高，保留第一個出現的
                        elif current_sentence_words != kept_sentence_words : # 避免完全相同的句子被錯誤地跳過替換邏輯
                             is_duplicate = True # 保留先出現的
                             self.logger.debug(f"Sentence \"{sentence[:30]}...\" marked duplicate (similar length, similar to \"{kept_sentence_text[:30]}...\") Jaccard: {jaccard_similarity:.2f}")
                             break

                if not is_duplicate:
                    unique_sentences_data.append((sentence, current_sentence_words))

            # 重組唯一句子
            final_sentences = [s_data[0] for s_data in unique_sentences_data]

            # 確保每個句子以標點結尾 (因為 split 可能會產生沒有標點的最後一個片段)
            reconstructed_response = ""
            for i, s in enumerate(final_sentences):
                s = s.strip()
                if not s: continue
                if not s[-1] in ".!?":
                    s += "."
                reconstructed_response += s
                if i < len(final_sentences) - 1:
                     reconstructed_response += " " # 在句子間添加空格

            return reconstructed_response.strip()

        except Exception as e:
            self.logger.error(f"Error in _remove_duplicate_sentences: {str(e)}")
            self.logger.error(traceback.format_exc())
            return response # 發生錯誤時返回原始回應

    def _handle_repetitive_vocabulary(self, response: str) -> str:
        """處理重複詞彙，使用改進的檢測和替換機制"""
        try:
            # 先進行重複模式檢測（記錄但不直接處理）
            if hasattr(self, 'repetitive_patterns'):
                for pattern, issue in self.repetitive_patterns:
                    matches = list(re.finditer(pattern, response, re.IGNORECASE | re.DOTALL))
                    if matches:
                        self.logger.warning(f"Text quality issue detected: {issue} in response: \"{response[:100]}...\"")
    
            if not hasattr(self, 'replacement_alternatives') or not self.replacement_alternatives:
                return response
    
            processed_response = response
    
            # 強化的重複詞彙處理
            for word_to_replace, alternatives in self.replacement_alternatives.items():
                if not alternatives:
                    continue
    
                # 創建更精確的詞彙匹配模式
                word_pattern = re.compile(r'\b' + re.escape(word_to_replace) + r'\b', re.IGNORECASE)
                matches = list(word_pattern.finditer(processed_response))
                
                if len(matches) <= 1:
                    continue  # 如果只出現一次或沒有出現，跳過
    
                # 對於多次出現的情況，進行智能替換
                replacement_count = 0
                alternative_index = 0
                
                def smart_replacer(match_obj):
                    nonlocal replacement_count, alternative_index
                    replacement_count += 1
                    original_word = match_obj.group(0)
                    
                    # 第一次出現保持原樣，後續出現進行替換
                    if replacement_count == 1:
                        return original_word
                    
                    # 選擇適當的替代詞
                    replacement = alternatives[alternative_index % len(alternatives)]
                    alternative_index += 1
                    
                    # 保持原始大小寫格式
                    if original_word.isupper():
                        return replacement.upper()
                    elif original_word.istitle():
                        return replacement.capitalize()
                    return replacement
    
                processed_response = word_pattern.sub(smart_replacer, processed_response)
    
            # === 新增：專門處理 "positioned" 的特殊邏輯 ===
            # 由於 "positioned" 經常出現問題，給予特別處理
            positioned_pattern = r'\b(positioned)\b'
            positioned_matches = re.findall(positioned_pattern, processed_response, re.IGNORECASE)
            
            if len(positioned_matches) > 1:
                # 替換除了第一個以外的所有 "positioned"
                positioned_alternatives = ['arranged', 'placed', 'set', 'located', 'situated']
                replacement_counter = 0
                
                def positioned_replacer(match):
                    nonlocal replacement_counter
                    if replacement_counter == 0:
                        replacement_counter += 1
                        return match.group(0)  # 保持第一個不變
                    else:
                        alt_index = (replacement_counter - 1) % len(positioned_alternatives)
                        replacement_counter += 1
                        original = match.group(0)
                        new_word = positioned_alternatives[alt_index]
                        
                        # 保持大小寫格式
                        if original.isupper():
                            return new_word.upper()
                        elif original.istitle():
                            return new_word.capitalize()
                        return new_word
                
                processed_response = re.sub(positioned_pattern, positioned_replacer, processed_response, flags=re.IGNORECASE)
    
            # 移除 identical 等重複性描述詞彙
            identical_cleanup_patterns = [
                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
                (r'\bcomprehensive view featuring\b', 'scene featuring'),
                (r'\bcomprehensive display of\b', 'display of'),
            ]
    
            for pattern, replacement in identical_cleanup_patterns:
                processed_response = re.sub(pattern, replacement, processed_response, flags=re.IGNORECASE)
    
            # 數字到文字轉換（保持原有邏輯）
            number_conversions = {
                '2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
                '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
                '11': 'eleven', '12': 'twelve'
            }
    
            for digit, word in number_conversions.items():
                # 各種數字模式的處理
                patterns_to_fix = [
                    (rf'\b{digit}\s+([a-zA-Z]+s)\b', rf'{word} \1'),
                    (rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b', rf'{word} \1 \2'),
                    (rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b', rf'{word} \1 \2'),
                    (rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b', rf'\1 {word} \2'),
                ]
                
                for pattern, replacement in patterns_to_fix:
                    processed_response = re.sub(pattern, replacement, processed_response, flags=re.IGNORECASE)
    
            return processed_response
    
        except Exception as e:
            self.logger.error(f"Error in _handle_repetitive_vocabulary: {str(e)}")
            self.logger.error(traceback.format_exc())
            return response

    def _ensure_grammatical_completeness(self, response: str) -> str:
        """
        確保語法完整性，處理不完整句子和格式問題

        Args:
            response: 待檢查的回應文本

        Returns:
            str: 語法完整的回應文本
        """
        try:
            if not response or not response.strip():
                return response

            # 第一階段：檢查並修正不完整的句子模式
            incomplete_patterns = [
                # 介詞後直接結束的問題（針對 "over ." 等情況）
                (r'\b(over|under|through|across|along|beneath|beyond|throughout)\s*\.', 'incomplete_preposition'),
                (r'\b(with|without|against|towards|beside|between|among)\s*\.', 'incomplete_preposition'),
                (r'\b(into|onto|upon|within|behind|below|above)\s*\.', 'incomplete_preposition'),

                # 處理 "In ," 這類缺失詞彙的問題
                (r'\bIn\s*,', 'incomplete_location'),
                (r'\bAt\s*,', 'incomplete_location'),
                (r'\bOn\s*,', 'incomplete_location'),
                (r'\bWith\s*,', 'incomplete_context'),

                # 不完整的描述模式
                (r'\b(fine|the)\s+(the\s+)?(?:urban|area|scene)\b(?!\s+\w)', 'incomplete_description'),

                # 連詞或介詞後直接標點的問題
                (r'\b(and|or|but|with|from|in|at|on|by|for|to)\s*[.!?]', 'incomplete_conjunction'),

                # 重複詞彙
                (r'\b(\w+)\s+\1\b', 'word_repetition'),

                # 不完整的場景類型引用（如 "urban_intersection" 格式問題）
                (r'\b(\w+)_(\w+)\b', 'underscore_format'),

                # 地標場景特有問題
                (r'\btourist_landmark\b', 'underscore_format'),
                (r'\burban_intersection\b', 'underscore_format'),
                (r'\bIn\s*,\s*(?=\w)', 'incomplete_prepositional'),
                (r'\bOverall,\s+(?=exudes|shows|displays)(?!\s+(?:the|this|it))', 'missing_subject'),
                (r'\batmosphere of\s+is one of\b', 'redundant_structure'),
                (r'\bwith.*?turned\s+illuminating\b', 'redundant_participle')
            ]

            for pattern, issue_type in incomplete_patterns:
                try:
                    matches = list(re.finditer(pattern, response, re.IGNORECASE))

                    for match in matches:
                        if issue_type == 'incomplete_preposition':
                            # 處理介詞後直接結束的情況
                            response = self._fix_incomplete_preposition(response, match)

                        elif issue_type == 'underscore_format':
                            # 將下劃線格式轉換為空格分隔
                            original = match.group(0)
                            replacement = original.replace('_', ' ')
                            response = response.replace(original, replacement)

                        elif issue_type == 'word_repetition':
                            # 移除重複的詞彙
                            repeated_word = match.group(1)
                            response = response.replace(f"{repeated_word} {repeated_word}", repeated_word)

                        elif issue_type == 'incomplete_location' or issue_type == 'incomplete_context':
                            # 移除不完整的位置或上下文引用
                            response = response.replace(match.group(0), '')

                        elif issue_type == 'incomplete_prepositional':
                            # 處理不完整的介詞短語
                            response = re.sub(r'\bIn\s*,\s*', 'Throughout the scene, ', response)

                        elif issue_type == 'missing_subject':
                            # 為Overall句子添加主語
                            response = re.sub(r'\bOverall,\s+(?=exudes)', 'Overall, the scene ', response)

                        elif issue_type == 'redundant_structure':
                            # 簡化冗餘結構
                            response = re.sub(r'\batmosphere of\s+is one of\b', 'atmosphere is one of', response)

                        elif issue_type == 'redundant_participle':
                            # 清理冗餘分詞
                            response = re.sub(r'turned\s+illuminating', 'illuminating', response)

                        else:
                            # 對於其他不完整模式，直接移除
                            response = response.replace(match.group(0), '')

                    # 清理多餘空格
                    response = re.sub(r'\s{2,}', ' ', response).strip()

                except re.error as e:
                    self.logger.warning(f"Regular expression pattern error for {issue_type}: {pattern} - {str(e)}")
                    continue

            # 第二階段：處理物件類別格式問題
            response = self._clean_object_class_references(response)

            # 第三階段：確保句子正確結束
            response = self._ensure_proper_sentence_ending(response)

            # 第四階段：最終語法檢查
            response = self._final_grammar_check(response)

            return response.strip()

        except Exception as e:
            self.logger.error(f"Error in _ensure_grammatical_completeness: {str(e)}")
            return response

    def _fix_incomplete_preposition(self, response: str, match) -> str:
        """
        修正不完整的介詞短語

        Args:
            response: 回應文本
            match: 正則匹配對象

        Returns:
            str: 修正後的回應
        """
        preposition = match.group(1)
        match_start = match.start()

        # 找到句子的開始位置
        sentence_start = response.rfind('.', 0, match_start)
        sentence_start = sentence_start + 1 if sentence_start != -1 else 0

        # 提取句子片段
        sentence_fragment = response[sentence_start:match_start].strip()

        # 如果句子片段有意義，嘗試移除不完整的介詞部分
        if len(sentence_fragment) > 10:
            # 移除介詞及其後的內容，添加適當的句號
            response = response[:match_start].rstrip() + '.'
        else:
            # 如果句子片段太短，移除整個不完整的句子
            response = response[:sentence_start] + response[match.end():]

        return response

    def _clean_object_class_references(self, response: str) -> str:
        """
        清理物件類別引用中的格式問題

        Args:
            response: 回應文本

        Returns:
            str: 清理後的回應
        """
        # 移除類別ID引用（如 "unknown-class 2", "Class 0" 等）
        class_id_patterns = [
            r'\bunknown[- ]?class\s*\d+\s*objects?',
            r'\bclass[- ]?\d+\s*objects?',
            r'\b[Cc]lass\s*\d+\s*objects?',
            r'\bunknown[- ][Cc]lass\s*\d+\s*objects?'
        ]

        for pattern in class_id_patterns:
            try:
                # 替換為更自然的描述
                response = re.sub(pattern, 'objects', response, flags=re.IGNORECASE)
            except re.error as e:
                self.logger.warning(f"Error cleaning class reference pattern {pattern}: {str(e)}")
                continue

        # 處理數量描述中的問題
        response = re.sub(r'\b(\w+)\s+unknown[- ]?\w*\s*objects?', r'\1 objects', response, flags=re.IGNORECASE)

        return response

    def _ensure_proper_sentence_ending(self, response: str) -> str:
        """
        確保句子有適當的結尾

        Args:
            response: 回應文本

        Returns:
            str: 具有適當結尾的回應
        """
        if not response or not response.strip():
            return response

        response = response.strip()

        # 檢查是否以標點符號結尾
        if response and response[-1] not in ['.', '!', '?']:

            # 常見介詞和連詞列表
            problematic_endings = [
                "into", "onto", "about", "above", "across", "after", "along", "around",
                "at", "before", "behind", "below", "beneath", "beside", "between",
                "beyond", "by", "down", "during", "except", "for", "from", "in",
                "inside", "near", "of", "off", "on", "over", "through", "to",
                "toward", "under", "up", "upon", "with", "within", "and", "or", "but"
            ]

            words = response.split()
            if words:
                last_word = words[-1].lower().rstrip('.,!?')

                if last_word in problematic_endings:
                    # 找到最後完整的句子
                    last_period_pos = max(
                        response.rfind('.'),
                        response.rfind('!'),
                        response.rfind('?')
                    )

                    if last_period_pos > len(response) // 2:  # 如果有較近的完整句子
                        response = response[:last_period_pos + 1]
                    else:
                        # 移除問題詞彙並添加句號
                        if len(words) > 1:
                            response = " ".join(words[:-1]) + "."
                        else:
                            response = "The scene displays various elements."
                else:
                    # 正常情況下添加句號
                    response += "."

        return response

    def _final_grammar_check(self, response: str) -> str:
        """
        最終語法檢查和清理

        Args:
            response: 回應文本

        Returns:
            str: 最終清理後的回應
        """
        if not response:
            return response

        # 修正連續標點符號
        response = re.sub(r'([.!?]){2,}', r'\1', response)

        # 修正句號前的空格
        response = re.sub(r'\s+([.!?])', r'\1', response)

        # 修正句號後缺少空格的問題
        response = re.sub(r'([.!?])([A-Z])', r'\1 \2', response)

        # 確保首字母大寫
        if response and response[0].islower():
            response = response[0].upper() + response[1:]

        # 移除多餘的空格
        response = re.sub(r'\s{2,}', ' ', response)

        # 處理空句子或過短的回應
        if len(response.strip()) < 20:
            return "The scene contains various visual elements."

        return response.strip()

    def _control_word_length(self, response: str) -> str:
        """控制文字長度在合理範圍內，確保句子完整性"""
        words = response.split()
        
        # 提高基礎限制，給予更多彈性
        base_limit = 220
        extended_limit = 250
        
        if len(words) <= base_limit:
            return response
        
        # 首先嘗試在基礎限制內找到完整句子
        truncated = ' '.join(words[:base_limit])
        last_period = max(truncated.rfind('.'), truncated.rfind('!'), truncated.rfind('?'))
        
        # 如果在基礎限制內找到了適當的句子結尾
        if last_period > len(truncated) * 0.8:  # 確保截斷點不會太早
            result = truncated[:last_period+1]
            self.logger.info(f"Text truncated at {base_limit} words with proper sentence ending")
            return result
        
        # 如果基礎限制內沒有找到合適結尾，擴展搜尋範圍
        if len(words) > extended_limit:
            extended_truncated = ' '.join(words[:extended_limit])
            extended_last_period = max(
                extended_truncated.rfind('.'), 
                extended_truncated.rfind('!'), 
                extended_truncated.rfind('?')
            )
            
            # 在擴展範圍內找到合適的結尾
            if extended_last_period > len(extended_truncated) * 0.7:
                result = extended_truncated[:extended_last_period+1]
                self.logger.info(f"Text truncated at extended limit with proper sentence ending")
                return result
        
        # 如果仍然找不到合適的結尾，使用智能截斷
        # 尋找最後一個完整的句子或子句
        final_truncated = ' '.join(words[:base_limit])
        
        # 尋找可能的子句結尾（逗號後的位置）
        last_comma = final_truncated.rfind(',')
        last_semicolon = final_truncated.rfind(';')
        
        # 選擇最佳截斷點
        best_cutoff = max(last_period, last_comma, last_semicolon)
        
        if best_cutoff > len(final_truncated) * 0.6:
            # 如果是逗號或分號結尾，改為句號
            result = final_truncated[:best_cutoff]
            if result.endswith(',') or result.endswith(';'):
                result = result[:-1] + '.'
            elif not result.endswith(('.', '!', '?')):
                result += '.'
            
            self.logger.warning(f"Text truncated with intelligent cutoff at position {best_cutoff}")
            return result
        
        # 移除可能不完整的最後一個句子
        # 找到倒數第二個句子的結尾
        second_last_period = final_truncated.rfind('.', 0, last_period)
        if second_last_period > 0:
            result = final_truncated[:second_last_period+1]
            self.logger.warning("Text truncated by removing incomplete final sentence")
            return result
        
        # 如果所有方法都失敗，添加適合的結尾
        result = final_truncated.rstrip() + "."
        self.logger.warning("Text truncated with forced period ending")
        return result

    def _final_formatting(self, response: str) -> str:
        """最終格式化處理"""
        # 專門處理 "indoor," 前綴問題
        indoor_patterns = [
            r'^indoor\s*,\s*',
            r'^outdoor\s*,\s*',
            r'^inside\s*,\s*',
            r'^outside\s*,\s*',
            r'^indoor\s+',
            r'^outdoor\s+',
        ]

        for pattern in indoor_patterns:
            response = re.sub(pattern, '', response, flags=re.IGNORECASE)

        # 移除開頭的空白和標點符號
        response = re.sub(r'^[\s,;:.-]+', '', response)

        # 修復常見的語法問題
        response = self._fix_grammatical_issues(response)

        # 確保首字母大寫
        if response and response[0].islower():
            response = response[0].upper() + response[1:]

        # 統一格式為單一段落
        response = re.sub(r'\s*\n\s*', ' ', response)
        response = ' '.join(response.split())

        return response.strip()

    def _fix_grammatical_issues(self, response: str) -> str:
        """修復常見的語法問題"""
        if not response:
            return response

        # 修復不完整的句子開頭
        grammar_fixes = [
            # 修復 "A dining table with... A dining table..." 重複問題
            (r'\b(A|An)\s+([^.!?]*?)\s+\1\s+\2', r'\1 \2'),

            # 修復 "This scene presents a scene" 重複
            (r'\bThis scene presents a scene\b', 'This scene presents'),

            # 修復不完整的句子 "A dining table with four chairs and a dining table"
            (r'\b([A-Z][^.!?]*?)\s+and\s+a\s+\1\b', r'\1'),

            # 修復空的介詞短語
            (r'\bwith\s+with\b', 'with'),
            (r'\band\s+and\b', 'and'),

            # 確保句子完整性
            (r'(\w+)\s*\.\s*(\w+)', r'\1. \2'),
        ]

        for pattern, replacement in grammar_fixes:
            response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)

        return response

    def _recover_from_overcleaning(self, original_response: str) -> str:
        """從過度清理中恢復內容"""
        try:
            # 嘗試從原始回應中找到最佳段落
            paragraphs = [p for p in original_response.split('\n\n') if p.strip()]
            if paragraphs:
                # 選擇最長的段落作為主要描述
                best_para = max(paragraphs, key=len)
                # 使用基本清理規則
                best_para = re.sub(r'\[.*?\]', '', best_para)
                best_para = re.sub(r'\s{2,}', ' ', best_para).strip()

                if len(best_para) >= 40:
                    return best_para

            return "Unable to generate a valid enhanced description."

        except Exception as e:
            self.logger.error(f"Recovery from overcleaning failed: {str(e)}")
            return "Description generation error."

    def _validate_cleaned_response(self, response: str):
        """驗證清理後的回應"""
        if not response:
            raise ResponseProcessingError("Response is empty after cleaning")

        if len(response.strip()) < 20:
            raise ResponseProcessingError("Response is too short after cleaning")

        # 檢查是否包含基本的句子結構
        if not re.search(r'[.!?]', response):
            raise ResponseProcessingError("Response lacks proper sentence structure")

    def remove_explanatory_notes(self, response: str) -> str:
        """
        移除解釋性注釋和說明，特別處理破碎的解釋性片段
    
        Args:
            response: 包含可能注釋的回應
    
        Returns:
            str: 移除注釋後的回應
        """
        try:
            if not response or not response.strip():
                return response
                
            # 第一步：移除完整的解釋性句子片段
            problematic_fragments = [
                # 處理 "strictly adhered to..." 相關片段
                r'\bstrictly\s+adhered\s+to\s+the\s+(?:critical\s+adherence\s+to\s+input\s+rule|or\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information)\.?',
                
                # 處理 "or inferences beyond..." 片段
                r'\bor\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information\.?',
                
                # 處理 "the mentioning only..." 片段  
                r'\bthe\s+mentioning\s+only\s+the\s+objects\s+and\s+their\s+locations\.?',
                
                # 處理 "avoided speculating..." 片段
                r'\bavoided\s+speculating\s+on\s+object\s+quantities,?\s*spatial\s+relationships,?\s*and\s+atmospheres,?\.?',
                
                # 處理更一般的解釋性片段
                r'\b(?:have\s+)?strictly\s+adhered\s+to.*?(?:information|rule)\.?',
                r'\b(?:have\s+)?followed\s+the.*?(?:whitelist|rule)\.?',
                r'\b(?:have\s+)?avoided\s+(?:any\s+)?(?:assumptions|speculation).*?\.?',
                r'\bmentioning\s+only\s+the\s+objects.*?\.?',
                
                # 處理孤立的片段詞組
                r'\bthe\s+mentioning\s+only\b',
                r'\bavoided\s+speculating\b',
                r'\bstrictly\s+adhered\s+to\s+the\b',
                r'\bor\s+inferences\s+beyond\b',
            ]
            
            cleaned_response = response
            for pattern in problematic_fragments:
                cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
            
            # 第二步：清理標點符號問題
            # 移除多餘的逗號和句號
            cleaned_response = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_response)
            cleaned_response = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_response)
            cleaned_response = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_response)
            
            # 修復句子結尾的孤立標點
            cleaned_response = re.sub(r'\s+,\s*$', '.', cleaned_response)
            cleaned_response = re.sub(r'\s+,\s*(?=\s+[A-Z])', '. ', cleaned_response)
            
            # 第三步：傳統的段落級處理
            traditional_note_patterns = [
                r'(?:^|\n)Note:.*?(?:\n|$)',
                r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
                r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
                r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
            ]
    
            # 尋找段落
            paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
    
            # 如果只有一個段落，檢查並清理它
            if len(paragraphs) == 1:
                for pattern in traditional_note_patterns:
                    paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
                result = paragraphs[0].strip()
            else:
                # 如果有多個段落，移除注釋段落
                content_paragraphs = []
                for paragraph in paragraphs:
                    is_note = False
                    for pattern in traditional_note_patterns:
                        if re.search(pattern, paragraph, flags=re.IGNORECASE):
                            is_note = True
                            break
    
                    # 檢查段落是否以常見的注釋詞開頭
                    if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
                        is_note = True
    
                    if not is_note:
                        content_paragraphs.append(paragraph)
    
                result = '\n\n'.join(content_paragraphs).strip()
            
            # 第四步：最終清理和格式化
            if result:
                # 標準化空格
                result = re.sub(r'\s+', ' ', result)
                
                # 修復句子間的間距
                result = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', result)
                
                # 確保句子以適當的標點結尾
                result = result.strip()
                if result and not result.endswith(('.', '!', '?')):
                    result += '.'
                    
                return result
            
            # 如果結果為空，嘗試更保守的清理
            fallback_result = response
            conservative_patterns = [
                r'\bstrictly\s+adhered\s+to.*?information\.?',
                r'\bavoided\s+speculating.*?atmospheres,?\.?',
                r'\bthe\s+mentioning\s+only.*?locations\.?'
            ]
            
            for pattern in conservative_patterns:
                fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
            
            fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
            return fallback_result if fallback_result else response
    
        except Exception as e:
            self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
            return response

    def get_processor_info(self) -> Dict[str, Any]:
        """
        獲取處理器信息

        Returns:
            Dict[str, Any]: 包含處理器狀態和配置的信息
        """
        return {
            "replacement_alternatives_count": len(self.replacement_alternatives),
            "prefixes_to_remove_count": len(self.prefixes_to_remove),
            "suffixes_to_remove_count": len(self.suffixes_to_remove),
            "repetitive_patterns_count": len(self.repetitive_patterns),
            "initialization_status": "success"
        }