Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

DawnC committed 26 days ago

Commit

30671de

verified ·

1 Parent(s): 1c7033a

Update response_processor.py

Browse files

Files changed (1) hide show

response_processor.py +32 -135

response_processor.py CHANGED Viewed

@@ -1218,157 +1218,54 @@ class ResponseProcessor:
         if not re.search(r'[.!?]', response):
             raise ResponseProcessingError("Response lacks proper sentence structure")
-    def remove_explanatory_notes(self, response: str) -> str:
         """
-        Remove explanatory notes and commentary, using a multi-stage cleanup strategy to handle broken fragments.
         Args:
             response: Response text that may contain notes.
         Returns:
             str: The response with the notes removed.
         """
         try:
-            if not response or not response.strip():
-                return response
-            original_response = response
-            # Stage 1: remove clearly identifiable, complete problem sentence patterns
-            complete_problem_patterns = [
-                # Complete broken sentence pattern (the original note said "greedy", but `.*?` is a lazy match)
-                r'Note\s+that\s+I\s+have\s*[,.\s]*.*?(?:natural\s+flow|concise\s+manner)[,.\s]*',
-                # Entire fragment from "Note that" through the next period
-                r'Note\s+that\s+I\s+have\s*[,.\s]*.*?\.',
-                # Entire fragment containing "avoiding any assumptions"
-                r'[,.\s]*avoiding\s+any\s+assumptions.*?(?:manner|flow|locations)[,.\s]*',
-            ]
-            cleaned_text = response
-            for pattern in complete_problem_patterns:
-                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.DOTALL)
-            # Stage 2: remove specific problematic keyword combinations
-            specific_fragments = [
-                # Remove fragments related to "I have also"
-                r'\bI\s+have\s+also\s*[,.\s]*(?:and\s+detail\s+accuracy\s+rule\s*[,.\s]*)?',
-                # Remove the isolated fragment "and their locations"
-                r'[,.\s]*and\s+their\s+locations[,.\s]*',
-                # Remove the fragment "on describing in a clear"
-                r'[,.\s]*on\s+describing\s+in\s+a\s+clear(?:\s+and\s+concise)?(?:\s+manner)?[,.\s]*',
-                # Remove fragments related to "detail accuracy rule"
-                r'[,.\s]*(?:and\s+)?detail\s+accuracy\s+rule[,.\s]*',
-                # Remove an isolated "avoiding any assumptions"
-                r'[,.\s]*avoiding\s+any\s+assumptions[,.\s]*',
-                # Remove broken fragments beginning with "Additionally, I have"
-                r'Additionally,?\s*I\s+have\s*[,.\s]*(?:and\s+have\s+focused\s*[,.\s]*)?',
-                # Remove fragments related to "using transitional phrases"
-                r'[,.\s]*using\s+transitional\s+phrases(?:\s+and\s+varying\s+sentence\s+structures)?[,.\s]*',
-                # Remove "to create a natural flow"
-                r'[,.\s]*to\s+create\s+a\s+natural\s+flow[,.\s]*',
-            ]
-            for pattern in specific_fragments:
-                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
-            # Stage 3: remove any leftover fragments that begin with a problem keyword
-            problem_starters = [
-                r'^[,.\s]*Note\s+that.*?[,.\s]*',
-                r'^[,.\s]*I\s+have\s+(?:strictly\s+)?(?:adhered|followed|ensured).*?[,.\s]*',
-                r'^[,.\s]*avoiding\s+any.*?[,.\s]*',
-                r'^[,.\s]*Additionally.*?[,.\s]*',
-            ]
-            for pattern in problem_starters:
-                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
-            # Stage 4: clean up punctuation and formatting problems
-            # Remove redundant commas and periods
-            cleaned_text = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_text)
-            cleaned_text = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_text)
-            cleaned_text = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_text)
-            # Remove leading and trailing punctuation
-            cleaned_text = re.sub(r'^[,.\s]+', '', cleaned_text)
-            cleaned_text = re.sub(r'[,.\s]+$', '', cleaned_text)
-            # Repair punctuation between sentences
-            cleaned_text = re.sub(r'([.!?])\s*,\s*([A-Z])', r'\1 \2', cleaned_text)
-            cleaned_text = re.sub(r',\s*([A-Z])', r'. \1', cleaned_text)
-            # Stage 5: traditional paragraph-level processing (original logic retained)
-            traditional_note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
                 r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
-            paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
             if len(paragraphs) == 1:
-                for pattern in traditional_note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
-                result = paragraphs[0].strip()
-            else:
-                content_paragraphs = []
-                for paragraph in paragraphs:
-                    is_note = False
-                    for pattern in traditional_note_patterns:
-                        if re.search(pattern, paragraph, flags=re.IGNORECASE):
-                            is_note = True
-                            break
-                    if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
                         is_note = True
-                    if not is_note:
-                        content_paragraphs.append(paragraph)
-                result = '\n\n'.join(content_paragraphs).strip()
-            # Stage 6: final validation and formatting
-            if result:
-                # Normalize whitespace
-                result = re.sub(r'\s+', ' ', result)
-                # Ensure the text ends with proper sentence punctuation
-                result = result.strip()
-                if result and not result.endswith(('.', '!', '?')):
-                    result += '.'
-                # Final check: if the result is too short, fall back to a more conservative approach
-                if len(result.split()) < 5:
-                    conservative_result = original_response
-                    # Remove only the most obvious problem fragments
-                    conservative_patterns = [
-                        r'Note\s+that\s+I\s+have.*?manner[,.\s]*',
-                        r'avoiding\s+any\s+assumptions.*?locations[,.\s]*',
-                        r'Additionally,?\s*I\s+have.*?flow[,.\s]*'
-                    ]
-                    for pattern in conservative_patterns:
-                        conservative_result = re.sub(pattern, '', conservative_result, flags=re.IGNORECASE)
-                    conservative_result = re.sub(r'\s+', ' ', conservative_result).strip()
-                    return conservative_result if conservative_result else original_response
-                return result
-            # If nothing remains after all processing, return the original content
-            return original_response
         except Exception as e:
-            if hasattr(self, 'logger'):
-                self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
             return response
     def get_processor_info(self) -> Dict[str, Any]:

         if not re.search(r'[.!?]', response):
             raise ResponseProcessingError("Response lacks proper sentence structure")
+     def remove_explanatory_notes(self, response: str) -> str:
         """
+        移除解釋性注釋和說明
         Args:
             response: 包含可能注釋的回應
         Returns:
             str: 移除注釋後的回應
         """
         try:
+            # 識別常見的注釋和解釋模式
+            note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
                 r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
+            # 尋找段落
+            paragraphs = [p.strip() for p in response.split('\n\n') if p.strip()]
+            # 如果只有一個段落，檢查並清理它
             if len(paragraphs) == 1:
+                for pattern in note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
+                return paragraphs[0].strip()
+            # 如果有多個段落，移除注釋段落
+            content_paragraphs = []
+            for paragraph in paragraphs:
+                is_note = False
+                for pattern in note_patterns:
+                    if re.search(pattern, paragraph, flags=re.IGNORECASE):
                         is_note = True
+                        break
+                # 檢查段落是否以常見的注釋詞開頭
+                if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
+                    is_note = True
+                if not is_note:
+                    content_paragraphs.append(paragraph)
+            return '\n\n'.join(content_paragraphs).strip()
         except Exception as e:
+            self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
             return response
     def get_processor_info(self) -> Dict[str, Any]: