Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Community

DawnC committed 27 days ago

Commit

1c7033a

verified ·

1 Parent(s): f7ca18f

Update response_processor.py

Browse files

Files changed (1) hide show

response_processor.py +81 -69

response_processor.py CHANGED Viewed

@@ -1220,7 +1220,7 @@ class ResponseProcessor:
     def remove_explanatory_notes(self, response: str) -> str:
         """
-        移除解釋性注釋和說明，特別處理破碎的解釋性片段
         Args:
             response: 包含可能注釋的回應
@@ -1232,66 +1232,80 @@ class ResponseProcessor:
             if not response or not response.strip():
                 return response
-            # 第一步：移除完整的解釋性句子片段
-            problematic_fragments = [
-                # 處理完整的 "Note that I have..." 破碎句型
-                r'Note\s+that\s+I\s+have\s*,?\s*avoiding\s+any\s+assumptions.*?natural\s+flow\.?',
-                # 處理 "avoiding any assumptions I have also" 片段
-                r'\bavoiding\s+any\s+assumptions\s+I\s+have\s+also\s+and\s+detail\s+accuracy\s+rule.*?\.?',
-                # 處理 "and their locations. Additionally, I have" 片段
-                r'\band\s+their\s+locations\.\s*Additionally,?\s*I\s+have\s+and\s+have\s+focused.*?\.?',
-                # 處理 "using transitional phrases..." 片段
-                r'\busing\s+transitional\s+phrases\s+and\s+varying\s+sentence\s+structures\s+to\s+create\s+a\s+natural\s+flow\.?',
-                # 處理 "strictly adhered to..." 相關片段
-                r'\bstrictly\s+adhered\s+to\s+the\s+(?:critical\s+adherence\s+to\s+input\s+rule|or\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information)\.?',
-                # 處理 "or inferences beyond..." 片段
-                r'\bor\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information\.?',
-                # 處理 "the mentioning only..." 片段
-                r'\bthe\s+mentioning\s+only\s+the\s+objects\s+and\s+their\s+locations\.?',
-                # 處理 "avoided speculating..." 片段
-                r'\bavoided\s+speculating\s+on\s+object\s+quantities,?\s*spatial\s+relationships,?\s*and\s+atmospheres,?\.?',
-                # 處理 "and detail accuracy rule" 片段
-                r'\band\s+detail\s+accuracy\s+rule,?\s*and\s+their\s+locations\.?',
-                # 處理更一般的解釋性片段
-                r'\b(?:have\s+)?strictly\s+adhered\s+to.*?(?:information|rule)\.?',
-                r'\b(?:have\s+)?followed\s+the.*?(?:whitelist|rule)\.?',
-                r'\b(?:have\s+)?avoided\s+(?:any\s+)?(?:assumptions|speculation).*?\.?',
-                r'\bmentioning\s+only\s+the\s+objects.*?\.?',
-                # 處理孤立的片段詞組
-                r'\bthe\s+mentioning\s+only\b',
-                r'\bavoided\s+speculating\b',
-                r'\bstrictly\s+adhered\s+to\s+the\b',
-                r'\bor\s+inferences\s+beyond\b',
-                r'\band\s+detail\s+accuracy\s+rule\b',
-                r'\bAdditionally,?\s*I\s+have\s+and\s+have\s+focused\b',
-                r'\bclear\s+and\s+concise\s+manner,?\s*using\s+transitional\s+phrases\b',
             ]
-            cleaned_response = response
-            for pattern in problematic_fragments:
-                cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
-            # 第二步：清理標點符號問題
             # 移除多餘的逗號和句號
-            cleaned_response = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_response)
-            cleaned_response = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_response)
-            cleaned_response = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_response)
-            # 修復句子結尾的孤立標點
-            cleaned_response = re.sub(r'\s+,\s*$', '.', cleaned_response)
-            cleaned_response = re.sub(r'\s+,\s*(?=\s+[A-Z])', '. ', cleaned_response)
-            # 第三步：傳統的段落級處理
             traditional_note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
@@ -1299,16 +1313,13 @@ class ResponseProcessor:
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
-            # 尋找段落
-            paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
-            # 如果只有一個段落，檢查並清理它
             if len(paragraphs) == 1:
                 for pattern in traditional_note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
                 result = paragraphs[0].strip()
             else:
-                # 如果有多個段落，移除注釋段落
                 content_paragraphs = []
                 for paragraph in paragraphs:
                     is_note = False
@@ -1317,7 +1328,6 @@ class ResponseProcessor:
                             is_note = True
                             break
-                    # 檢查段落是否以常見的注釋詞開頭
                     if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
                         is_note = True
@@ -1326,37 +1336,39 @@ class ResponseProcessor:
                 result = '\n\n'.join(content_paragraphs).strip()
-            # 第四步：最終清理和格式化
             if result:
                 # 標準化空格
                 result = re.sub(r'\s+', ' ', result)
-                # 修復句子間的間距
-                result = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', result)
                 # 確保句子以適當的標點結尾
                 result = result.strip()
                 if result and not result.endswith(('.', '!', '?')):
                     result += '.'
                 return result
-            # 如果結果為空，嘗試更保守的清理
-            fallback_result = response
-            conservative_patterns = [
-                r'\bstrictly\s+adhered\s+to.*?information\.?',
-                r'\bavoided\s+speculating.*?atmospheres,?\.?',
-                r'\bthe\s+mentioning\s+only.*?locations\.?'
-            ]
-            for pattern in conservative_patterns:
-                fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
-            fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
-            return fallback_result if fallback_result else response
         except Exception as e:
-            self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
             return response
     def get_processor_info(self) -> Dict[str, Any]:

     def remove_explanatory_notes(self, response: str) -> str:
         """
+        移除解釋性注釋和說明，採用多階段清理策略處理破碎片段
         Args:
             response: 包含可能注釋的回應
             if not response or not response.strip():
                 return response
+            original_response = response
+            # 階段1：移除明確的完整問題句型
+            complete_problem_patterns = [
+                # 完整的破碎句型（貪婪匹配）
+                r'Note\s+that\s+I\s+have\s*[,.\s]*.*?(?:natural\s+flow|concise\s+manner)[,.\s]*',
+                # 從 Note that 開始到句號結束的整個片段
+                r'Note\s+that\s+I\s+have\s*[,.\s]*.*?\.',
+                # 處理包含 avoiding assumptions 的整個片段
+                r'[,.\s]*avoiding\s+any\s+assumptions.*?(?:manner|flow|locations)[,.\s]*',
+            ]
+            cleaned_text = response
+            for pattern in complete_problem_patterns:
+                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.DOTALL)
+            # 階段2：移除具體的問題關鍵詞組合
+            specific_fragments = [
+                # 移除 "I have also" 相關片段
+                r'\bI\s+have\s+also\s*[,.\s]*(?:and\s+detail\s+accuracy\s+rule\s*[,.\s]*)?',
+                # 移除 "and their locations" 孤立片段
+                r'[,.\s]*and\s+their\s+locations[,.\s]*',
+                # 移除 "on describing in a clear" 片段
+                r'[,.\s]*on\s+describing\s+in\s+a\s+clear(?:\s+and\s+concise)?(?:\s+manner)?[,.\s]*',
+                # 移除 "detail accuracy rule" 相關
+                r'[,.\s]*(?:and\s+)?detail\s+accuracy\s+rule[,.\s]*',
+                # 移除孤立的 "avoiding any assumptions"
+                r'[,.\s]*avoiding\s+any\s+assumptions[,.\s]*',
+                # 移除 "Additionally, I have" 開頭的破碎片段
+                r'Additionally,?\s*I\s+have\s*[,.\s]*(?:and\s+have\s+focused\s*[,.\s]*)?',
+                # 移除 "using transitional phrases" 相關
+                r'[,.\s]*using\s+transitional\s+phrases(?:\s+and\s+varying\s+sentence\s+structures)?[,.\s]*',
+                # 移除 "to create a natural flow"
+                r'[,.\s]*to\s+create\s+a\s+natural\s+flow[,.\s]*',
+            ]
+            for pattern in specific_fragments:
+                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
+            # 階段3：移除任何以問題關鍵詞開頭的殘留片段
+            problem_starters = [
+                r'^[,.\s]*Note\s+that.*?[,.\s]*',
+                r'^[,.\s]*I\s+have\s+(?:strictly\s+)?(?:adhered|followed|ensured).*?[,.\s]*',
+                r'^[,.\s]*avoiding\s+any.*?[,.\s]*',
+                r'^[,.\s]*Additionally.*?[,.\s]*',
             ]
+            for pattern in problem_starters:
+                cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
+            # 階段4：清理標點符號和格式問題
             # 移除多餘的逗號和句號
+            cleaned_text = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_text)
+            cleaned_text = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_text)
+            cleaned_text = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_text)
+            # 移除開頭和結尾的標點符號
+            cleaned_text = re.sub(r'^[,.\s]+', '', cleaned_text)
+            cleaned_text = re.sub(r'[,.\s]+$', '', cleaned_text)
+            # 修復句子間的標點問題
+            cleaned_text = re.sub(r'([.!?])\s*,\s*([A-Z])', r'\1 \2', cleaned_text)
+            cleaned_text = re.sub(r',\s*([A-Z])', r'. \1', cleaned_text)
+            # 階段5：傳統段落級處理（保持原有邏輯）
             traditional_note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
+            paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
             if len(paragraphs) == 1:
                 for pattern in traditional_note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
                 result = paragraphs[0].strip()
             else:
                 content_paragraphs = []
                 for paragraph in paragraphs:
                     is_note = False
                             is_note = True
                             break
                     if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
                         is_note = True
                 result = '\n\n'.join(content_paragraphs).strip()
+            # 階段6：最終驗證和格式化
             if result:
                 # 標準化空格
                 result = re.sub(r'\s+', ' ', result)
                 # 確保句子以適當的標點結尾
                 result = result.strip()
                 if result and not result.endswith(('.', '!', '?')):
                     result += '.'
+                # 最終檢查：如果結果太短，使用更保守的方法
+                if len(result.split()) < 5:
+                    conservative_result = original_response
+                    # 只移除最明顯的問題片段
+                    conservative_patterns = [
+                        r'Note\s+that\s+I\s+have.*?manner[,.\s]*',
+                        r'avoiding\s+any\s+assumptions.*?locations[,.\s]*',
+                        r'Additionally,?\s*I\s+have.*?flow[,.\s]*'
+                    ]
+                    for pattern in conservative_patterns:
+                        conservative_result = re.sub(pattern, '', conservative_result, flags=re.IGNORECASE)
+                    conservative_result = re.sub(r'\s+', ' ', conservative_result).strip()
+                    return conservative_result if conservative_result else original_response
                 return result
+            # 如果所有處理後結果為空，返回原始內容
+            return original_response
         except Exception as e:
+            if hasattr(self, 'logger'):
+                self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
             return response
     def get_processor_info(self) -> Dict[str, Any]: