Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Community

DawnC committed 27 days ago

Commit

8868978

verified ·

1 Parent(s): 4453070

Update response_processor.py

Browse files

Files changed (1) hide show

response_processor.py +82 -66

response_processor.py CHANGED Viewed

@@ -1220,7 +1220,7 @@ class ResponseProcessor:
     def remove_explanatory_notes(self, response: str) -> str:
         """
-        移除解釋性注釋和說明，特別針對 "Note that I..."
         Args:
             response: 包含可能注釋的回應
@@ -1229,37 +1229,51 @@ class ResponseProcessor:
             str: 移除注釋後的回應
         """
         try:
-            # 專門針對 "Note that I..." 和相關解釋性敘述
-            specific_note_patterns = [
-                # Note that I have...
-                r'(?:^|\s)Note\s+that\s+I\s+have.*?(?=\s[A-Z]|\.|$)',
-                # I have strictly adhered...
-                r'(?:^|\s)I\s+have\s+strictly\s+adhered\s+to.*?(?=\s[A-Z]|\.|$)',
-                # I have followed/ensured...
-                r'(?:^|\s)I\s+have\s+(?:followed|ensured|also\s+followed).*?(?=\s[A-Z]|\.|$)',
-                # Additionally, I have...
-                r'(?:^|\s)Additionally,?\s*I\s+have.*?(?=\s[A-Z]|\.|$)',
-                # avoiding any assumptions...
-                r'(?:^|\s)avoiding\s+any\s+(?:assumptions|inferences).*?(?=\s[A-Z]|\.|$)',
-                # object whitelist and detail accuracy rule
-                r'(?:^|\s)(?:object\s+whitelist\s+and\s+detail\s+accuracy\s+rule|detail\s+accuracy\s+rule).*?(?=\s[A-Z]|\.|$)',
-                # using transitional phrases
-                r'(?:^|\s)using\s+transitional\s+phrases.*?(?=\s[A-Z]|\.|$)',
-                # create a natural flow
-                r'(?:^|\s)(?:and\s+have\s+focused\s+on|focused\s+on)\s+describing.*?natural\s+flow.*?(?=\s[A-Z]|\.|$)',
-                # critical adherence to input rule
-                r'(?:^|\s)critical\s+adherence\s+to\s+input\s+rule.*?(?=\s[A-Z]|\.|$)'
             ]
-            # 傳統的注釋和解釋模式
             traditional_note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
@@ -1267,59 +1281,61 @@ class ResponseProcessor:
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
-            # 首先移除特定的 "Note that I..."
-            cleaned_response = response
-            for pattern in specific_note_patterns:
-                cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
-            # 清理多餘的標點符號和空格
-            cleaned_response = re.sub(r'\s*,\s*,\s*', ', ', cleaned_response)
-            cleaned_response = re.sub(r'\s*\.\s*\.\s*', '. ', cleaned_response)
-            cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
-            # 修復可能出現的句子結尾問題
-            cleaned_response = re.sub(r'(\w)\s*,\s*$', r'\1.', cleaned_response)
-            cleaned_response = re.sub(r'(\w)\s*,\s*([A-Z])', r'\1. \2', cleaned_response)
-            # 尋找段落進行傳統處理
             paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
-            # 如果只有一個段落，檢查並清理傳統注釋模式
             if len(paragraphs) == 1:
                 for pattern in traditional_note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
-                return paragraphs[0].strip()
-            # 如果有多個段落，移除傳統注釋段落
-            content_paragraphs = []
-            for paragraph in paragraphs:
-                is_note = False
-                # 檢查傳統注釋模式
-                for pattern in traditional_note_patterns:
-                    if re.search(pattern, paragraph, flags=re.IGNORECASE):
                         is_note = True
-                        break
-                # 檢查段落是否以常見的注釋詞開頭
-                if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
-                    is_note = True
-                if not is_note:
-                    content_paragraphs.append(paragraph)
-            result = '\n\n'.join(content_paragraphs).strip()
-            # 最終檢查：確保結果不為空
-            if not result or len(result.strip()) < 10:
-                # 如果處理後內容過短，返回去除特定模式後的原始內容
-                fallback_result = response
-                for pattern in specific_note_patterns:
-                    fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
-                fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
-                return fallback_result if fallback_result else response
-            return result
         except Exception as e:
             self.logger.error(f"Failed to remove explanatory notes: {str(e)}")

     def remove_explanatory_notes(self, response: str) -> str:
         """
+        移除解釋性注釋和說明，特別處理破碎的解釋性片段
         Args:
             response: 包含可能注釋的回應
             str: 移除注釋後的回應
         """
         try:
+            if not response or not response.strip():
+                return response
+            # 第一步：移除完整的解釋性句子片段
+            problematic_fragments = [
+                # 處理 "strictly adhered to..." 相關片段
+                r'\bstrictly\s+adhered\s+to\s+the\s+(?:critical\s+adherence\s+to\s+input\s+rule|or\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information)\.?',
+                # 處理 "or inferences beyond..." 片段
+                r'\bor\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information\.?',
+                # 處理 "the mentioning only..." 片段
+                r'\bthe\s+mentioning\s+only\s+the\s+objects\s+and\s+their\s+locations\.?',
+                # 處理 "avoided speculating..." 片段
+                r'\bavoided\s+speculating\s+on\s+object\s+quantities,?\s*spatial\s+relationships,?\s*and\s+atmospheres,?\.?',
+                # 處理更一般的解釋性片段
+                r'\b(?:have\s+)?strictly\s+adhered\s+to.*?(?:information|rule)\.?',
+                r'\b(?:have\s+)?followed\s+the.*?(?:whitelist|rule)\.?',
+                r'\b(?:have\s+)?avoided\s+(?:any\s+)?(?:assumptions|speculation).*?\.?',
+                r'\bmentioning\s+only\s+the\s+objects.*?\.?',
+                # 處理孤立的片段詞組
+                r'\bthe\s+mentioning\s+only\b',
+                r'\bavoided\s+speculating\b',
+                r'\bstrictly\s+adhered\s+to\s+the\b',
+                r'\bor\s+inferences\s+beyond\b',
             ]
+            cleaned_response = response
+            for pattern in problematic_fragments:
+                cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
+            # 第二步：清理標點符號問題
+            # 移除多餘的逗號和句號
+            cleaned_response = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_response)
+            cleaned_response = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_response)
+            cleaned_response = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_response)
+            # 修復句子結尾的孤立標點
+            cleaned_response = re.sub(r'\s+,\s*$', '.', cleaned_response)
+            cleaned_response = re.sub(r'\s+,\s*(?=\s+[A-Z])', '. ', cleaned_response)
+            # 第三步：傳統的段落級處理
             traditional_note_patterns = [
                 r'(?:^|\n)Note:.*?(?:\n|$)',
                 r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
                 r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
             ]
+            # 尋找段落
             paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
+            # 如果只有一個段落，檢查並清理它
             if len(paragraphs) == 1:
                 for pattern in traditional_note_patterns:
                     paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
+                result = paragraphs[0].strip()
+            else:
+                # 如果有多個段落，移除注釋段落
+                content_paragraphs = []
+                for paragraph in paragraphs:
+                    is_note = False
+                    for pattern in traditional_note_patterns:
+                        if re.search(pattern, paragraph, flags=re.IGNORECASE):
+                            is_note = True
+                            break
+                    # 檢查段落是否以常見的注釋詞開頭
+                    if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
                         is_note = True
+                    if not is_note:
+                        content_paragraphs.append(paragraph)
+                result = '\n\n'.join(content_paragraphs).strip()
+            # 第四步：最終清理和格式化
+            if result:
+                # 標準化空格
+                result = re.sub(r'\s+', ' ', result)
+                # 修復句子間的間距
+                result = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', result)
+                # 確保句子以適當的標點結尾
+                result = result.strip()
+                if result and not result.endswith(('.', '!', '?')):
+                    result += '.'
+                return result
+            # 如果結果為空，嘗試更保守的清理
+            fallback_result = response
+            conservative_patterns = [
+                r'\bstrictly\s+adhered\s+to.*?information\.?',
+                r'\bavoided\s+speculating.*?atmospheres,?\.?',
+                r'\bthe\s+mentioning\s+only.*?locations\.?'
+            ]
+            for pattern in conservative_patterns:
+                fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
+            fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
+            return fallback_result if fallback_result else response
         except Exception as e:
             self.logger.error(f"Failed to remove explanatory notes: {str(e)}")