Spaces:
Running
on
Zero
Running
on
Zero
Update response_processor.py
Browse files- response_processor.py +82 -66
response_processor.py
CHANGED
@@ -1220,7 +1220,7 @@ class ResponseProcessor:
|
|
1220 |
|
1221 |
def remove_explanatory_notes(self, response: str) -> str:
|
1222 |
"""
|
1223 |
-
|
1224 |
|
1225 |
Args:
|
1226 |
response: 包含可能注釋的回應
|
@@ -1229,37 +1229,51 @@ class ResponseProcessor:
|
|
1229 |
str: 移除注釋後的回應
|
1230 |
"""
|
1231 |
try:
|
1232 |
-
|
1233 |
-
|
1234 |
-
# Note that I have...
|
1235 |
-
r'(?:^|\s)Note\s+that\s+I\s+have.*?(?=\s[A-Z]|\.|$)',
|
1236 |
-
|
1237 |
-
# I have strictly adhered...
|
1238 |
-
r'(?:^|\s)I\s+have\s+strictly\s+adhered\s+to.*?(?=\s[A-Z]|\.|$)',
|
1239 |
-
|
1240 |
-
# I have followed/ensured...
|
1241 |
-
r'(?:^|\s)I\s+have\s+(?:followed|ensured|also\s+followed).*?(?=\s[A-Z]|\.|$)',
|
1242 |
|
1243 |
-
|
1244 |
-
|
|
|
|
|
1245 |
|
1246 |
-
#
|
1247 |
-
r'
|
1248 |
|
1249 |
-
#
|
1250 |
-
r'
|
1251 |
|
1252 |
-
#
|
1253 |
-
r'
|
1254 |
|
1255 |
-
#
|
1256 |
-
r'(
|
|
|
|
|
|
|
1257 |
|
1258 |
-
#
|
1259 |
-
r'
|
|
|
|
|
|
|
1260 |
]
|
1261 |
|
1262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1263 |
traditional_note_patterns = [
|
1264 |
r'(?:^|\n)Note:.*?(?:\n|$)',
|
1265 |
r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
|
@@ -1267,59 +1281,61 @@ class ResponseProcessor:
|
|
1267 |
r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
|
1268 |
]
|
1269 |
|
1270 |
-
#
|
1271 |
-
cleaned_response = response
|
1272 |
-
for pattern in specific_note_patterns:
|
1273 |
-
cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
|
1274 |
-
|
1275 |
-
# 清理多餘的標點符號和空格
|
1276 |
-
cleaned_response = re.sub(r'\s*,\s*,\s*', ', ', cleaned_response)
|
1277 |
-
cleaned_response = re.sub(r'\s*\.\s*\.\s*', '. ', cleaned_response)
|
1278 |
-
cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
|
1279 |
-
|
1280 |
-
# 修復可能出現的句子結尾問題
|
1281 |
-
cleaned_response = re.sub(r'(\w)\s*,\s*$', r'\1.', cleaned_response)
|
1282 |
-
cleaned_response = re.sub(r'(\w)\s*,\s*([A-Z])', r'\1. \2', cleaned_response)
|
1283 |
-
|
1284 |
-
# 尋找段落進行傳統處理
|
1285 |
paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
|
1286 |
|
1287 |
-
#
|
1288 |
if len(paragraphs) == 1:
|
1289 |
for pattern in traditional_note_patterns:
|
1290 |
paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
|
1291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1292 |
|
1293 |
-
|
1294 |
-
|
1295 |
-
for paragraph in paragraphs:
|
1296 |
-
is_note = False
|
1297 |
-
|
1298 |
-
# 檢查傳統注釋模式
|
1299 |
-
for pattern in traditional_note_patterns:
|
1300 |
-
if re.search(pattern, paragraph, flags=re.IGNORECASE):
|
1301 |
is_note = True
|
1302 |
-
break
|
1303 |
|
1304 |
-
|
1305 |
-
|
1306 |
-
is_note = True
|
1307 |
|
1308 |
-
|
1309 |
-
content_paragraphs.append(paragraph)
|
1310 |
-
|
1311 |
-
result = '\n\n'.join(content_paragraphs).strip()
|
1312 |
|
1313 |
-
#
|
1314 |
-
if
|
1315 |
-
#
|
1316 |
-
|
1317 |
-
for pattern in specific_note_patterns:
|
1318 |
-
fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
|
1319 |
-
fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
|
1320 |
-
return fallback_result if fallback_result else response
|
1321 |
|
1322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1323 |
|
1324 |
except Exception as e:
|
1325 |
self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
|
|
|
1220 |
|
1221 |
def remove_explanatory_notes(self, response: str) -> str:
|
1222 |
"""
|
1223 |
+
移除解釋性注釋和說明,特別處理破碎的解釋性片段
|
1224 |
|
1225 |
Args:
|
1226 |
response: 包含可能注釋的回應
|
|
|
1229 |
str: 移除注釋後的回應
|
1230 |
"""
|
1231 |
try:
|
1232 |
+
if not response or not response.strip():
|
1233 |
+
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1234 |
|
1235 |
+
# 第一步:移除完整的解釋性句子片段
|
1236 |
+
problematic_fragments = [
|
1237 |
+
# 處理 "strictly adhered to..." 相關片段
|
1238 |
+
r'\bstrictly\s+adhered\s+to\s+the\s+(?:critical\s+adherence\s+to\s+input\s+rule|or\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information)\.?',
|
1239 |
|
1240 |
+
# 處理 "or inferences beyond..." 片段
|
1241 |
+
r'\bor\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information\.?',
|
1242 |
|
1243 |
+
# 處理 "the mentioning only..." 片段
|
1244 |
+
r'\bthe\s+mentioning\s+only\s+the\s+objects\s+and\s+their\s+locations\.?',
|
1245 |
|
1246 |
+
# 處理 "avoided speculating..." 片段
|
1247 |
+
r'\bavoided\s+speculating\s+on\s+object\s+quantities,?\s*spatial\s+relationships,?\s*and\s+atmospheres,?\.?',
|
1248 |
|
1249 |
+
# 處理更一般的解釋性片段
|
1250 |
+
r'\b(?:have\s+)?strictly\s+adhered\s+to.*?(?:information|rule)\.?',
|
1251 |
+
r'\b(?:have\s+)?followed\s+the.*?(?:whitelist|rule)\.?',
|
1252 |
+
r'\b(?:have\s+)?avoided\s+(?:any\s+)?(?:assumptions|speculation).*?\.?',
|
1253 |
+
r'\bmentioning\s+only\s+the\s+objects.*?\.?',
|
1254 |
|
1255 |
+
# 處理孤立的片段詞組
|
1256 |
+
r'\bthe\s+mentioning\s+only\b',
|
1257 |
+
r'\bavoided\s+speculating\b',
|
1258 |
+
r'\bstrictly\s+adhered\s+to\s+the\b',
|
1259 |
+
r'\bor\s+inferences\s+beyond\b',
|
1260 |
]
|
1261 |
|
1262 |
+
cleaned_response = response
|
1263 |
+
for pattern in problematic_fragments:
|
1264 |
+
cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
|
1265 |
+
|
1266 |
+
# 第二步:清理標點符號問題
|
1267 |
+
# 移除多餘的逗號和句號
|
1268 |
+
cleaned_response = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_response)
|
1269 |
+
cleaned_response = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_response)
|
1270 |
+
cleaned_response = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_response)
|
1271 |
+
|
1272 |
+
# 修復句子結尾的孤立標點
|
1273 |
+
cleaned_response = re.sub(r'\s+,\s*$', '.', cleaned_response)
|
1274 |
+
cleaned_response = re.sub(r'\s+,\s*(?=\s+[A-Z])', '. ', cleaned_response)
|
1275 |
+
|
1276 |
+
# 第三步:傳統的段落級處理
|
1277 |
traditional_note_patterns = [
|
1278 |
r'(?:^|\n)Note:.*?(?:\n|$)',
|
1279 |
r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
|
|
|
1281 |
r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
|
1282 |
]
|
1283 |
|
1284 |
+
# 尋找段落
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1285 |
paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
|
1286 |
|
1287 |
+
# 如果只有一個段落,檢查並清理它
|
1288 |
if len(paragraphs) == 1:
|
1289 |
for pattern in traditional_note_patterns:
|
1290 |
paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
|
1291 |
+
result = paragraphs[0].strip()
|
1292 |
+
else:
|
1293 |
+
# 如果有多個段落,移除注釋段落
|
1294 |
+
content_paragraphs = []
|
1295 |
+
for paragraph in paragraphs:
|
1296 |
+
is_note = False
|
1297 |
+
for pattern in traditional_note_patterns:
|
1298 |
+
if re.search(pattern, paragraph, flags=re.IGNORECASE):
|
1299 |
+
is_note = True
|
1300 |
+
break
|
1301 |
|
1302 |
+
# 檢查段落是否以常見的注釋詞開頭
|
1303 |
+
if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
|
|
|
|
|
|
|
|
|
|
|
|
|
1304 |
is_note = True
|
|
|
1305 |
|
1306 |
+
if not is_note:
|
1307 |
+
content_paragraphs.append(paragraph)
|
|
|
1308 |
|
1309 |
+
result = '\n\n'.join(content_paragraphs).strip()
|
|
|
|
|
|
|
1310 |
|
1311 |
+
# 第四步:最終清理和格式化
|
1312 |
+
if result:
|
1313 |
+
# 標準化空格
|
1314 |
+
result = re.sub(r'\s+', ' ', result)
|
|
|
|
|
|
|
|
|
1315 |
|
1316 |
+
# 修復句子間的間距
|
1317 |
+
result = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', result)
|
1318 |
+
|
1319 |
+
# 確保句子以適當的標點結尾
|
1320 |
+
result = result.strip()
|
1321 |
+
if result and not result.endswith(('.', '!', '?')):
|
1322 |
+
result += '.'
|
1323 |
+
|
1324 |
+
return result
|
1325 |
+
|
1326 |
+
# 如果結果為空,嘗試更保守的清理
|
1327 |
+
fallback_result = response
|
1328 |
+
conservative_patterns = [
|
1329 |
+
r'\bstrictly\s+adhered\s+to.*?information\.?',
|
1330 |
+
r'\bavoided\s+speculating.*?atmospheres,?\.?',
|
1331 |
+
r'\bthe\s+mentioning\s+only.*?locations\.?'
|
1332 |
+
]
|
1333 |
+
|
1334 |
+
for pattern in conservative_patterns:
|
1335 |
+
fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
|
1336 |
+
|
1337 |
+
fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
|
1338 |
+
return fallback_result if fallback_result else response
|
1339 |
|
1340 |
except Exception as e:
|
1341 |
self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
|