DawnC committed on
Commit
8868978
·
verified ·
1 Parent(s): 4453070

Update response_processor.py

Browse files
Files changed (1) hide show
  1. response_processor.py +82 -66
response_processor.py CHANGED
@@ -1220,7 +1220,7 @@ class ResponseProcessor:
1220
 
1221
  def remove_explanatory_notes(self, response: str) -> str:
1222
  """
1223
- 移除解釋性注釋和說明,特別針對 "Note that I..."
1224
 
1225
  Args:
1226
  response: 包含可能注釋的回應
@@ -1229,37 +1229,51 @@ class ResponseProcessor:
1229
  str: 移除注釋後的回應
1230
  """
1231
  try:
1232
- # 專門針對 "Note that I..." 和相關解釋性敘述
1233
- specific_note_patterns = [
1234
- # Note that I have...
1235
- r'(?:^|\s)Note\s+that\s+I\s+have.*?(?=\s[A-Z]|\.|$)',
1236
-
1237
- # I have strictly adhered...
1238
- r'(?:^|\s)I\s+have\s+strictly\s+adhered\s+to.*?(?=\s[A-Z]|\.|$)',
1239
-
1240
- # I have followed/ensured...
1241
- r'(?:^|\s)I\s+have\s+(?:followed|ensured|also\s+followed).*?(?=\s[A-Z]|\.|$)',
1242
 
1243
- # Additionally, I have...
1244
- r'(?:^|\s)Additionally,?\s*I\s+have.*?(?=\s[A-Z]|\.|$)',
 
 
1245
 
1246
- # avoiding any assumptions...
1247
- r'(?:^|\s)avoiding\s+any\s+(?:assumptions|inferences).*?(?=\s[A-Z]|\.|$)',
1248
 
1249
- # object whitelist and detail accuracy rule
1250
- r'(?:^|\s)(?:object\s+whitelist\s+and\s+detail\s+accuracy\s+rule|detail\s+accuracy\s+rule).*?(?=\s[A-Z]|\.|$)',
1251
 
1252
- # using transitional phrases
1253
- r'(?:^|\s)using\s+transitional\s+phrases.*?(?=\s[A-Z]|\.|$)',
1254
 
1255
- # create a natural flow
1256
- r'(?:^|\s)(?:and\s+have\s+focused\s+on|focused\s+on)\s+describing.*?natural\s+flow.*?(?=\s[A-Z]|\.|$)',
 
 
 
1257
 
1258
- # critical adherence to input rule
1259
- r'(?:^|\s)critical\s+adherence\s+to\s+input\s+rule.*?(?=\s[A-Z]|\.|$)'
 
 
 
1260
  ]
1261
 
1262
- # 傳統的注釋和解釋模式
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
  traditional_note_patterns = [
1264
  r'(?:^|\n)Note:.*?(?:\n|$)',
1265
  r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
@@ -1267,59 +1281,61 @@ class ResponseProcessor:
1267
  r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
1268
  ]
1269
 
1270
- # 首先移除特定的 "Note that I..."
1271
- cleaned_response = response
1272
- for pattern in specific_note_patterns:
1273
- cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
1274
-
1275
- # 清理多餘的標點符號和空格
1276
- cleaned_response = re.sub(r'\s*,\s*,\s*', ', ', cleaned_response)
1277
- cleaned_response = re.sub(r'\s*\.\s*\.\s*', '. ', cleaned_response)
1278
- cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
1279
-
1280
- # 修復可能出現的句子結尾問題
1281
- cleaned_response = re.sub(r'(\w)\s*,\s*$', r'\1.', cleaned_response)
1282
- cleaned_response = re.sub(r'(\w)\s*,\s*([A-Z])', r'\1. \2', cleaned_response)
1283
-
1284
- # 尋找段落進行傳統處理
1285
  paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
1286
 
1287
- # 如果只有一個段落,檢查並清理傳統注釋模式
1288
  if len(paragraphs) == 1:
1289
  for pattern in traditional_note_patterns:
1290
  paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
1291
- return paragraphs[0].strip()
 
 
 
 
 
 
 
 
 
1292
 
1293
- # 如果有多個段落,移除傳統注釋段落
1294
- content_paragraphs = []
1295
- for paragraph in paragraphs:
1296
- is_note = False
1297
-
1298
- # 檢查傳統注釋模式
1299
- for pattern in traditional_note_patterns:
1300
- if re.search(pattern, paragraph, flags=re.IGNORECASE):
1301
  is_note = True
1302
- break
1303
 
1304
- # 檢查段落是否以常見的注釋詞開頭
1305
- if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
1306
- is_note = True
1307
 
1308
- if not is_note:
1309
- content_paragraphs.append(paragraph)
1310
-
1311
- result = '\n\n'.join(content_paragraphs).strip()
1312
 
1313
- # 最終檢查:確保結果不為空
1314
- if not result or len(result.strip()) < 10:
1315
- # 如果處理後內容過短,返回去除特定模式後的原始內容
1316
- fallback_result = response
1317
- for pattern in specific_note_patterns:
1318
- fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
1319
- fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
1320
- return fallback_result if fallback_result else response
1321
 
1322
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1323
 
1324
  except Exception as e:
1325
  self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
 
1220
 
1221
  def remove_explanatory_notes(self, response: str) -> str:
1222
  """
1223
+ 移除解釋性注釋和說明,特別處理破碎的解釋性片段
1224
 
1225
  Args:
1226
  response: 包含可能注釋的回應
 
1229
  str: 移除注釋後的回應
1230
  """
1231
  try:
1232
+ if not response or not response.strip():
1233
+ return response
 
 
 
 
 
 
 
 
1234
 
1235
+ # 第一步:移除完整的解釋性句子片段
1236
+ problematic_fragments = [
1237
+ # 處理 "strictly adhered to..." 相關片段
1238
+ r'\bstrictly\s+adhered\s+to\s+the\s+(?:critical\s+adherence\s+to\s+input\s+rule|or\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information)\.?',
1239
 
1240
+ # 處理 "or inferences beyond..." 片段
1241
+ r'\bor\s+inferences\s+beyond\s+the\s+explicitly\s+provided\s+information\.?',
1242
 
1243
+ # 處理 "the mentioning only..." 片段
1244
+ r'\bthe\s+mentioning\s+only\s+the\s+objects\s+and\s+their\s+locations\.?',
1245
 
1246
+ # 處理 "avoided speculating..." 片段
1247
+ r'\bavoided\s+speculating\s+on\s+object\s+quantities,?\s*spatial\s+relationships,?\s*and\s+atmospheres,?\.?',
1248
 
1249
+ # 處理更一般的解釋性片段
1250
+ r'\b(?:have\s+)?strictly\s+adhered\s+to.*?(?:information|rule)\.?',
1251
+ r'\b(?:have\s+)?followed\s+the.*?(?:whitelist|rule)\.?',
1252
+ r'\b(?:have\s+)?avoided\s+(?:any\s+)?(?:assumptions|speculation).*?\.?',
1253
+ r'\bmentioning\s+only\s+the\s+objects.*?\.?',
1254
 
1255
+ # 處理孤立的片段詞組
1256
+ r'\bthe\s+mentioning\s+only\b',
1257
+ r'\bavoided\s+speculating\b',
1258
+ r'\bstrictly\s+adhered\s+to\s+the\b',
1259
+ r'\bor\s+inferences\s+beyond\b',
1260
  ]
1261
 
1262
+ cleaned_response = response
1263
+ for pattern in problematic_fragments:
1264
+ cleaned_response = re.sub(pattern, '', cleaned_response, flags=re.IGNORECASE)
1265
+
1266
+ # 第二步:清理標點符號問題
1267
+ # 移除多餘的逗號和句號
1268
+ cleaned_response = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_response)
1269
+ cleaned_response = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_response)
1270
+ cleaned_response = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_response)
1271
+
1272
+ # 修復句子結尾的孤立標點
1273
+ cleaned_response = re.sub(r'\s+,\s*$', '.', cleaned_response)
1274
+ cleaned_response = re.sub(r'\s+,\s*(?=\s+[A-Z])', '. ', cleaned_response)
1275
+
1276
+ # 第三步:傳統的段落級處理
1277
  traditional_note_patterns = [
1278
  r'(?:^|\n)Note:.*?(?:\n|$)',
1279
  r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
 
1281
  r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
1282
  ]
1283
 
1284
+ # 尋找段落
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1285
  paragraphs = [p.strip() for p in cleaned_response.split('\n\n') if p.strip()]
1286
 
1287
+ # 如果只有一個段落,檢查並清理它
1288
  if len(paragraphs) == 1:
1289
  for pattern in traditional_note_patterns:
1290
  paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
1291
+ result = paragraphs[0].strip()
1292
+ else:
1293
+ # 如果有多個段落,移除注釋段落
1294
+ content_paragraphs = []
1295
+ for paragraph in paragraphs:
1296
+ is_note = False
1297
+ for pattern in traditional_note_patterns:
1298
+ if re.search(pattern, paragraph, flags=re.IGNORECASE):
1299
+ is_note = True
1300
+ break
1301
 
1302
+ # 檢查段落是否以常見的注釋詞開頭
1303
+ if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
 
 
 
 
 
 
1304
  is_note = True
 
1305
 
1306
+ if not is_note:
1307
+ content_paragraphs.append(paragraph)
 
1308
 
1309
+ result = '\n\n'.join(content_paragraphs).strip()
 
 
 
1310
 
1311
+ # 第四步:最終清理和格式化
1312
+ if result:
1313
+ # 標準化空格
1314
+ result = re.sub(r'\s+', ' ', result)
 
 
 
 
1315
 
1316
+ # 修復句子間的間距
1317
+ result = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', result)
1318
+
1319
+ # 確保句子以適當的標點結尾
1320
+ result = result.strip()
1321
+ if result and not result.endswith(('.', '!', '?')):
1322
+ result += '.'
1323
+
1324
+ return result
1325
+
1326
+ # 如果結果為空,嘗試更保守的清理
1327
+ fallback_result = response
1328
+ conservative_patterns = [
1329
+ r'\bstrictly\s+adhered\s+to.*?information\.?',
1330
+ r'\bavoided\s+speculating.*?atmospheres,?\.?',
1331
+ r'\bthe\s+mentioning\s+only.*?locations\.?'
1332
+ ]
1333
+
1334
+ for pattern in conservative_patterns:
1335
+ fallback_result = re.sub(pattern, '', fallback_result, flags=re.IGNORECASE)
1336
+
1337
+ fallback_result = re.sub(r'\s+', ' ', fallback_result).strip()
1338
+ return fallback_result if fallback_result else response
1339
 
1340
  except Exception as e:
1341
  self.logger.error(f"Failed to remove explanatory notes: {str(e)}")