DawnC committed on
Commit
30671de
·
verified ·
1 Parent(s): 1c7033a

Update response_processor.py

Browse files
Files changed (1) hide show
  1. response_processor.py +32 -135
response_processor.py CHANGED
@@ -1218,157 +1218,54 @@ class ResponseProcessor:
1218
  if not re.search(r'[.!?]', response):
1219
  raise ResponseProcessingError("Response lacks proper sentence structure")
1220
 
1221
- def remove_explanatory_notes(self, response: str) -> str:
1222
  """
1223
- 移除解釋性注釋和說明,採用多階段清理策略處理破碎片段
1224
-
1225
  Args:
1226
  response: 包含可能注釋的回應
1227
-
1228
  Returns:
1229
  str: 移除注釋後的回應
1230
  """
1231
  try:
1232
- if not response or not response.strip():
1233
- return response
1234
-
1235
- original_response = response
1236
-
1237
- # 階段1:移除明確的完整問題句型
1238
- complete_problem_patterns = [
1239
- # 完整的破碎句型(貪婪匹配)
1240
- r'Note\s+that\s+I\s+have\s*[,.\s]*.*?(?:natural\s+flow|concise\s+manner)[,.\s]*',
1241
-
1242
- # 從 Note that 開始到句號結束的整個片段
1243
- r'Note\s+that\s+I\s+have\s*[,.\s]*.*?\.',
1244
-
1245
- # 處理包含 avoiding assumptions 的整個片段
1246
- r'[,.\s]*avoiding\s+any\s+assumptions.*?(?:manner|flow|locations)[,.\s]*',
1247
- ]
1248
-
1249
- cleaned_text = response
1250
- for pattern in complete_problem_patterns:
1251
- cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.DOTALL)
1252
-
1253
- # 階段2:移除具體的問題關鍵詞組合
1254
- specific_fragments = [
1255
- # 移除 "I have also" 相關片段
1256
- r'\bI\s+have\s+also\s*[,.\s]*(?:and\s+detail\s+accuracy\s+rule\s*[,.\s]*)?',
1257
-
1258
- # 移除 "and their locations" 孤立片段
1259
- r'[,.\s]*and\s+their\s+locations[,.\s]*',
1260
-
1261
- # 移除 "on describing in a clear" 片段
1262
- r'[,.\s]*on\s+describing\s+in\s+a\s+clear(?:\s+and\s+concise)?(?:\s+manner)?[,.\s]*',
1263
-
1264
- # 移除 "detail accuracy rule" 相關
1265
- r'[,.\s]*(?:and\s+)?detail\s+accuracy\s+rule[,.\s]*',
1266
-
1267
- # 移除孤立的 "avoiding any assumptions"
1268
- r'[,.\s]*avoiding\s+any\s+assumptions[,.\s]*',
1269
-
1270
- # 移除 "Additionally, I have" 開頭的破碎片段
1271
- r'Additionally,?\s*I\s+have\s*[,.\s]*(?:and\s+have\s+focused\s*[,.\s]*)?',
1272
-
1273
- # 移除 "using transitional phrases" 相關
1274
- r'[,.\s]*using\s+transitional\s+phrases(?:\s+and\s+varying\s+sentence\s+structures)?[,.\s]*',
1275
-
1276
- # 移除 "to create a natural flow"
1277
- r'[,.\s]*to\s+create\s+a\s+natural\s+flow[,.\s]*',
1278
- ]
1279
-
1280
- for pattern in specific_fragments:
1281
- cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
1282
-
1283
- # 階段3:移除任何以問題關鍵詞開頭的殘留片段
1284
- problem_starters = [
1285
- r'^[,.\s]*Note\s+that.*?[,.\s]*',
1286
- r'^[,.\s]*I\s+have\s+(?:strictly\s+)?(?:adhered|followed|ensured).*?[,.\s]*',
1287
- r'^[,.\s]*avoiding\s+any.*?[,.\s]*',
1288
- r'^[,.\s]*Additionally.*?[,.\s]*',
1289
- ]
1290
-
1291
- for pattern in problem_starters:
1292
- cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
1293
-
1294
- # 階段4:清理標點符號和格式問題
1295
- # 移除多餘的逗號和句號
1296
- cleaned_text = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_text)
1297
- cleaned_text = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_text)
1298
- cleaned_text = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_text)
1299
-
1300
- # 移除開頭和結尾的標點符號
1301
- cleaned_text = re.sub(r'^[,.\s]+', '', cleaned_text)
1302
- cleaned_text = re.sub(r'[,.\s]+$', '', cleaned_text)
1303
-
1304
- # 修復句子間的標點問題
1305
- cleaned_text = re.sub(r'([.!?])\s*,\s*([A-Z])', r'\1 \2', cleaned_text)
1306
- cleaned_text = re.sub(r',\s*([A-Z])', r'. \1', cleaned_text)
1307
-
1308
- # 階段5:傳統段落級處理(保持原有邏輯)
1309
- traditional_note_patterns = [
1310
  r'(?:^|\n)Note:.*?(?:\n|$)',
1311
  r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
1312
  r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
1313
  r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
1314
  ]
1315
-
1316
- paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
1317
-
 
 
1318
  if len(paragraphs) == 1:
1319
- for pattern in traditional_note_patterns:
1320
  paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
1321
- result = paragraphs[0].strip()
1322
- else:
1323
- content_paragraphs = []
1324
- for paragraph in paragraphs:
1325
- is_note = False
1326
- for pattern in traditional_note_patterns:
1327
- if re.search(pattern, paragraph, flags=re.IGNORECASE):
1328
- is_note = True
1329
- break
1330
-
1331
- if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
1332
  is_note = True
1333
-
1334
- if not is_note:
1335
- content_paragraphs.append(paragraph)
1336
-
1337
- result = '\n\n'.join(content_paragraphs).strip()
1338
-
1339
- # 階段6:最終驗證和格式化
1340
- if result:
1341
- # 標準化空格
1342
- result = re.sub(r'\s+', ' ', result)
1343
-
1344
- # 確保句子以適當的標點結尾
1345
- result = result.strip()
1346
- if result and not result.endswith(('.', '!', '?')):
1347
- result += '.'
1348
-
1349
- # 最終檢查:如果結果太短,使用更保守的方法
1350
- if len(result.split()) < 5:
1351
- conservative_result = original_response
1352
- # 只移除最明顯的問題片段
1353
- conservative_patterns = [
1354
- r'Note\s+that\s+I\s+have.*?manner[,.\s]*',
1355
- r'avoiding\s+any\s+assumptions.*?locations[,.\s]*',
1356
- r'Additionally,?\s*I\s+have.*?flow[,.\s]*'
1357
- ]
1358
- for pattern in conservative_patterns:
1359
- conservative_result = re.sub(pattern, '', conservative_result, flags=re.IGNORECASE)
1360
-
1361
- conservative_result = re.sub(r'\s+', ' ', conservative_result).strip()
1362
- return conservative_result if conservative_result else original_response
1363
-
1364
- return result
1365
-
1366
- # 如果所有處理後結果為空,返回原始內容
1367
- return original_response
1368
-
1369
  except Exception as e:
1370
- if hasattr(self, 'logger'):
1371
- self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
1372
  return response
1373
 
1374
  def get_processor_info(self) -> Dict[str, Any]:
 
1218
  if not re.search(r'[.!?]', response):
1219
  raise ResponseProcessingError("Response lacks proper sentence structure")
1220
 
1221
+ def remove_explanatory_notes(self, response: str) -> str:
1222
  """
1223
+ 移除解釋性注釋和說明
1224
+
1225
  Args:
1226
  response: 包含可能注釋的回應
1227
+
1228
  Returns:
1229
  str: 移除注釋後的回應
1230
  """
1231
  try:
1232
+ # 識別常見的注釋和解釋模式
1233
+ note_patterns = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  r'(?:^|\n)Note:.*?(?:\n|$)',
1235
  r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
1236
  r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
1237
  r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
1238
  ]
1239
+
1240
+ # 尋找段落
1241
+ paragraphs = [p.strip() for p in response.split('\n\n') if p.strip()]
1242
+
1243
+ # 如果只有一個段落,檢查並清理它
1244
  if len(paragraphs) == 1:
1245
+ for pattern in note_patterns:
1246
  paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
1247
+ return paragraphs[0].strip()
1248
+
1249
+ # 如果有多個段落,移除注釋段落
1250
+ content_paragraphs = []
1251
+ for paragraph in paragraphs:
1252
+ is_note = False
1253
+ for pattern in note_patterns:
1254
+ if re.search(pattern, paragraph, flags=re.IGNORECASE):
 
 
 
1255
  is_note = True
1256
+ break
1257
+
1258
+ # 檢查段落是否以常見的注釋詞開頭
1259
+ if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
1260
+ is_note = True
1261
+
1262
+ if not is_note:
1263
+ content_paragraphs.append(paragraph)
1264
+
1265
+ return '\n\n'.join(content_paragraphs).strip()
1266
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1267
  except Exception as e:
1268
+ self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
 
1269
  return response
1270
 
1271
  def get_processor_info(self) -> Dict[str, Any]: