File size: 7,047 Bytes
0a69927 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import re
from typing import Dict, List, Union
import logging
import json
# Set up logging
# NOTE: basicConfig runs at import time and configures the root logger:
# INFO level, and a "timestamp - level - message" line format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class UltimateLLMResponseParser:
    """Turn a free-form LLM reply into a normalised result dictionary.

    The result always has four keys:

    * ``decision`` - ``'refine'`` or ``'answer'``
    * ``reasoning`` - text explaining the decision (synthesised if absent)
    * ``selected_results`` - list of ints, capped at ``max_selected_results``
    * ``response`` - the reply text (falls back to the reasoning if absent)

    Several parsing strategies are tried in order (labelled sections, JSON,
    loosely delimited sections, keyword inference); the first one that
    yields any usable field wins.
    """

    VALID_DECISIONS = ('refine', 'answer')
    _RESULT_KEYS = ('decision', 'reasoning', 'selected_results', 'response')
    # Characters commonly wrapped around a decision word ("**Answer**", "refine.").
    _DECISION_NOISE = ' \t\r\n*"\'`.!:;,'

    # logging.getLogger returns the same logger object for a given name, so
    # this is the logger any other code in this module gets for __name__.
    _logger = logging.getLogger(__name__)

    def __init__(self, max_selected_results: Union[int, None] = 2):
        """Create a parser.

        Args:
            max_selected_results: Maximum number of selected result indices
                kept in the output; ``None`` disables the cap. Defaults to 2.

        Raises:
            ValueError: If ``max_selected_results`` is negative.
        """
        if max_selected_results is not None and max_selected_results < 0:
            raise ValueError("max_selected_results must be non-negative or None")
        self.max_selected_results = max_selected_results
        self.decision_keywords = {
            'refine': ['refine', 'need more info', 'insufficient', 'unclear', 'more research', 'additional search'],
            'answer': ['answer', 'sufficient', 'enough info', 'can respond', 'adequate', 'comprehensive']
        }
        self.section_identifiers = [
            ('decision', r'(?i)decision\s*:'),
            ('reasoning', r'(?i)reasoning\s*:'),
            ('selected_results', r'(?i)selected results\s*:'),
            ('response', r'(?i)response\s*:')
        ]

    def parse_llm_response(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Parse ``response`` and return the normalised result dictionary.

        ``None`` is treated as an empty reply and any other non-string value
        is converted with ``str()`` so the parser never raises on odd input.
        """
        log = self._logger
        log.info("Starting to parse LLM response")

        if response is None:
            response = ''
        elif not isinstance(response, str):
            response = str(response)

        result = {
            'decision': None,
            'reasoning': None,
            'selected_results': [],
            'response': None
        }

        parsing_strategies = (
            self._parse_structured_response,
            self._parse_json_response,
            self._parse_unstructured_response,
            self._parse_implicit_response,
        )

        for strategy in parsing_strategies:
            try:
                parsed_result = strategy(response)
            except Exception as exc:
                # One misbehaving strategy must not stop the others from running.
                log.warning("Error in parsing strategy %s: %s", strategy.__name__, exc)
                continue
            if self._is_valid_result(parsed_result):
                result.update(parsed_result)
                log.info("Successfully parsed using strategy: %s", strategy.__name__)
                break
        else:
            # No strategy produced a usable field.
            log.warning("All parsing strategies failed. Using fallback parsing.")
            result = self._fallback_parsing(response)

        result = self._post_process_result(result)
        log.info("Finished parsing LLM response")
        return result

    def _parse_structured_response(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Extract ``Label: value`` sections delimited by the known headers.

        Each section runs from the end of the first occurrence of its header
        to the start of the next header belonging to a *different* section
        (or the end of the text). Headers are located one pattern at a time:
        the identifier patterns carry a leading ``(?i)`` flag, which Python
        3.11+ rejects when it is embedded in the middle of a larger regex.
        """
        headers = []  # (start, end, key) for every header occurrence
        for key, pattern in self.section_identifiers:
            for match in re.finditer(pattern, response, re.IGNORECASE):
                headers.append((match.start(), match.end(), key))

        result = {}
        for key, _ in self.section_identifiers:
            own = [(start, end) for start, end, k in headers if k == key]
            if not own:
                continue
            _, value_start = min(own)
            value_stop = min(
                (start for start, _, k in headers if k != key and start >= value_start),
                default=len(response),
            )
            result[key] = response[value_start:value_stop].strip()

        if 'selected_results' in result:
            result['selected_results'] = self._extract_numbers(result['selected_results'])
        return result

    def _parse_json_response(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Parse the outermost ``{...}`` span as JSON and keep the known keys.

        Returns an empty dict when there is no brace-delimited span or it is
        not a valid JSON object, so the next strategy can take over.
        """
        json_match = re.search(r'\{.*\}', response, re.DOTALL)
        if not json_match:
            return {}
        try:
            parsed_json = json.loads(json_match.group(0))
        except json.JSONDecodeError:
            return {}
        if not isinstance(parsed_json, dict):
            return {}
        return {k: v for k, v in parsed_json.items() if k in self._RESULT_KEYS}

    def _parse_unstructured_response(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Parse line-oriented replies whose headers end in ``:``, ``.`` or ``-``.

        A line that starts with a recognised header opens that section; every
        other non-blank line is appended to the section currently open.
        """
        result = {}
        current_section = None
        for line in response.split('\n'):
            section_match = re.match(r'(.+?)[:.-](.+)', line)
            key = self._match_section_to_key(section_match.group(1)) if section_match else None
            if key:
                current_section = key
                result[key] = section_match.group(2).strip()
            elif current_section and line.strip():
                # Continuation text, including lines that merely contain a
                # delimiter character such as a full stop.
                result[current_section] += ' ' + line.strip()

        if 'selected_results' in result:
            result['selected_results'] = self._extract_numbers(result['selected_results'])
        return result

    def _parse_implicit_response(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Infer the fields from unlabelled text.

        The decision comes from keyword scoring, selected results from any
        numbers 1-10 in the text, and the stripped text itself is kept as
        the response so the caller does not lose the model's reply.
        """
        result = {'decision': self._infer_decision(response)}
        numbers = self._extract_numbers(response)
        if numbers:
            result['selected_results'] = numbers
        text = response.strip()
        if text:
            result['response'] = text
        return result

    def _fallback_parsing(self, response: str) -> Dict[str, Union[str, List[int]]]:
        """Build a complete result directly from the raw text (last resort)."""
        return {
            'decision': self._infer_decision(response),
            'reasoning': None,
            'selected_results': self._extract_numbers(response),
            'response': response.strip()
        }

    def _post_process_result(self, result: Dict[str, Union[str, List[int]]]) -> Dict[str, Union[str, List[int]]]:
        """Normalise ``result`` in place and return it.

        Ensures the decision is one of ``VALID_DECISIONS``, the selected
        results are a capped list of ints, and reasoning/response are filled.
        """
        result['decision'] = self._normalize_decision(result)

        selected = self._coerce_selected_results(result.get('selected_results'))
        result['selected_results'] = selected[:self.max_selected_results]

        if not result.get('reasoning'):
            result['reasoning'] = f"Based on the {'presence' if result['selected_results'] else 'absence'} of selected results and the overall content."
        if not result.get('response'):
            result['response'] = result['reasoning']
        return result

    def _normalize_decision(self, result: Dict[str, Union[str, List[int]]]) -> str:
        """Return a valid decision for ``result``.

        Preference order: the parsed decision once case and surrounding
        punctuation are removed; keyword scoring of the parsed decision text;
        keyword scoring of the whole result.
        """
        raw = result.get('decision')
        if isinstance(raw, str):
            cleaned = raw.strip(self._DECISION_NOISE).lower()
            if cleaned in self.VALID_DECISIONS:
                return cleaned
            refine_score, answer_score = self._keyword_scores(raw)
            if refine_score != answer_score:
                return 'refine' if refine_score > answer_score else 'answer'
        return self._infer_decision(str(result))

    def _coerce_selected_results(self, value) -> List[int]:
        """Convert a parsed ``selected_results`` value into a list of ints.

        Lists keep their int items and have any other items (e.g. ``"2"``)
        scanned for numbers; non-list values are scanned as text.
        """
        if isinstance(value, list):
            numbers = []
            for item in value:
                if isinstance(item, bool):
                    continue  # bool is an int subclass but never a result index
                if isinstance(item, int):
                    numbers.append(item)
                else:
                    numbers.extend(self._extract_numbers(str(item)))
            return numbers
        return self._extract_numbers(str(value))

    def _match_section_to_key(self, section: str) -> Union[str, None]:
        """Map header text to a section key, or ``None`` if unrecognised.

        Accepts header text that still contains its colon as well as a bare
        label such as ``"Decision"`` or ``"**Selected Results**"``; the
        identifier patterns all end in ``:``, so a colon is appended to the
        bare label before matching it in full.
        """
        label = section.strip(' \t*#')
        for key, pattern in self.section_identifiers:
            if re.search(pattern, section, re.IGNORECASE):
                return key
            if re.fullmatch(pattern, label + ':', re.IGNORECASE):
                return key
        return None

    def _extract_numbers(self, text: str) -> List[int]:
        """Return every standalone integer from 1 to 10 found in ``text``."""
        return [int(num) for num in re.findall(r'\b(?:10|[1-9])\b', text)]

    def _keyword_scores(self, text: str):
        """Return ``(refine_score, answer_score)`` keyword hit counts.

        Keywords must start at a word boundary so that "insufficient" does
        not also count as "sufficient"; suffixes are still allowed, so
        "answers" or "enough information" match.
        """
        lowered = text.lower()

        def score(kind: str) -> int:
            return sum(
                len(re.findall(r'\b' + re.escape(keyword.lower()), lowered))
                for keyword in self.decision_keywords[kind]
            )

        return score('refine'), score('answer')

    def _infer_decision(self, text: str) -> str:
        """Return ``'refine'`` if refine keywords outnumber answer keywords, else ``'answer'``."""
        refine_score, answer_score = self._keyword_scores(text)
        return 'refine' if refine_score > answer_score else 'answer'

    def _is_valid_result(self, result: Dict[str, Union[str, List[int]]]) -> bool:
        """Return True when at least one of decision/response/selected_results is set."""
        return bool(result.get('decision') or result.get('response') or result.get('selected_results'))
# Example usage
if __name__ == "__main__":
    # Demo: parse a labelled sample reply and pretty-print the result as JSON.
    sample_reply = """
Decision: answer
Reasoning: The scraped content provides comprehensive information about recent AI breakthroughs.
Selected Results: 1, 3
Response: Based on the scraped content, there have been several significant breakthroughs in AI recently...
"""
    demo_parser = UltimateLLMResponseParser()
    print(json.dumps(demo_parser.parse_llm_response(sample_reply), indent=2))
|