Severian committed on
Commit
2531f2f
·
verified ·
1 Parent(s): 7e0a2e6

Update response_formatter.py

Browse files
Files changed (1) hide show
  1. response_formatter.py +90 -149
response_formatter.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Optional, Tuple, List, Any
2
  import re
3
  import xml.etree.ElementTree as ET
4
  from datetime import datetime
@@ -17,6 +17,34 @@ if not logger.handlers:
17
  ch.setFormatter(formatter)
18
  logger.addHandler(ch)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  class ToolType:
21
  DUCKDUCKGO = "duckduckgo_search"
22
  REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
@@ -31,173 +59,81 @@ class ToolType:
31
  DQS = "dqs_nhis_adult_summary_health_statistics"
32
 
33
  class ResponseFormatter:
34
- @staticmethod
 
 
 
 
 
 
 
 
35
  def format_thought(
 
36
  thought: str,
37
  observation: str,
38
- citations: List[Dict] = None,
39
- metadata: Dict = None,
40
- tool_outputs: List[Dict] = None
41
- ) -> Tuple[str, str]:
42
- """Format agent thought for both terminal and XML output"""
43
- # Terminal format
44
- terminal_output = {
45
- "type": "agent_thought",
46
- "content": thought,
47
- "metadata": metadata or {}
48
- }
49
-
50
- if tool_outputs:
51
- terminal_output["tool_outputs"] = tool_outputs
52
-
53
- # XML format
54
  root = ET.Element("agent_response")
55
- thought_elem = ET.SubElement(root, "thought")
56
- thought_elem.text = thought
57
 
 
 
 
 
58
  if observation:
59
  obs_elem = ET.SubElement(root, "observation")
60
  obs_elem.text = observation
61
-
62
  if tool_outputs:
63
  tools_elem = ET.SubElement(root, "tool_outputs")
64
- for tool_output in tool_outputs:
65
  tool_elem = ET.SubElement(tools_elem, "tool_output")
66
- tool_elem.attrib["type"] = tool_output.get("type", "")
67
- tool_elem.text = tool_output.get("content", "")
68
-
69
- if citations:
70
- cites_elem = ET.SubElement(root, "citations")
71
- for citation in citations:
72
- cite_elem = ET.SubElement(cites_elem, "citation")
73
- for key, value in citation.items():
74
- cite_elem.attrib[key] = str(value)
75
 
76
  xml_output = ET.tostring(root, encoding='unicode')
77
- return json.dumps(terminal_output), xml_output
78
-
79
- @staticmethod
80
- def _create_tool_element(parent: ET.Element, tool_name: str, tool_data: Dict) -> ET.Element:
81
- """Create XML element for specific tool type with appropriate structure"""
82
- tool_elem = ET.SubElement(parent, "tool")
83
- tool_elem.set("name", tool_name)
84
-
85
- # Handle different tool types
86
- if tool_name == ToolType.CENSUS:
87
- ResponseFormatter._format_census_data(tool_elem, tool_data)
88
- elif tool_name == ToolType.MERMAID:
89
- ResponseFormatter._format_mermaid_data(tool_elem, tool_data)
90
- elif tool_name in [ToolType.WISQARS, ToolType.WONDER, ToolType.NCHS]:
91
- ResponseFormatter._format_health_data(tool_elem, tool_data)
92
- else:
93
- # Generic tool output format
94
- content_elem = ET.SubElement(tool_elem, "content")
95
- content_elem.text = ResponseFormatter._clean_markdown(str(tool_data))
96
-
97
- return tool_elem
98
-
99
- @staticmethod
100
- def _format_census_data(tool_elem: ET.Element, data: Dict) -> None:
101
- """Format census data with specific structure"""
102
- try:
103
- # Extract census tract data
104
- tracts_elem = ET.SubElement(tool_elem, "census_tracts")
105
-
106
- # Parse the llm_result to extract structured data
107
- if "llm_result" in data:
108
- result = json.loads(data["llm_result"])
109
- for tract_data in result.get("tracts", []):
110
- tract_elem = ET.SubElement(tracts_elem, "tract")
111
- tract_elem.set("id", str(tract_data.get("tract", "")))
112
-
113
- # Add tract details
114
- for key, value in tract_data.items():
115
- if key != "tract":
116
- detail_elem = ET.SubElement(tract_elem, key.replace("_", ""))
117
- detail_elem.text = str(value)
118
- except:
119
- # Fallback to simple format if parsing fails
120
- content_elem = ET.SubElement(tool_elem, "content")
121
- content_elem.text = ResponseFormatter._clean_markdown(str(data))
122
-
123
- @staticmethod
124
- def _format_mermaid_data(tool_elem: ET.Element, data: Dict) -> None:
125
- """Format mermaid diagram data with improved error handling"""
126
- try:
127
- diagram_elem = ET.SubElement(tool_elem, "diagram")
128
 
129
- # Extract content from data
130
- content = ""
131
- if isinstance(data, dict):
132
- content = data.get("content", data.get("mermaid_diagram", ""))
133
- elif isinstance(data, str):
134
- content = data
135
-
136
- # Clean any remaining markdown/JSON formatting
137
- content = re.sub(r'```mermaid\s*|\s*```', '', content)
138
- content = re.sub(r'tool response:.*?{', '{', content)
139
- content = re.sub(r'}\s*\.$', '}', content)
140
-
141
- # Set cleaned content
142
- diagram_elem.text = content.strip()
143
-
144
- except Exception as e:
145
- logger.error(f"Error formatting mermaid data: {e}")
146
- content_elem = ET.SubElement(tool_elem, "content")
147
- content_elem.text = "Error formatting diagram"
148
-
149
- @staticmethod
150
- def _format_health_data(tool_elem: ET.Element, data: Dict) -> None:
151
- """Format health-related data from WISQARS, WONDER, etc."""
152
- try:
153
- if isinstance(data, dict):
154
- for key, value in data.items():
155
- category_elem = ET.SubElement(tool_elem, key.replace("_", ""))
156
- if isinstance(value, dict):
157
- for sub_key, sub_value in value.items():
158
- sub_elem = ET.SubElement(category_elem, sub_key.replace("_", ""))
159
- sub_elem.text = str(sub_value)
160
- else:
161
- category_elem.text = str(value)
162
- except:
163
- content_elem = ET.SubElement(tool_elem, "content")
164
- content_elem.text = ResponseFormatter._clean_markdown(str(data))
165
-
166
- @staticmethod
167
- def _extract_tool_outputs(observation: str) -> Dict[str, Any]:
168
- """Extract and clean tool outputs from observation"""
169
- tool_outputs = {}
170
- try:
171
- if isinstance(observation, str):
172
- data = json.loads(observation)
173
- for key, value in data.items():
174
- if isinstance(value, str) and "llm_result" in value:
175
- try:
176
- tool_result = json.loads(value)
177
- tool_outputs[key] = tool_result
178
- except:
179
- tool_outputs[key] = value
180
- except:
181
- pass
182
- return tool_outputs
183
-
184
- @staticmethod
185
- def format_message(message: str) -> Tuple[str, str]:
186
- """Format agent message for both terminal and XML output"""
187
- # Terminal format
188
- terminal_output = message.strip()
189
-
190
- # XML format
191
  root = ET.Element("agent_response")
192
  msg_elem = ET.SubElement(root, "message")
193
- msg_elem.text = message.strip()
194
 
195
  xml_output = ET.tostring(root, encoding='unicode')
196
- return terminal_output, xml_output
197
-
198
- @staticmethod
199
- def format_error(error: str) -> Tuple[str, str]:
 
 
 
 
200
  """Format error message for both terminal and XML output"""
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  # Terminal format
202
  terminal_output = f"Error: {error}"
203
 
@@ -207,6 +143,11 @@ class ResponseFormatter:
207
  error_elem.text = error
208
 
209
  xml_output = ET.tostring(root, encoding='unicode')
 
 
 
 
 
210
  return terminal_output, xml_output
211
 
212
  @staticmethod
 
1
+ from typing import Dict, Optional, Tuple, List, Any, Set
2
  import re
3
  import xml.etree.ElementTree as ET
4
  from datetime import datetime
 
17
  ch.setFormatter(formatter)
18
  logger.addHandler(ch)
19
 
20
+ class StreamingFormatter:
21
+ def __init__(self):
22
+ self.processed_events = set()
23
+ self.current_tool_outputs = []
24
+ self.current_citations = []
25
+ self.current_metadata = {}
26
+ self.current_message_id = None
27
+ self.current_message_buffer = ""
28
+
29
+ def reset(self):
30
+ """Reset the formatter state"""
31
+ self.processed_events.clear()
32
+ self.current_tool_outputs.clear()
33
+ self.current_citations.clear()
34
+ self.current_metadata.clear()
35
+ self.current_message_id = None
36
+ self.current_message_buffer = ""
37
+
38
+ def append_to_buffer(self, text: str):
39
+ """Append text to the current message buffer"""
40
+ self.current_message_buffer += text
41
+
42
+ def get_and_clear_buffer(self) -> str:
43
+ """Get the current buffer content and clear it"""
44
+ content = self.current_message_buffer
45
+ self.current_message_buffer = ""
46
+ return content
47
+
48
  class ToolType:
49
  DUCKDUCKGO = "duckduckgo_search"
50
  REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
 
59
  DQS = "dqs_nhis_adult_summary_health_statistics"
60
 
61
  class ResponseFormatter:
62
+ _instance = None
63
+
64
+ def __new__(cls):
65
+ if cls._instance is None:
66
+ cls._instance = super(ResponseFormatter, cls).__new__(cls)
67
+ cls._instance.streaming_state = StreamingFormatter()
68
+ cls._instance.logger = logger
69
+ return cls._instance
70
+
71
  def format_thought(
72
+ self,
73
  thought: str,
74
  observation: str,
75
+ tool_outputs: List[Dict] = None,
76
+ event_id: str = None,
77
+ message_id: str = None
78
+ ) -> Optional[Tuple[str, str]]:
79
+ """Format thought and tool outputs as XML"""
 
 
 
 
 
 
 
 
 
 
 
80
  root = ET.Element("agent_response")
 
 
81
 
82
+ if thought:
83
+ thought_elem = ET.SubElement(root, "thought")
84
+ thought_elem.text = thought
85
+
86
  if observation:
87
  obs_elem = ET.SubElement(root, "observation")
88
  obs_elem.text = observation
89
+
90
  if tool_outputs:
91
  tools_elem = ET.SubElement(root, "tool_outputs")
92
+ for output in tool_outputs:
93
  tool_elem = ET.SubElement(tools_elem, "tool_output")
94
+ tool_elem.attrib["type"] = output.get("type", "")
95
+ tool_elem.text = output.get("content", "")
 
 
 
 
 
 
 
96
 
97
  xml_output = ET.tostring(root, encoding='unicode')
98
+ return thought, xml_output
99
+
100
+ def format_message(
101
+ self,
102
+ message: str,
103
+ event_id: str = None,
104
+ message_id: str = None
105
+ ) -> Optional[Tuple[str, str]]:
106
+ """Format message as XML for frontend"""
107
+ if not message:
108
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  root = ET.Element("agent_response")
111
  msg_elem = ET.SubElement(root, "message")
112
+ msg_elem.text = message
113
 
114
  xml_output = ET.tostring(root, encoding='unicode')
115
+ return message, xml_output
116
+
117
+ def format_error(
118
+ self,
119
+ error: str,
120
+ event_id: str = None,
121
+ message_id: str = None
122
+ ) -> Optional[Tuple[str, str]]:
123
  """Format error message for both terminal and XML output"""
124
+ # Skip if already processed
125
+ if event_id and event_id in self.streaming_state.processed_events:
126
+ return None
127
+
128
+ # Handle message state
129
+ if message_id != self.streaming_state.current_message_id:
130
+ self.streaming_state.reset()
131
+ self.streaming_state.current_message_id = message_id
132
+
133
+ # Skip empty errors
134
+ if not error:
135
+ return None
136
+
137
  # Terminal format
138
  terminal_output = f"Error: {error}"
139
 
 
143
  error_elem.text = error
144
 
145
  xml_output = ET.tostring(root, encoding='unicode')
146
+
147
+ # Track processed event
148
+ if event_id:
149
+ self.streaming_state.processed_events.add(event_id)
150
+
151
  return terminal_output, xml_output
152
 
153
  @staticmethod