Severian committed on
Commit
3c1c117
·
verified ·
1 Parent(s): a7a635b

Update response_formatter.py

Browse files
Files changed (1) hide show
  1. response_formatter.py +310 -120
response_formatter.py CHANGED
@@ -1,127 +1,317 @@
1
- export class LogicController {
2
- constructor() {
3
- this.storage = {
4
- xmlBuffer: '',
5
- isFirstMessage: true,
6
- toolOutputs: new Map()
7
- };
8
-
9
- this.eventDispatcher = new EventTarget();
10
- }
11
-
12
- processStreamData(chunk) {
13
- try {
14
- const rawData = chunk.data || chunk;
15
- console.log('Raw chunk received:', rawData);
16
-
17
- if (typeof rawData === 'string' && rawData.includes('<agent_response>')) {
18
- this.storage.xmlBuffer += rawData;
19
-
20
- // Process complete messages
21
- while (this.storage.xmlBuffer.includes('<agent_response>')) {
22
- const startTag = '<agent_response>';
23
- const endTag = '</agent_response>';
24
-
25
- const startIndex = this.storage.xmlBuffer.indexOf(startTag);
26
- const endIndex = this.storage.xmlBuffer.indexOf(endTag);
27
-
28
- if (startIndex === -1 || endIndex === -1) break;
29
-
30
- // Extract complete message
31
- const completeMessage = this.storage.xmlBuffer.slice(
32
- startIndex,
33
- endIndex + endTag.length
34
- );
35
-
36
- // Process message content
37
- this.processMessage(completeMessage);
38
-
39
- // Remove processed message
40
- this.storage.xmlBuffer = this.storage.xmlBuffer.slice(
41
- endIndex + endTag.length
42
- );
43
- }
44
- }
45
- } catch (error) {
46
- console.error('Error processing stream data:', error);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
- }
49
-
50
- processMessage(xmlMessage) {
51
- try {
52
- // Parse message content
53
- const messageMatch = xmlMessage.match(/<message>(.*?)<\/message>/s);
54
- const thoughtMatch = xmlMessage.match(/<thought>(.*?)<\/thought>/s);
55
- const toolOutputsMatch = xmlMessage.match(/<tool_outputs>(.*?)<\/tool_outputs>/s);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- // Handle regular message
58
- if (messageMatch?.[1]) {
59
- this.dispatchUpdate('chatwindow', {
60
- message: messageMatch[1].trim(),
61
- replace: false,
62
- format: true
63
- });
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- // Handle thought content
67
- if (thoughtMatch?.[1]) {
68
- this.dispatchUpdate('chatwindow', {
69
- message: thoughtMatch[1].trim(),
70
- replace: false,
71
- format: true,
72
- isThought: true
73
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  }
75
 
76
- // Handle tool outputs
77
- if (toolOutputsMatch?.[1]) {
78
- this.processToolOutputs(toolOutputsMatch[1]);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
 
 
 
 
80
 
81
- } catch (error) {
82
- console.error('Error processing message:', error);
83
- }
84
- }
85
-
86
- processToolOutputs(toolOutputsXml) {
87
- try {
88
- const parser = new DOMParser();
89
- const doc = parser.parseFromString(toolOutputsXml, 'text/xml');
90
- const outputs = doc.getElementsByTagName('tool_output');
91
-
92
- Array.from(outputs).forEach(output => {
93
- const type = output.getAttribute('type');
94
- const content = output.textContent;
95
-
96
- // Store tool output
97
- const outputId = `${type}-${Date.now()}`;
98
- this.storage.toolOutputs.set(outputId, {
99
- type,
100
- content,
101
- timestamp: Date.now()
102
- });
103
-
104
- // Dispatch tool-specific update
105
- this.dispatchUpdate('maincontent', {
106
- observationWidget: {
107
- id: outputId,
108
- type,
109
- tool: type,
110
- observation: content
111
- }
112
- });
113
- });
114
-
115
- } catch (error) {
116
- console.error('Error processing tool outputs:', error);
117
- }
118
- }
119
-
120
- dispatchUpdate(component, data) {
121
- console.log('Dispatching update:', component, data);
122
- const event = new CustomEvent('componentUpdate', {
123
- detail: { component, data }
124
- });
125
- this.eventDispatcher.dispatchEvent(event);
126
- }
127
- }
 
1
+ from typing import Dict, Optional, Tuple, List, Any, Set, Union
2
+ import re
3
+ import xml.etree.ElementTree as ET
4
+ from datetime import datetime
5
+ import json
6
+ import logging
7
+ from enum import Enum
8
+
9
# Module-level logger shared by the formatter classes in this file.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Attach a console handler only when nothing in the logger hierarchy can
# already emit records. ``hasHandlers()`` also inspects ancestor loggers, so
# an application that configured the root logger does not get every record
# printed twice (once by this handler and once more through propagation).
if not logger.hasHandlers():
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)
20
+
21
class StreamingFormatter:
    """Mutable bookkeeping for the message that is currently being streamed.

    Holds the set of already-processed event ids, the tool outputs, citations
    and metadata collected so far, the id of the current message and a text
    buffer that accumulates message content.
    """

    def __init__(self):
        self.processed_events = set()
        self.current_tool_outputs = []
        self.current_citations = []
        self.current_metadata = {}
        self.current_message_id = None
        self.current_message_buffer = ""

    def reset(self):
        """Return every field to its initial, empty state."""
        # The containers are emptied in place (not rebound), so any outside
        # reference to them observes the reset as well.
        containers = (
            self.processed_events,
            self.current_tool_outputs,
            self.current_citations,
            self.current_metadata,
        )
        for container in containers:
            container.clear()
        self.current_message_buffer = ""
        self.current_message_id = None

    def append_to_buffer(self, text: str):
        """Add ``text`` to the end of the message buffer."""
        self.current_message_buffer = self.current_message_buffer + text

    def get_and_clear_buffer(self) -> str:
        """Return the buffered text and leave the buffer empty."""
        drained, self.current_message_buffer = self.current_message_buffer, ""
        return drained
48
+
49
class ToolType(Enum):
    """Enum for supported tool types.

    Each member pairs a short symbolic name (e.g. ``MERMAID``) with the
    string stored as its value (e.g. ``"mermaid_output"``).
    """
    DUCKDUCKGO = "ddgo_search"
    REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
    PUBMED = "pubmed_search"
    CENSUS = "get_census_data"
    HEATMAP = "heatmap_code"
    MERMAID = "mermaid_output"
    WISQARS = "wisqars"
    WONDER = "wonder"
    NCHS = "nchs"
    ONESTEP = "onestep"
    DQS = "dqs_nhis_adult_summary_health_statistics"

    @classmethod
    def get_tool_type(cls, tool_name: str) -> Optional['ToolType']:
        """Get enum member from tool name string.

        The string is matched against member values first (``"ddgo_search"``
        resolves to ``DUCKDUCKGO``) and, failing that, case-insensitively
        against member names (``"mermaid"`` resolves to ``MERMAID``).

        Args:
            tool_name: Tool identifier to resolve.

        Returns:
            The matching member, or ``None`` when nothing matches.
        """
        # A name-only lookup can never resolve value strings such as
        # "mermaid_output" or "heatmap_code", so try the value first.
        try:
            return cls(tool_name)
        except ValueError:
            pass
        try:
            return cls[tool_name.upper()]
        except KeyError:
            return None
70
+
71
class ResponseFormatter:
    """Process-wide singleton that renders agent events for two consumers.

    Each ``format_*`` method returns a ``(terminal_output, xml_output)`` pair:
    a string for terminal display and an ``<agent_response>`` XML document.
    Deduplication and message accumulation state lives in
    ``self.streaming_state`` and is shared by every caller, because
    ``ResponseFormatter()`` always returns the same instance.
    """
    _instance = None

    def __new__(cls):
        # Create the shared instance on first use and attach its state there,
        # so repeated construction never resets the streaming state.
        if cls._instance is None:
            cls._instance = super(ResponseFormatter, cls).__new__(cls)
            cls._instance.streaming_state = StreamingFormatter()
            cls._instance.logger = logger
        return cls._instance

    @staticmethod
    def _xml_text(value: Any) -> str:
        """Coerce ``value`` to a string that ElementTree can serialize.

        ElementTree raises ``TypeError`` for non-string text and attribute
        values, so dicts and lists are rendered as JSON and everything else
        through ``str()``. ``None`` becomes the empty string.
        """
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, (dict, list)):
            try:
                return json.dumps(value)
            except (TypeError, ValueError):
                # Not JSON-serializable: fall back to the plain repr text.
                return str(value)
        return str(value)

    def _begin_event(self, event_id: Optional[str], message_id: Optional[str]) -> bool:
        """Apply the shared dedup and message-switch rules.

        Returns ``False`` when ``event_id`` was already processed. Otherwise
        resets the streaming state if ``message_id`` differs from the current
        one, and returns ``True``.
        """
        state = self.streaming_state
        if event_id and event_id in state.processed_events:
            return False
        if message_id != state.current_message_id:
            state.reset()
            state.current_message_id = message_id
        return True

    def format_thought(
        self,
        thought: str,
        observation: str,
        citations: Optional[List[Dict]] = None,
        metadata: Optional[Dict] = None,
        tool_outputs: Optional[List[Dict]] = None,
        event_id: Optional[str] = None,
        message_id: Optional[str] = None
    ) -> Optional[Tuple[str, str]]:
        """Format agent thought for both terminal and XML output.

        Args:
            thought: The agent's reasoning text.
            observation: Observation text; emitted in the XML only.
            citations: Dicts whose items become attributes of ``<citation>``.
            metadata: Copied into the terminal JSON under ``"metadata"``.
            tool_outputs: Dicts with ``"type"`` and ``"content"``; duplicates
                (same type and content) are dropped, first occurrence wins.
            event_id: When given, an event seen before is skipped.
            message_id: A change of id resets the streaming state.

        Returns:
            ``(terminal_json, xml)``, or ``None`` when the event was already
            processed or thought, observation and tool outputs are all empty.
        """
        if not self._begin_event(event_id, message_id):
            return None

        # Skip empty thoughts
        if not thought and not observation and not tool_outputs:
            return None

        # Terminal format
        terminal_output = {
            "type": "agent_thought",
            "content": thought,
            "metadata": metadata or {}
        }

        unique_outputs = []
        if tool_outputs:
            # Deduplicate tool outputs, preserving first-seen order.
            seen_outputs = set()
            for output in tool_outputs:
                output_key = f"{output.get('type')}:{output.get('content')}"
                if output_key not in seen_outputs:
                    seen_outputs.add(output_key)
                    unique_outputs.append(output)
            terminal_output["tool_outputs"] = unique_outputs

        # XML format
        root = ET.Element("agent_response")

        if thought:
            ET.SubElement(root, "thought").text = thought

        if observation:
            ET.SubElement(root, "observation").text = observation

        if tool_outputs:
            tools_elem = ET.SubElement(root, "tool_outputs")
            for tool_output in unique_outputs:
                tool_elem = ET.SubElement(tools_elem, "tool_output")
                # Content may be a dict (e.g. formatted heatmap data) and the
                # type may be None; both would make ET.tostring raise.
                tool_elem.attrib["type"] = self._xml_text(tool_output.get("type", ""))
                tool_elem.text = self._xml_text(tool_output.get("content", ""))

        if citations:
            cites_elem = ET.SubElement(root, "citations")
            for citation in citations:
                cite_elem = ET.SubElement(cites_elem, "citation")
                for key, value in citation.items():
                    cite_elem.attrib[str(key)] = str(value)

        xml_output = ET.tostring(root, encoding='unicode')

        # Track processed event
        if event_id:
            self.streaming_state.processed_events.add(event_id)

        return json.dumps(terminal_output), xml_output

    def format_message(
        self,
        message: str,
        event_id: Optional[str] = None,
        message_id: Optional[str] = None
    ) -> Optional[Tuple[str, str]]:
        """Format agent message for both terminal and XML output.

        ``message`` is appended to the streaming buffer and the whole
        accumulated buffer (stripped) is returned, so successive calls for
        the same ``message_id`` return progressively longer text.

        Returns:
            ``(text, xml)``, or ``None`` when the event was already processed
            or the buffer holds only whitespace.
        """
        if not self._begin_event(event_id, message_id):
            return None

        # Accumulate message content; an empty or None chunk adds nothing.
        if message:
            self.streaming_state.append_to_buffer(message)

        # Only output if we have meaningful content
        terminal_output = self.streaming_state.current_message_buffer.strip()
        if not terminal_output:
            return None

        # XML format
        root = ET.Element("agent_response")
        ET.SubElement(root, "message").text = terminal_output
        xml_output = ET.tostring(root, encoding='unicode')

        # Track processed event
        if event_id:
            self.streaming_state.processed_events.add(event_id)

        return terminal_output, xml_output

    def format_error(
        self,
        error: str,
        event_id: Optional[str] = None,
        message_id: Optional[str] = None
    ) -> Optional[Tuple[str, str]]:
        """Format error message for both terminal and XML output.

        Returns:
            ``("Error: <error>", xml)``, or ``None`` when the event was
            already processed or ``error`` is empty.
        """
        if not self._begin_event(event_id, message_id):
            return None

        # Skip empty errors
        if not error:
            return None

        # Terminal format
        terminal_output = f"Error: {error}"

        # XML format; coerce so an exception object passed as ``error`` does
        # not break serialization.
        root = ET.Element("agent_response")
        ET.SubElement(root, "error").text = self._xml_text(error)
        xml_output = ET.tostring(root, encoding='unicode')

        # Track processed event
        if event_id:
            self.streaming_state.processed_events.add(event_id)

        return terminal_output, xml_output

    def format_tool_output(
        self,
        tool_type: str,
        content: Union[str, Dict],
        metadata: Optional[Dict] = None
    ) -> Dict:
        """Format tool output into standardized structure.

        Returns:
            A dict with ``"type"``, ``"content"`` and ``"metadata"`` keys.
            Unknown tool types are passed through unchanged; mermaid and
            heatmap content is normalized; any exception is reported as a
            dict of type ``"error"``.
        """
        try:
            # Get enum tool type
            tool = ToolType.get_tool_type(tool_type)
            if not tool:
                self.logger.warning("Unknown tool type: %s", tool_type)
                return {
                    "type": tool_type,
                    "content": content,
                    "metadata": metadata or {}
                }

            # Format based on tool type
            if tool == ToolType.MERMAID:
                return {
                    "type": "mermaid",
                    "content": self._clean_mermaid_content(content),
                    "metadata": metadata or {}
                }
            if tool == ToolType.HEATMAP:
                return {
                    "type": "heatmap",
                    "content": self._format_heatmap_data(content),
                    "metadata": metadata or {}
                }

            # Default formatting for other tools
            return {
                "type": tool.value,
                "content": content,
                "metadata": metadata or {}
            }

        except Exception as e:
            # Best-effort boundary: report the failure as data, do not raise.
            self.logger.error("Error formatting tool output: %s", e)
            return {
                "type": "error",
                "content": str(e),
                "metadata": metadata or {}
            }

    def _clean_mermaid_content(self, content: Union[str, Dict]) -> str:
        """Clean and standardize mermaid diagram content.

        A dict is unwrapped through its ``"mermaid_diagram"`` key. Markdown
        code fences are removed and surrounding whitespace is stripped.
        """
        try:
            if isinstance(content, dict):
                content = content.get("mermaid_diagram", "")
            if content is None:
                # A missing diagram yields empty text, not the string "None".
                return ""

            # Remove markdown formatting
            content = re.sub(r'```mermaid\s*|\s*```', '', content)

            # Clean up whitespace
            return content.strip()

        except Exception as e:
            self.logger.error("Error cleaning mermaid content: %s", e)
            return str(content)

    def _format_heatmap_data(self, content: Union[str, Dict]) -> Dict:
        """Format heatmap data into standardized structure.

        A string is parsed as JSON first.

        Returns:
            ``{"data", "options", "metadata"}`` with empty defaults, or
            ``{"error": <message>}`` when the content cannot be interpreted.
        """
        try:
            if isinstance(content, str):
                content = json.loads(content)
            if not isinstance(content, dict):
                # Valid JSON that is not an object (list, number, ...) cannot
                # carry the expected keys; fail with a clear message.
                raise TypeError(
                    f"heatmap content must be a JSON object, got {type(content).__name__}"
                )

            return {
                "data": content.get("data", []),
                "options": content.get("options", {}),
                "metadata": content.get("metadata", {})
            }

        except Exception as e:
            self.logger.error("Error formatting heatmap data: %s", e)
            return {"error": str(e)}

    @staticmethod
    def _clean_markdown(text: str) -> str:
        """Clean markdown formatting from text.

        Removes fenced code blocks together with their contents, drops the
        characters ``* _ ` #``, and collapses runs of three or more newlines
        to a single blank line.
        """
        text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
        text = re.sub(r'[*_`#]', '', text)
        return re.sub(r'\n{3,}', '\n\n', text.strip())