Spaces:

Underground-Digital
/

cc-api

Sleeping

App Files Files Community

Severian committed on Dec 11, 2024

Commit

2531f2f

verified ·

1 Parent(s): 7e0a2e6

Update response_formatter.py

Browse files

Files changed (1) hide show

response_formatter.py +90 -149

response_formatter.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, Optional, Tuple, List, Any
 import re
 import xml.etree.ElementTree as ET
 from datetime import datetime
@@ -17,6 +17,34 @@ if not logger.handlers:
     ch.setFormatter(formatter)
     logger.addHandler(ch)
 class ToolType:
     DUCKDUCKGO = "duckduckgo_search"
     REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
@@ -31,173 +59,81 @@ class ToolType:
     DQS = "dqs_nhis_adult_summary_health_statistics"
 class ResponseFormatter:
-    @staticmethod
     def format_thought(
         thought: str,
         observation: str,
-        citations: List[Dict] = None,
-        metadata: Dict = None,
-        tool_outputs: List[Dict] = None
-    ) -> Tuple[str, str]:
-        """Format agent thought for both terminal and XML output"""
-        # Terminal format
-        terminal_output = {
-            "type": "agent_thought",
-            "content": thought,
-            "metadata": metadata or {}
-        }
-        if tool_outputs:
-            terminal_output["tool_outputs"] = tool_outputs
-        # XML format
         root = ET.Element("agent_response")
-        thought_elem = ET.SubElement(root, "thought")
-        thought_elem.text = thought
         if observation:
             obs_elem = ET.SubElement(root, "observation")
             obs_elem.text = observation
         if tool_outputs:
             tools_elem = ET.SubElement(root, "tool_outputs")
-            for tool_output in tool_outputs:
                 tool_elem = ET.SubElement(tools_elem, "tool_output")
-                tool_elem.attrib["type"] = tool_output.get("type", "")
-                tool_elem.text = tool_output.get("content", "")
-        if citations:
-            cites_elem = ET.SubElement(root, "citations")
-            for citation in citations:
-                cite_elem = ET.SubElement(cites_elem, "citation")
-                for key, value in citation.items():
-                    cite_elem.attrib[key] = str(value)
         xml_output = ET.tostring(root, encoding='unicode')
-        return json.dumps(terminal_output), xml_output
-    @staticmethod
-    def _create_tool_element(parent: ET.Element, tool_name: str, tool_data: Dict) -> ET.Element:
-        """Create XML element for specific tool type with appropriate structure"""
-        tool_elem = ET.SubElement(parent, "tool")
-        tool_elem.set("name", tool_name)
-        # Handle different tool types
-        if tool_name == ToolType.CENSUS:
-            ResponseFormatter._format_census_data(tool_elem, tool_data)
-        elif tool_name == ToolType.MERMAID:
-            ResponseFormatter._format_mermaid_data(tool_elem, tool_data)
-        elif tool_name in [ToolType.WISQARS, ToolType.WONDER, ToolType.NCHS]:
-            ResponseFormatter._format_health_data(tool_elem, tool_data)
-        else:
-            # Generic tool output format
-            content_elem = ET.SubElement(tool_elem, "content")
-            content_elem.text = ResponseFormatter._clean_markdown(str(tool_data))
-        return tool_elem
-    @staticmethod
-    def _format_census_data(tool_elem: ET.Element, data: Dict) -> None:
-        """Format census data with specific structure"""
-        try:
-            # Extract census tract data
-            tracts_elem = ET.SubElement(tool_elem, "census_tracts")
-            # Parse the llm_result to extract structured data
-            if "llm_result" in data:
-                result = json.loads(data["llm_result"])
-                for tract_data in result.get("tracts", []):
-                    tract_elem = ET.SubElement(tracts_elem, "tract")
-                    tract_elem.set("id", str(tract_data.get("tract", "")))
-                    # Add tract details
-                    for key, value in tract_data.items():
-                        if key != "tract":
-                            detail_elem = ET.SubElement(tract_elem, key.replace("_", ""))
-                            detail_elem.text = str(value)
-        except:
-            # Fallback to simple format if parsing fails
-            content_elem = ET.SubElement(tool_elem, "content")
-            content_elem.text = ResponseFormatter._clean_markdown(str(data))
-    @staticmethod
-    def _format_mermaid_data(tool_elem: ET.Element, data: Dict) -> None:
-        """Format mermaid diagram data with improved error handling"""
-        try:
-            diagram_elem = ET.SubElement(tool_elem, "diagram")
-            # Extract content from data
-            content = ""
-            if isinstance(data, dict):
-                content = data.get("content", data.get("mermaid_diagram", ""))
-            elif isinstance(data, str):
-                content = data
-            # Clean any remaining markdown/JSON formatting
-            content = re.sub(r'```mermaid\s*|\s*```', '', content)
-            content = re.sub(r'tool response:.*?{', '{', content)
-            content = re.sub(r'}\s*\.$', '}', content)
-            # Set cleaned content
-            diagram_elem.text = content.strip()
-        except Exception as e:
-            logger.error(f"Error formatting mermaid data: {e}")
-            content_elem = ET.SubElement(tool_elem, "content")
-            content_elem.text = "Error formatting diagram"
-    @staticmethod
-    def _format_health_data(tool_elem: ET.Element, data: Dict) -> None:
-        """Format health-related data from WISQARS, WONDER, etc."""
-        try:
-            if isinstance(data, dict):
-                for key, value in data.items():
-                    category_elem = ET.SubElement(tool_elem, key.replace("_", ""))
-                    if isinstance(value, dict):
-                        for sub_key, sub_value in value.items():
-                            sub_elem = ET.SubElement(category_elem, sub_key.replace("_", ""))
-                            sub_elem.text = str(sub_value)
-                    else:
-                        category_elem.text = str(value)
-        except:
-            content_elem = ET.SubElement(tool_elem, "content")
-            content_elem.text = ResponseFormatter._clean_markdown(str(data))
-    @staticmethod
-    def _extract_tool_outputs(observation: str) -> Dict[str, Any]:
-        """Extract and clean tool outputs from observation"""
-        tool_outputs = {}
-        try:
-            if isinstance(observation, str):
-                data = json.loads(observation)
-                for key, value in data.items():
-                    if isinstance(value, str) and "llm_result" in value:
-                        try:
-                            tool_result = json.loads(value)
-                            tool_outputs[key] = tool_result
-                        except:
-                            tool_outputs[key] = value
-        except:
-            pass
-        return tool_outputs
-    @staticmethod
-    def format_message(message: str) -> Tuple[str, str]:
-        """Format agent message for both terminal and XML output"""
-        # Terminal format
-        terminal_output = message.strip()
-        # XML format
         root = ET.Element("agent_response")
         msg_elem = ET.SubElement(root, "message")
-        msg_elem.text = message.strip()
         xml_output = ET.tostring(root, encoding='unicode')
-        return terminal_output, xml_output
-    @staticmethod
-    def format_error(error: str) -> Tuple[str, str]:
         """Format error message for both terminal and XML output"""
         # Terminal format
         terminal_output = f"Error: {error}"
@@ -207,6 +143,11 @@ class ResponseFormatter:
         error_elem.text = error
         xml_output = ET.tostring(root, encoding='unicode')
         return terminal_output, xml_output
     @staticmethod

+from typing import Dict, Optional, Tuple, List, Any, Set
 import re
 import xml.etree.ElementTree as ET
 from datetime import datetime
     ch.setFormatter(formatter)
     logger.addHandler(ch)
class StreamingFormatter:
    """Scratch state kept while a single message is being streamed.

    Tracks the event ids already handled, the tool outputs, citations and
    metadata gathered so far, the id of the message in progress, and a
    text buffer for that message.
    """

    def __init__(self) -> None:
        # Scalars describing the message currently being assembled.
        self.current_message_buffer = ""
        self.current_message_id = None
        # Containers are created once here; reset() empties them in place.
        self.current_metadata = {}
        self.current_citations = []
        self.current_tool_outputs = []
        self.processed_events = set()

    def reset(self) -> None:
        """Return every field to its freshly constructed value."""
        # Clearing in place keeps any outside reference to these
        # containers pointing at the live (now empty) object.
        for container in (
            self.processed_events,
            self.current_tool_outputs,
            self.current_citations,
            self.current_metadata,
        ):
            container.clear()
        self.current_message_id = None
        self.current_message_buffer = ""

    def append_to_buffer(self, text: str) -> None:
        """Add *text* to the end of the message buffer."""
        self.current_message_buffer += text

    def get_and_clear_buffer(self) -> str:
        """Hand back the buffered text and leave the buffer empty."""
        drained, self.current_message_buffer = self.current_message_buffer, ""
        return drained
 class ToolType:
     DUCKDUCKGO = "duckduckgo_search"
     REDDIT_NEWS = "reddit_x_gnews_newswire_crunchbase"
     DQS = "dqs_nhis_adult_summary_health_statistics"
 class ResponseFormatter:
+    _instance = None
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super(ResponseFormatter, cls).__new__(cls)
+            cls._instance.streaming_state = StreamingFormatter()
+            cls._instance.logger = logger
+        return cls._instance
     def format_thought(
         self,
         thought: str,
         observation: str,
         tool_outputs: Optional[List[Dict]] = None,
         event_id: Optional[str] = None,
         message_id: Optional[str] = None
     ) -> Optional[Tuple[str, str]]:
         """Format thought and tool outputs as XML.

         Builds an ``<agent_response>`` element holding, when non-empty, a
         ``<thought>``, an ``<observation>`` and a ``<tool_outputs>`` list
         with one ``<tool_output type="...">`` child per entry.

         Args:
             thought: Thought text; omitted from the XML when empty.
             observation: Observation text; omitted from the XML when empty.
             tool_outputs: Dicts read through their ``"type"`` and
                 ``"content"`` keys. Missing or ``None`` values become an
                 empty string and other non-string values are rendered
                 with ``str()``.
             event_id: Accepted for signature compatibility; not used here.
             message_id: Accepted for signature compatibility; not used here.

         Returns:
             A ``(thought, xml_string)`` tuple.
         """
         def as_xml_text(value) -> str:
             # ElementTree can only serialize str text/attributes; anything
             # else (None attribute, dict, number) raises TypeError in
             # ET.tostring, so normalise before assigning.
             if value is None:
                 return ""
             return value if isinstance(value, str) else str(value)

         root = ET.Element("agent_response")
         if thought:
             thought_elem = ET.SubElement(root, "thought")
             thought_elem.text = thought
         if observation:
             obs_elem = ET.SubElement(root, "observation")
             obs_elem.text = observation
         if tool_outputs:
             tools_elem = ET.SubElement(root, "tool_outputs")
             for output in tool_outputs:
                 tool_elem = ET.SubElement(tools_elem, "tool_output")
                 tool_elem.attrib["type"] = as_xml_text(output.get("type", ""))
                 tool_elem.text = as_xml_text(output.get("content", ""))
         xml_output = ET.tostring(root, encoding='unicode')
         return thought, xml_output
+    def format_message(
+        self,
+        message: str,
+        event_id: str = None,
+        message_id: str = None
+    ) -> Optional[Tuple[str, str]]:
+        """Format message as XML for frontend"""
+        if not message:
+            return None
         root = ET.Element("agent_response")
         msg_elem = ET.SubElement(root, "message")
+        msg_elem.text = message
         xml_output = ET.tostring(root, encoding='unicode')
+        return message, xml_output
+    def format_error(
+        self,
+        error: str,
+        event_id: str = None,
+        message_id: str = None
+    ) -> Optional[Tuple[str, str]]:
         """Format error message for both terminal and XML output"""
+        # Skip if already processed
+        if event_id and event_id in self.streaming_state.processed_events:
+            return None
+        # Handle message state
+        if message_id != self.streaming_state.current_message_id:
+            self.streaming_state.reset()
+            self.streaming_state.current_message_id = message_id
+        # Skip empty errors
+        if not error:
+            return None
         # Terminal format
         terminal_output = f"Error: {error}"
         error_elem.text = error
         xml_output = ET.tostring(root, encoding='unicode')
+        # Track processed event
+        if event_id:
+            self.streaming_state.processed_events.add(event_id)
         return terminal_output, xml_output
     @staticmethod