""" Utilities for cleaning and validating Manim code generated by LLMs. """ import re import logging import json logger = logging.getLogger(__name__) def clean_manim_code(raw_code): """ Clean Manim code from LLM responses by removing markdown formatting and ensuring proper structure. Args: raw_code (str): The raw code from the LLM response Returns: str: Cleaned, executable Python code """ # Start with the raw code code = raw_code # Extract code from markdown code blocks if present if "```python" in code: parts = code.split("```python") if len(parts) > 1: code = parts[1] if "```" in code: code = code.split("```")[0] elif "```" in code: parts = code.split("```") if len(parts) > 1: code = parts[1] if "```" in parts[1]: code = code.split("```")[0] # Remove any remaining backticks code = code.replace('```', '') # Ensure code begins with the necessary import if not code.strip().startswith('from manim import'): code = 'from manim import *\n\n' + code # Verify the code contains a Scene class if 'class' not in code or 'Scene' not in code: logger.warning("Generated code does not contain a proper Scene class") # Add a basic scene structure if missing if 'class ManimScene(Scene):' not in code: code = 'from manim import *\n\nclass ManimScene(Scene):\n def construct(self):\n ' + code # Verify the code has a construct method if 'def construct(self)' not in code: logger.warning("Generated code does not contain a construct method") # Try to find where the class is defined and add construct method class_match = re.search(r'class\s+\w+\s*\(\s*Scene\s*\)\s*:', code) if class_match: insert_pos = class_match.end() code = code[:insert_pos] + '\n def construct(self):\n pass\n' + code[insert_pos:] # Ensure there's a wait at the end if not present if 'self.wait(' not in code.split('def construct')[-1]: # Find the end of the construct method to add wait construct_body_match = re.search(r'def\s+construct\s*\(\s*self\s*\)\s*:', code) if construct_body_match: # Check if the method has content method_content = code[construct_body_match.end():] indentation = ' ' # Default indentation # Try to determine indentation from code indent_match = re.search(r'\n(\s+)', method_content) if indent_match: indentation = indent_match.group(1) # Find a good place to insert the wait if '}' in method_content.splitlines()[-1]: # If last line closes something code = code.rstrip() + f'\n{indentation}self.wait(1)\n' else: code = code.rstrip() + f'\n{indentation}self.wait(1)\n' return code.strip() def parse_scenario_from_llm_response(content): """ Extract structured scenario information from an LLM response. Args: content (str): The LLM response text Returns: dict: Extracted scenario dictionary """ try: # Try to find and extract a JSON object json_match = re.search(r'\{.*\}', content, re.DOTALL) if json_match: json_str = json_match.group(0) scenario_dict = json.loads(json_str) return scenario_dict except Exception as e: logger.error(f"Error parsing scenario JSON: {e}") # Manual parsing fallback scenario = { "title": "", "objects": [], "transformations": [], "equations": [] } # Simple pattern matching to extract information title_match = re.search(r'title["\s:]+([^"]+)', content, re.IGNORECASE) if title_match: scenario["title"] = title_match.group(1).strip() # Extract lists with various possible formats objects_pattern = r'objects[":\s\[]+([^\]]+)' objects_match = re.search(objects_pattern, content, re.IGNORECASE | re.DOTALL) if objects_match: objects_text = objects_match.group(1) # Handle both comma-separated and quote-wrapped items objects = re.findall(r'"([^"]+)"', objects_text) if not objects: objects = [item.strip() for item in objects_text.split(',')] scenario["objects"] = objects # Similar extraction for transformations trans_pattern = r'transformations[":\s\[]+([^\]]+)' trans_match = re.search(trans_pattern, content, re.IGNORECASE | re.DOTALL) if trans_match: trans_text = trans_match.group(1) transformations = re.findall(r'"([^"]+)"', trans_text) if not transformations: transformations = [item.strip() for item in trans_text.split(',')] scenario["transformations"] = transformations # Extract equations if present equations_pattern = r'equations[":\s\[]+([^\]]+)' equations_match = re.search(equations_pattern, content, re.IGNORECASE | re.DOTALL) if equations_match: equations_text = equations_match.group(1) if equations_text.lower().strip() in ['null', 'none']: scenario["equations"] = None else: equations = re.findall(r'"([^"]+)"', equations_text) if not equations: equations = [item.strip() for item in equations_text.split(',')] scenario["equations"] = equations return scenario def validate_manim_code(code): """ Perform basic validation on Manim code to catch common issues. Args: code (str): The Manim code to validate Returns: tuple: (is_valid, error_message) """ # Check for basic Python syntax errors try: compile(code, '', 'exec') except SyntaxError as e: return False, f"Syntax error: {str(e)}" # Check for necessary components if 'from manim import' not in code: return False, "Missing Manim import" if 'class' not in code or 'Scene' not in code: return False, "No Scene class defined" if 'def construct(self)' not in code: return False, "No construct method defined" # Check for common Manim issues if 'self.play(' not in code and 'self.add(' not in code: return False, "No objects added to scene (missing self.play or self.add calls)" # All checks passed return True, "Code appears valid"