# text2manim / code_cleaner.py
# NOTE(review): the three lines below are hosting-page residue kept as comments.
# thanhkt's picture
# Upload 13 files
# 1645305 verified
"""
Utilities for cleaning and validating Manim code generated by LLMs.
"""
import ast
import json
import logging
import re
import textwrap
logger = logging.getLogger(__name__)

# Header of a class deriving from ``Scene`` or a ``*Scene`` base (e.g. ThreeDScene).
_SCENE_CLASS_RE = re.compile(r'class\s+\w+\s*\(\s*\w*Scene\s*\)\s*:')
# Exact ``def construct(self):`` header, used when the code cannot be parsed.
_CONSTRUCT_DEF_RE = re.compile(r'def\s+construct\s*\(\s*self\s*\)\s*:')
# Looser presence check that tolerates spacing and return annotations.
_CONSTRUCT_PRESENT_RE = re.compile(r'def\s+construct\s*\(\s*self\b')
# Remainder of an opening fence line: an optional language tag, then a newline.
_FENCE_INFO_RE = re.compile(r'\A[ \t]*[\w+-]*[ \t]*\r?\n')
_INDENT = '    '


def _extract_fenced_code(text):
    """Return the contents of the first Markdown code fence in *text*.

    Text without any fence is returned unchanged. The language tag that may
    follow the opening fence (``py``, ``python3``, ...) is dropped so it does
    not end up as the first line of the code.
    """
    if "```python" in text:
        body = text.split("```python", 1)[1].split("```", 1)[0]
    elif "```" in text:
        body = text.split("```", 2)[1]
    else:
        return text
    return _FENCE_INFO_RE.sub('', body, count=1)


def _wrap_in_scene(code):
    """Wrap loose statements in ``ManimScene.construct``.

    Leading import lines stay at module level (``from manim import *`` is a
    SyntaxError inside a function) and every remaining line is indented into
    the method body, not just the first one.
    """
    lines = code.split('\n')
    header = []
    depth = 0  # open parentheses of a multi-line ``from x import (...)``
    index = 0
    while index < len(lines):
        stripped = lines[index].strip()
        is_import = stripped.startswith(('import ', 'from '))
        if depth == 0 and stripped and not is_import:
            break
        if stripped:
            header.append(stripped)
        depth = max(depth + stripped.count('(') - stripped.count(')'), 0)
        index += 1

    body = textwrap.dedent('\n'.join(lines[index:])).strip('\n').rstrip()
    if not body:
        body = 'pass'

    return (
        '\n'.join(header)
        + '\n\nclass ManimScene(Scene):\n'
        + f'{_INDENT}def construct(self):\n'
        + textwrap.indent(body, _INDENT * 2)
        + '\n'
    )


def _add_construct_stub(code):
    """Insert an empty ``construct`` method right after the Scene class header.

    The stub reuses the indentation of the first indented line after the
    header so it lines up with the existing class body.
    """
    header = _SCENE_CLASS_RE.search(code)
    if header is None:
        return code
    rest = code[header.end():]
    body_indent = re.search(r'\n([ \t]+)\S', rest)
    method_indent = body_indent.group(1) if body_indent else _INDENT
    stub = (
        f'\n{method_indent}def construct(self):'
        f'\n{method_indent}{_INDENT}pass\n'
    )
    return code[:header.end()] + stub + rest


def _last_construct_node(code):
    """Return the AST node of the last ``construct`` function, or None.

    None is returned when the code does not parse or defines no such function.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        return None
    found = None
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef) and node.name == 'construct':
            if found is None or node.lineno > found.lineno:
                found = node
    return found


def _ensure_final_wait(code):
    """Add ``self.wait(1)`` to ``construct`` when no wait call follows it.

    When the code parses, the call is inserted directly after the last
    statement of ``construct`` so it cannot land in a later method or class.
    Otherwise it is appended to the end of the code as a best effort.
    """
    if 'self.wait(' in code.split('def construct')[-1]:
        return code

    node = _last_construct_node(code)
    if node is not None:
        end_lineno = getattr(node, 'end_lineno', None)  # absent before 3.8
        first_stmt = node.body[0]
        if end_lineno is not None and first_stmt.lineno > node.lineno:
            lines = code.split('\n')
            first_line = lines[first_stmt.lineno - 1]
            indent = first_line[:len(first_line) - len(first_line.lstrip())]
            lines.insert(end_lineno, f'{indent}self.wait(1)')
            return '\n'.join(lines)

    # Fallback for code that does not parse: append at the very end.
    headers = list(_CONSTRUCT_DEF_RE.finditer(code))
    if not headers:
        return code
    last = headers[-1]
    # Only spaces/tabs count as indentation; ``\s`` would also swallow
    # the newlines of blank lines.
    indent_match = re.search(r'\n([ \t]+)\S', code[last.end():])
    if indent_match:
        indent = indent_match.group(1)
    else:
        # Empty body: indent one level deeper than the ``def`` line itself.
        line_start = code.rfind('\n', 0, last.start()) + 1
        def_indent = code[line_start:last.start()]
        if def_indent.strip():
            def_indent = ''
        indent = def_indent + _INDENT
    return code.rstrip() + f'\n{indent}self.wait(1)\n'


def clean_manim_code(raw_code):
    """
    Clean Manim code from LLM responses by removing markdown formatting
    and ensuring proper structure.

    Steps, in order: extract the first fenced code block (if any), make sure
    the code starts with a Manim import, wrap loose statements in a
    ``ManimScene`` class, add a ``construct`` stub to a Scene class that has
    none, and add a trailing ``self.wait(1)`` when no wait call is present.

    Args:
        raw_code (str): The raw code from the LLM response. ``None`` is
            treated as an empty string.

    Returns:
        str: The cleaned code, stripped of surrounding whitespace. The
            result is not guaranteed to be valid Python.
    """
    code = raw_code or ''
    # Normalise line endings so line numbers agree with the parser's.
    code = code.replace('\r\n', '\n').replace('\r', '\n')

    code = _extract_fenced_code(code)

    # Ensure code begins with the necessary import
    if not code.strip().startswith('from manim import'):
        code = 'from manim import *\n\n' + code

    # Wrap loose statements when there is no Scene class at all
    if 'class' not in code or 'Scene' not in code:
        logger.warning("Generated code does not contain a proper Scene class")
        code = _wrap_in_scene(code)

    # Add a construct stub when the Scene class lacks one
    if not _CONSTRUCT_PRESENT_RE.search(code):
        logger.warning("Generated code does not contain a construct method")
        code = _add_construct_stub(code)

    code = _ensure_final_wait(code)
    return code.strip()
# Text following a field name: optional closing quote of the key, separators,
# then either a bracketed list (up to ``]`` or end of text) or the rest of the
# line. The opening quote of the first item is deliberately NOT consumed.
_FIELD_TAIL = r'''["']?[\s:=]*(?:\[(?P<bracketed>[^\]]*)|(?P<inline>[^\n\[\]]+))'''


def _find_field_text(content, key):
    """Return the raw text following *key* in *content*, or None if absent."""
    match = re.search(re.escape(key) + _FIELD_TAIL, content, re.IGNORECASE)
    if match is None:
        return None
    bracketed = match.group('bracketed')
    return bracketed if bracketed is not None else match.group('inline')


def _split_items(text):
    """Split raw list text into items.

    Double-quoted items win when present; otherwise the text is split on
    commas, surrounding quotes are removed and empty items are dropped.
    """
    quoted = re.findall(r'"([^"]+)"', text)
    if quoted:
        return quoted
    pieces = (piece.strip().strip('\'"').strip() for piece in text.split(','))
    return [piece for piece in pieces if piece]


def parse_scenario_from_llm_response(content):
    """
    Extract structured scenario information from an LLM response.

    The first JSON object found in the text is returned as-is when it
    decodes cleanly. Otherwise a best-effort pattern match fills in the
    ``title``, ``objects``, ``transformations`` and ``equations`` keys.

    Args:
        content (str): The LLM response text. ``None`` is treated as empty.

    Returns:
        dict: Extracted scenario dictionary. In the fallback path
            ``equations`` is ``None`` when the response says null/none.
    """
    content = content or ""

    # Decode exactly one JSON value starting at the first "{". A greedy
    # first-"{"-to-last-"}" span would break as soon as any later text
    # contains a closing brace.
    start = content.find('{')
    if start != -1:
        try:
            parsed, _ = json.JSONDecoder().raw_decode(content[start:])
        except (ValueError, RecursionError) as e:
            logger.error(f"Error parsing scenario JSON: {e}")
        else:
            if isinstance(parsed, dict):
                return parsed

    # Manual parsing fallback
    scenario = {
        "title": "",
        "objects": [],
        "transformations": [],
        "equations": []
    }

    # The title stops at the closing quote or the end of the line.
    title_match = re.search(r'title["\s:]+([^"\n]+)', content, re.IGNORECASE)
    if title_match:
        scenario["title"] = title_match.group(1).strip().rstrip(',').rstrip()

    for key in ("objects", "transformations"):
        field_text = _find_field_text(content, key)
        if field_text is not None:
            scenario[key] = _split_items(field_text)

    equations_text = _find_field_text(content, "equations")
    if equations_text is not None:
        # Accept null/none even when punctuation such as "}" follows it.
        if re.match(r'\s*(?:null|none)\b', equations_text, re.IGNORECASE):
            scenario["equations"] = None
        else:
            scenario["equations"] = _split_items(equations_text)

    return scenario
def _imports_manim(node):
    """Return True when *node* is an import of the ``manim`` package."""
    if isinstance(node, ast.ImportFrom):
        return bool(node.module) and node.module.split('.')[0] == 'manim'
    if isinstance(node, ast.Import):
        return any(alias.name.split('.')[0] == 'manim' for alias in node.names)
    return False


def validate_manim_code(code):
    """
    Perform basic validation on Manim code to catch common issues.

    The code is parsed and compiled (never executed). Structural checks
    inspect the syntax tree, so text inside comments or strings does not
    satisfy them, and spacing variants such as ``def construct( self ):``
    are accepted.

    Args:
        code (str): The Manim code to validate

    Returns:
        tuple: (is_valid, error_message)
    """
    if not isinstance(code, str):
        return False, "Code must be a string"

    # Check for basic Python syntax errors. Older interpreters raise
    # ValueError (not SyntaxError) for source containing null bytes.
    try:
        tree = ast.parse(code, '<string>')
        compile(tree, '<string>', 'exec')
    except (SyntaxError, ValueError) as e:
        return False, f"Syntax error: {str(e)}"

    nodes = list(ast.walk(tree))

    # Check for necessary components
    if not any(_imports_manim(node) for node in nodes):
        return False, "Missing Manim import"

    classes = [node for node in nodes if isinstance(node, ast.ClassDef)]
    if not classes or 'Scene' not in code:
        return False, "No Scene class defined"

    has_construct = any(
        isinstance(member, ast.FunctionDef) and member.name == 'construct'
        for cls in classes
        for member in cls.body
    )
    if not has_construct:
        return False, "No construct method defined"

    # Check for common Manim issues
    if 'self.play(' not in code and 'self.add(' not in code:
        return False, "No objects added to scene (missing self.play or self.add calls)"

    # All checks passed
    return True, "Code appears valid"