"""
Utilities for cleaning and validating Manim code generated by LLMs.
"""

import re
import logging
import json

logger = logging.getLogger(__name__)

def _strip_fence_info(code):
    """Drop a leftover fence info string (e.g. ``py`` or the ``3`` of ``python3``).

    Only a bare word that sits directly at the start of the extracted text and
    is immediately followed by a newline is removed, so ordinary fenced code
    (which starts with a newline) and inline snippets are left untouched.
    """
    return re.sub(r'\A[\w+-]+[ \t]*\r?\n', '', code)


def _wrap_in_default_scene(code):
    """Wrap loose statements in ``ManimScene.construct`` and return valid source.

    Star imports are hoisted to module level because ``from x import *`` is a
    SyntaxError inside a function body. Every remaining line is re-indented
    into the method body.
    """
    star_imports = ['from manim import *']
    body_lines = []
    for line in code.splitlines():
        if re.match(r'from\s+[\w.]+\s+import\s+\*\s*$', line):
            statement = ' '.join(line.split())
            if statement not in star_imports:
                star_imports.append(statement)
        else:
            body_lines.append(line)

    # Trim blank lines around the body so the method starts and ends cleanly.
    while body_lines and not body_lines[0].strip():
        body_lines.pop(0)
    while body_lines and not body_lines[-1].strip():
        body_lines.pop()

    if body_lines:
        # Remove any indentation the snippet already shares, then indent every
        # line to method-body depth.
        margin = min(
            len(line) - len(line.lstrip())
            for line in body_lines
            if line.strip()
        )
        body = [
            '        ' + line[margin:] if line.strip() else ''
            for line in body_lines
        ]
    else:
        body = ['        pass']

    return (
        '\n'.join(star_imports)
        + '\n\nclass ManimScene(Scene):\n    def construct(self):\n'
        + '\n'.join(body)
    )


def clean_manim_code(raw_code):
    """
    Clean Manim code from LLM responses by removing markdown formatting
    and ensuring proper structure.

    The steps are applied in order:

    1. Extract the contents of a markdown code fence, preferring a
       ```` ```python ```` fence over an untagged one.
    2. Prepend ``from manim import *`` unless the code already starts with a
       Manim import.
    3. If the text has no Scene class, wrap it in a default
       ``ManimScene.construct`` method.
    4. If there is no ``construct`` method, insert a stub after the first
       ``class X(Scene):`` header.
    5. Append ``self.wait(1)`` when the construct method has no wait call.

    Args:
        raw_code (str): The raw code from the LLM response

    Returns:
        str: Cleaned Python code with surrounding whitespace stripped
    """
    code = raw_code

    # Extract code from markdown code blocks if present.
    if "```python" in code:
        code = code.split("```python", 1)[1].split("```", 1)[0]
        code = _strip_fence_info(code)
    elif "```" in code:
        code = code.split("```")[1]
        code = _strip_fence_info(code)

    # Ensure code begins with the necessary import.
    if not code.strip().startswith('from manim import'):
        code = 'from manim import *\n\n' + code

    # Verify the code contains a Scene class; wrap loose statements otherwise.
    if 'class' not in code or 'Scene' not in code:
        logger.warning("Generated code does not contain a proper Scene class")
        code = _wrap_in_default_scene(code)

    # Verify the code has a construct method.
    if 'def construct(self)' not in code:
        logger.warning("Generated code does not contain a construct method")
        class_match = re.search(r'class\s+\w+\s*\(\s*Scene\s*\)\s*:', code)
        if class_match:
            insert_pos = class_match.end()
            # The stub carries its own wait so the trailing-wait step below
            # does not append one to whatever happens to end the file.
            code = (
                code[:insert_pos]
                + '\n    def construct(self):\n        self.wait(1)\n'
                + code[insert_pos:]
            )

    # Ensure there's a wait at the end if not present.
    # NOTE: the wait is appended at the end of the source, which assumes the
    # construct method is the last definition in the code.
    if 'self.wait(' not in code.split('def construct')[-1]:
        construct_match = re.search(r'def\s+construct\s*\(\s*self\s*\)\s*:', code)
        if construct_match:
            # Reuse the indentation of the first indented, non-blank line after
            # the signature; fall back to two levels of four spaces.
            indent_match = re.search(r'\n([ \t]+)\S', code[construct_match.end():])
            indentation = indent_match.group(1) if indent_match else '        '
            code = code.rstrip() + f'\n{indentation}self.wait(1)\n'

    return code.strip()

def _find_list_text(content, key):
    """Return the raw text following ``key`` up to the next ``]``, or None.

    The separator between the key and its value (closing quote, colon,
    whitespace, opening bracket) is consumed without swallowing the opening
    quote of the first list item.
    """
    match = re.search(
        key + r'(?=[":\s\[])["\s]*:?\s*\[?([^\]]*)',
        content,
        re.IGNORECASE,
    )
    return match.group(1) if match else None


def _split_list_items(list_text):
    """Split raw list text into items, preferring double-quoted strings."""
    quoted = re.findall(r'"([^"]+)"', list_text)
    if quoted:
        return quoted
    # Unquoted fallback: comma-separated values. Drop empty entries and
    # leftover bracket/quote residue (e.g. from an empty ``[]``).
    items = (item.strip() for item in list_text.split(','))
    return [item for item in items if item.strip('[]{}" \t\r\n')]


def parse_scenario_from_llm_response(content):
    """
    Extract structured scenario information from an LLM response.

    A JSON object starting at the first ``{`` is tried first; text after the
    object's closing brace is ignored. If no object can be decoded, the
    response is scanned with regular expressions for ``title``, ``objects``,
    ``transformations`` and ``equations``.

    In the regex fallback a list value runs up to the next ``]`` (or the end
    of the text), so unbracketed multi-line lists are captured only roughly.

    Args:
        content (str): The LLM response text

    Returns:
        dict: The decoded JSON object when one is found. Otherwise a dict with
            the keys ``title`` (str), ``objects`` (list), ``transformations``
            (list) and ``equations`` (list, or None when the response gives
            ``null``/``none`` for it).
    """
    scenario = {
        "title": "",
        "objects": [],
        "transformations": [],
        "equations": []
    }
    if not content:
        return scenario

    # Try to decode a JSON object beginning at the first opening brace.
    start = content.find('{')
    if start != -1:
        try:
            scenario_dict, _ = json.JSONDecoder().raw_decode(content[start:])
        except ValueError as e:
            logger.error(f"Error parsing scenario JSON: {e}")
        else:
            return scenario_dict

    # Manual parsing fallback. The title stops at the next quote or line end.
    title_match = re.search(r'title["\s:]+([^"\n]+)', content, re.IGNORECASE)
    if title_match:
        scenario["title"] = title_match.group(1).strip()

    for key in ("objects", "transformations"):
        list_text = _find_list_text(content, key)
        if list_text is not None:
            scenario[key] = _split_list_items(list_text)

    equations_text = _find_list_text(content, "equations")
    if equations_text is not None:
        # ``null``/``none`` may be followed by a comma or closing brace when
        # the value comes from malformed JSON.
        is_null = re.match(
            r'\s*(?:null|none)\s*(?:[,}]|\Z)', equations_text, re.IGNORECASE
        )
        scenario["equations"] = (
            None if is_null else _split_list_items(equations_text)
        )

    return scenario

def validate_manim_code(code):
    """
    Perform basic validation on Manim code to catch common issues.

    The source is compiled (never executed) to detect syntax errors, then
    checked with plain substring tests for a Manim import, a Scene class, a
    ``construct(self)`` method and at least one ``self.play(`` or
    ``self.add(`` call. The first failing check determines the message.

    Args:
        code (str): The Manim code to validate

    Returns:
        tuple: (is_valid, error_message)
    """
    # Check for basic Python syntax errors. compile() is documented to raise
    # ValueError rather than SyntaxError for source containing null bytes, so
    # both are reported as invalid code instead of escaping to the caller.
    try:
        compile(code, '<string>', 'exec')
    except (SyntaxError, ValueError) as e:
        return False, f"Syntax error: {e}"

    # Check for necessary components
    if 'from manim import' not in code:
        return False, "Missing Manim import"

    if 'class' not in code or 'Scene' not in code:
        return False, "No Scene class defined"

    if 'def construct(self)' not in code:
        return False, "No construct method defined"

    # Check for common Manim issues
    if 'self.play(' not in code and 'self.add(' not in code:
        return False, "No objects added to scene (missing self.play or self.add calls)"

    # All checks passed
    return True, "Code appears valid"