import os
import torch
import json
import time
import logging
from datetime import datetime
from threading import Thread
from queue import Queue
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configuration
PRIMARY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # First model to try
SECONDARY_MODEL = "facebook/opt-1.3b"  # Larger fallback model (1.3B vs. 1.1B parameters)
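# Note: TinyLlama-1.1B-Chat is an instruction-tuned chat model. The plain-text
# prompt used below works, but output quality may improve if the tokenizer's
# chat template (tokenizer.apply_chat_template, where the installed transformers
# version provides it) is applied; this script keeps the simpler plain-prompt approach.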
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 5  # Process 5 chapters at a time
MAX_RETRIES = 3
OUTPUT_DIR = "calculus_textbook_output"
LOG_FILE = "textbook_generation.log"

# Setup logging
os.makedirs(OUTPUT_DIR, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(OUTPUT_DIR, LOG_FILE),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

class ModelManager:
    """Manages loading and switching between language models for text generation."""
    
    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.current_model = None
        
    def load_model(self, model_name):
        """Load a model and its tokenizer if not already loaded."""
        if model_name not in self.models:
            try:
                logging.info(f"Loading model: {model_name}")
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,  # half precision on GPU to save memory
                    device_map="auto" if DEVICE == "cuda" else None  # "auto" placement requires the accelerate package
                )
                model.eval()
                
                self.models[model_name] = model
                self.tokenizers[model_name] = tokenizer
                logging.info(f"Successfully loaded model: {model_name}")
                return True
            except Exception as e:
                logging.error(f"Failed to load model {model_name}: {str(e)}")
                return False
        return True
    
    def set_current_model(self, model_name):
        """Set the current model to use for generation."""
        if model_name not in self.models and not self.load_model(model_name):
            return False
        self.current_model = model_name
        return True
    
    def generate_text(self, prompt, max_length=1024):
        """Generate text using the current model."""
        if not self.current_model:
            raise ValueError("No model selected. Call set_current_model first.")
        
        model = self.models[self.current_model]
        tokenizer = self.tokenizers[self.current_model]
        
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        
        # Sample with moderate temperature/top-p for variety. Note that max_length
        # counts prompt tokens plus generated tokens, so long prompts leave fewer
        # tokens for the answer.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
            
        # Decode only the newly generated tokens (everything after the prompt);
        # slicing token ids is more robust than trimming the decoded prompt string.
        generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
        return generated_text

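# A minimal standalone sketch of how ModelManager might be used (assumes the
# model weights above can be downloaded and fit on the selected DEVICE):
#
#   manager = ModelManager()
#   if manager.set_current_model(PRIMARY_MODEL):
#       print(manager.generate_text("Differentiate f(x) = x**2 * sin(x).", max_length=256))
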
class CalculusTextbookGenerator:
    """Generates a complete calculus textbook with questions and solutions."""
    
    def __init__(self):
        self.model_manager = ModelManager()
        self.textbook_data = self.create_initial_textbook_structure()
        
    def create_initial_textbook_structure(self):
        """Create the initial structure of the calculus textbook."""
        return {
            "books": [
                {
                    "name": "Calculus 1: Early Transcendentals",
                    "details": "Introduction to single-variable calculus including limits, derivatives, and basic integration techniques.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 6: Applications of Integration",
                            "subChapters": [
                                "6.1: Areas Between Curves", 
                                "6.2: Volumes", 
                                "6.3: Volumes by Cylindrical Shells",
                                "6.4: Work",
                                "6.5: Average Value of a Function"
                            ],
                            "questions": []  # Will be filled with generated questions
                        },
                        {
                            "chapterTitle": "Chapter 8: Further Applications of Integration",
                            "subChapters": [
                                "8.1: Arc Length",
                                "8.2: Area of a Surface of Revolution",
                                "8.3: Applications to Physics and Engineering",
                                "8.4: Applications to Economics and Biology",
                                "8.5: Probability"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 9: Differential Equations",
                            "subChapters": [
                                "9.1: Modeling with Differential Equations",
                                "9.2: Direction Fields and Euler's Method",
                                "9.3: Separable Equations",
                                "9.4: Models for Population Growth",
                                "9.5: Linear Equations",
                                "9.6: Predator–Prey Systems"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 10: Parametric Equations and Polar Coordinates",
                            "subChapters": [
                                "10.1: Curves Defined by Parametric Equations",
                                "10.2: Calculus with Parametric Curves",
                                "10.3: Polar Coordinates",
                                "10.4: Calculus in Polar Coordinates",
                                "10.5: Conic Sections",
                                "10.6: Conic Sections in Polar Coordinates"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 11: Sequences, Series, and Power Series",
                            "subChapters": [
                                "11.1: Sequences",
                                "11.2: Series",
                                "11.3: The Integral Test and Estimates of Sums",
                                "11.4: The Comparison Tests",
                                "11.5: Alternating Series and Absolute Convergence",
                                "11.6: The Ratio and Root Tests",
                                "11.7: Power Series"
                            ],
                            "questions": []
                        }
                    ]
                },
                {
                    "name": "Calculus 2: Advanced Concepts",
                    "details": "Advances into series, sequences, techniques of integration, and vector calculus.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 12: Vectors and the Geometry of Space",
                            "subChapters": [
                                "12.1: Three-Dimensional Coordinate Systems",
                                "12.2: Vectors",
                                "12.3: The Dot Product",
                                "12.4: The Cross Product",
                                "12.5: Equations of Lines and Planes",
                                "12.6: Cylinders and Quadric Surfaces"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 13: Vector Functions",
                            "subChapters": [
                                "13.1: Vector Functions and Space Curves",
                                "13.2: Derivatives and Integrals of Vector Functions",
                                "13.3: Arc Length and Curvature",
                                "13.4: Motion in Space: Velocity and Acceleration"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 14: Partial Derivatives",
                            "subChapters": [
                                "14.1: Functions of Several Variables",
                                "14.2: Limits and Continuity",
                                "14.3: Partial Derivatives",
                                "14.4: Tangent Planes and Linear Approximation",
                                "14.5: The Chain Rule"
                            ],
                            "questions": []
                        }
                    ]
                }
            ]
        }
        
    def generate_question_set(self, chapter_title, subchapter_titles, num_questions=3):
        """Generate a set of questions with step-by-step solutions for a chapter."""
        
        # Try the primary model first; fall back to the secondary model if the
        # primary cannot be loaded at all.
        if not self.model_manager.set_current_model(PRIMARY_MODEL):
            self.model_manager.set_current_model(SECONDARY_MODEL)
        
        prompt = f"""Create {num_questions} calculus questions with detailed step-by-step solutions for:
{chapter_title}

The questions should cover these subchapters:
{', '.join(subchapter_titles)}

For each question:
1. Write a clear, university-level calculus problem
2. Provide a comprehensive step-by-step solution with all math steps shown
3. Include a final answer

Format each question as:
QUESTION: [Problem statement]
SOLUTION:
Step 1: [First step with explanation]
Step 2: [Next step]
...
Final Answer: [The solution]

Make sure to use proper mathematical notation and include a variety of question types.
"""

        try:
            generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            
            # Check if the content looks good
            if len(generated_content) < 200 or "QUESTION" not in generated_content:
                # Try the secondary model if the primary one gave poor results
                logging.warning(f"Primary model gave insufficient results for {chapter_title}. Trying secondary model.")
                self.model_manager.set_current_model(SECONDARY_MODEL)
                generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            
            # Parse the generated content into question objects
            questions = self.parse_questions(generated_content)
            
            if not questions:
                logging.warning(f"Failed to parse any questions from content for {chapter_title}")
                return []
                
            return questions
            
        except Exception as e:
            logging.error(f"Error generating questions for {chapter_title}: {str(e)}")
            return []
            
    def parse_questions(self, content):
        """Parse the generated content into structured question objects."""
        questions = []
        
        # Split by "QUESTION:" or similar markers
        parts = content.split("QUESTION:")
        
        for i, part in enumerate(parts):
            if i == 0:
                continue  # Skip the first part (before the first QUESTION:)
                
            try:
                # Split into question and solution
                if "SOLUTION:" in part:
                    question_text, solution = part.split("SOLUTION:", 1)
                else:
                    # Try alternative formats
                    for marker in ["Solution:", "STEPS:", "Steps:"]:
                        if marker in part:
                            question_text, solution = part.split(marker, 1)
                            break
                    else:
                        question_text = part
                        solution = ""
                
                questions.append({
                    "question": question_text.strip(),
                    "solution": solution.strip()
                })
            except Exception as e:
                logging.error(f"Error parsing question {i}: {str(e)}")
                continue
                
        return questions
    
    def worker_function(self, queue):
        """Worker thread function to process chapters from queue."""
        while True:
            item = queue.get()
            if item is None:  # None signals to exit
                queue.task_done()
                break
            
            book_idx, chapter_idx, chapter = item
            chapter_title = chapter["chapterTitle"]
            subchapters = chapter.get("subChapters", [])
            
            logging.info(f"Processing: {chapter_title}")
            
            # Try to generate questions with retries
            for attempt in range(MAX_RETRIES):
                try:
                    questions = self.generate_question_set(chapter_title, subchapters, num_questions=4)
                    if questions:
                        # Save the questions to the chapter
                        self.textbook_data["books"][book_idx]["chapters"][chapter_idx]["questions"] = questions
                        
                        logging.info(f"✓ Generated {len(questions)} questions for {chapter_title}")
                        break  # Success, exit retry loop
                    else:
                        logging.warning(f"No questions generated for {chapter_title} on attempt {attempt+1}")
                        
                except Exception as e:
                    logging.error(f"Attempt {attempt+1}/{MAX_RETRIES} failed for {chapter_title}: {str(e)}")
                    time.sleep(2)  # Wait before retrying
            
            # Save current state to file
            self.save_current_state()
            queue.task_done()
    
    def save_current_state(self):
        """Save the current state of the textbook generation."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        with open(os.path.join(OUTPUT_DIR, f"textbook_state_{timestamp}.json"), "w") as f:
            json.dump(self.textbook_data, f, indent=2)
        
        # Also save to a fixed filename for the latest state
        with open(os.path.join(OUTPUT_DIR, "textbook_latest.json"), "w") as f:
            json.dump(self.textbook_data, f, indent=2)
    
    def process_in_batches(self):
        """Process all chapters in batches."""
        queue = Queue()
        
        # Queue all chapters for processing
        for book_idx, book in enumerate(self.textbook_data["books"]):
            for chapter_idx, chapter in enumerate(book["chapters"]):
                queue.put((book_idx, chapter_idx, chapter))
        
        # Create and start worker thread
        worker = Thread(target=self.worker_function, args=(queue,))
        worker.daemon = True  # Allow the program to exit even if the thread is running
        worker.start()
        
        # Process in batches
        total_chapters = queue.qsize()
        processed = 0
        
        while processed < total_chapters:
            start_size = queue.qsize()
            if start_size == 0:
                # The worker has already dequeued the remaining chapters; wait for
                # it to finish them instead of spinning on a zero-sized batch.
                queue.join()
                processed = total_chapters
                break

            batch_size = min(BATCH_SIZE, start_size)

            logging.info(f"Processing batch of {batch_size} chapters. {start_size} remaining.")

            # qsize() drops as soon as the worker dequeues an item, so this is an
            # approximate wait for the batch rather than an exact completion count.
            while queue.qsize() > start_size - batch_size:
                time.sleep(2)

            processed += batch_size
            logging.info(f"Batch complete. {processed}/{total_chapters} chapters processed.")

            # Save current state
            self.save_current_state()
        
        # Signal worker to exit
        queue.put(None)
        worker.join()
        
        # Save final state
        self.save_current_state()
        logging.info("All chapters processed. Textbook generation complete.")

def main():
    start_time = datetime.now()
    logging.info(f"Starting textbook generation at {start_time}")
    
    generator = CalculusTextbookGenerator()
    generator.process_in_batches()
    
    end_time = datetime.now()
    duration = end_time - start_time
    logging.info(f"Textbook generation completed in {duration}")
    logging.info(f"Final textbook saved to {os.path.join(OUTPUT_DIR, 'textbook_latest.json')}")

if __name__ == "__main__":
    main()