"""Generate calculus textbook questions with local Hugging Face language models.

Running this module walks a hard-coded textbook outline, asks a causal
language model for worked questions on every chapter, and writes the
accumulated result as JSON files under ``OUTPUT_DIR``.

Importing the module creates ``OUTPUT_DIR`` and configures the root logger to
write to ``OUTPUT_DIR/LOG_FILE``.
"""

import os
import json
import time
import logging
from datetime import datetime
from threading import Lock, Thread
from queue import Queue

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configuration
PRIMARY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # First model to try
SECONDARY_MODEL = "facebook/opt-1.3b"  # More powerful backup model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 5  # Process 5 chapters at a time
MAX_RETRIES = 3
OUTPUT_DIR = "calculus_textbook_output"
LOG_FILE = "textbook_generation.log"

# Setup logging
os.makedirs(OUTPUT_DIR, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(OUTPUT_DIR, LOG_FILE),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)


class ModelManager:
    """Manages loading and switching between language models for text generation.

    Loaded models and tokenizers are cached by model name in ``models`` and
    ``tokenizers``; ``current_model`` holds the name used by ``generate_text``.
    """

    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.current_model = None

    def load_model(self, model_name: str) -> bool:
        """Load a model and its tokenizer if not already loaded.

        Args:
            model_name: Identifier passed to ``from_pretrained``.

        Returns:
            True if the model is available in the cache after the call,
            False if loading raised (the error is logged, not propagated).
        """
        if model_name in self.models:
            return True

        try:
            logging.info(f"Loading model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device_map="auto" if DEVICE == "cuda" else None
            )
            model.eval()
        except Exception as e:
            logging.error(f"Failed to load model {model_name}: {e}")
            return False

        # Only cache once both pieces loaded, so the two dicts stay in sync.
        self.models[model_name] = model
        self.tokenizers[model_name] = tokenizer
        logging.info(f"Successfully loaded model: {model_name}")
        return True

    def set_current_model(self, model_name: str) -> bool:
        """Set the current model to use for generation, loading it on demand.

        Returns:
            True if ``model_name`` is now the current model; False if it could
            not be loaded, in which case ``current_model`` is left unchanged.
        """
        if model_name not in self.models and not self.load_model(model_name):
            return False
        self.current_model = model_name
        return True

    def generate_text(self, prompt: str, max_length: int = 1024) -> str:
        """Generate a sampled continuation of ``prompt`` with the current model.

        Args:
            prompt: Text fed to the model.
            max_length: Forwarded to ``model.generate`` as ``max_length``.

        Returns:
            The decoded continuation only (prompt removed), stripped of
            surrounding whitespace.

        Raises:
            ValueError: If no model has been selected yet.
        """
        if not self.current_model:
            raise ValueError("No model selected. Call set_current_model first.")

        model = self.models[self.current_model]
        tokenizer = self.tokenizers[self.current_model]

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        # Generate with some randomness for creativity
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Drop the prompt by token position rather than by the character
        # length of a re-decoded prompt: decoding is not guaranteed to
        # reproduce the prompt text exactly, which would misalign the cut.
        generated_tokens = outputs[0][prompt_token_count:]
        return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


class CalculusTextbookGenerator:
    """Generates a complete calculus textbook with questions and solutions."""

    # Alternative headings accepted between a question and its solution,
    # checked in this order.
    _SOLUTION_MARKERS = ("SOLUTION:", "Solution:", "STEPS:", "Steps:")

    # Class-level because every instance writes to the same files in
    # OUTPUT_DIR; serialises saves from the worker and the main thread.
    _save_lock = Lock()

    def __init__(self):
        self.model_manager = ModelManager()
        self.textbook_data = self.create_initial_textbook_structure()

    def create_initial_textbook_structure(self) -> dict:
        """Create the initial structure of the calculus textbook.

        Returns:
            A dict with a ``"books"`` list; each book has ``"name"``,
            ``"details"`` and ``"chapters"``, and each chapter has
            ``"chapterTitle"``, ``"subChapters"`` and an empty ``"questions"``
            list that generation fills in later.
        """
        return {
            "books": [
                {
                    "name": "Calculus 1: Early Transcendentals",
                    "details": "Introduction to single-variable calculus including limits, derivatives, and basic integration techniques.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 6: Applications of Integration",
                            "subChapters": [
                                "6.1: Areas Between Curves",
                                "6.2: Volumes",
                                "6.3: Volumes by Cylindrical Shells",
                                "6.4: Work",
                                "6.5: Average Value of a Function"
                            ],
                            "questions": []  # Will be filled with generated questions
                        },
                        {
                            "chapterTitle": "Chapter 8: Further Applications of Integration",
                            "subChapters": [
                                "8.1: Arc Length",
                                "8.2: Area of a Surface of Revolution",
                                "8.3: Applications to Physics and Engineering",
                                "8.4: Applications to Economics and Biology",
                                "8.5: Probability"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 9: Differential Equations",
                            "subChapters": [
                                "9.1: Modeling with Differential Equations",
                                "9.2: Direction Fields and Euler's Method",
                                "9.3: Separable Equations",
                                "9.4: Models for Population Growth",
                                "9.5: Linear Equations",
                                "9.6: Predator–Prey Systems"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 10: Parametric Equations and Polar Coordinates",
                            "subChapters": [
                                "10.1: Curves Defined by Parametric Equations",
                                "10.2: Calculus with Parametric Curves",
                                "10.3: Polar Coordinates",
                                "10.4: Calculus in Polar Coordinates",
                                "10.5: Conic Sections",
                                "10.6: Conic Sections in Polar Coordinates"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 11: Sequences, Series, and Power Series",
                            "subChapters": [
                                "11.1: Sequences",
                                "11.2: Series",
                                "11.3: The Integral Test and Estimates of Sums",
                                "11.4: The Comparison Tests",
                                "11.5: Alternating Series and Absolute Convergence",
                                "11.6: The Ratio and Root Tests",
                                "11.7: Power Series"
                            ],
                            "questions": []
                        }
                    ]
                },
                {
                    "name": "Calculus 2: Advanced Concepts",
                    "details": "Advances into series, sequences, techniques of integration, and vector calculus.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 12: Vectors and the Geometry of Space",
                            "subChapters": [
                                "12.1: Three-Dimensional Coordinate Systems",
                                "12.2: Vectors",
                                "12.3: The Dot Product",
                                "12.4: The Cross Product",
                                "12.5: Equations of Lines and Planes",
                                "12.6: Cylinders and Quadric Surfaces"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 13: Vector Functions",
                            "subChapters": [
                                "13.1: Vector Functions and Space Curves",
                                "13.2: Derivatives and Integrals of Vector Functions",
                                "13.3: Arc Length and Curvature",
                                "13.4: Motion in Space: Velocity and Acceleration"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 14: Partial Derivatives",
                            "subChapters": [
                                "14.1: Functions of Several Variables",
                                "14.2: Limits and Continuity",
                                "14.3: Partial Derivatives",
                                "14.4: Tangent Planes and Linear Approximation",
                                "14.5: The Chain Rule"
                            ],
                            "questions": []
                        }
                    ]
                }
            ]
        }

    def _build_question_prompt(self, chapter_title, subchapter_titles, num_questions):
        """Return the instruction prompt asking for ``num_questions`` worked problems."""
        subchapter_list = ', '.join(subchapter_titles)
        lines = (
            f"Create {num_questions} calculus questions with detailed step-by-step solutions for: {chapter_title}",
            "",
            f"The questions should cover these subchapters: {subchapter_list}",
            "",
            "For each question:",
            "1. Write a clear, university-level calculus problem",
            "2. Provide a comprehensive step-by-step solution with all math steps shown",
            "3. Include a final answer",
            "",
            "Format each question as:",
            "QUESTION: [Problem statement]",
            "SOLUTION:",
            "Step 1: [First step with explanation]",
            "Step 2: [Next step]",
            "...",
            "Final Answer: [The solution]",
            "",
            "Make sure to use proper mathematical notation and include a variety of question types.",
            "",
        )
        return "\n".join(lines)

    def generate_question_set(self, chapter_title, subchapter_titles, num_questions=3):
        """Generate a set of questions with step-by-step solutions for a chapter.

        The primary model is tried first. The secondary model is used when the
        primary cannot be loaded or its output is shorter than 200 characters
        or lacks a ``QUESTION`` marker.

        Args:
            chapter_title: Chapter heading inserted into the prompt.
            subchapter_titles: Iterable of subchapter headings for the prompt.
            num_questions: Number of questions requested from the model.

        Returns:
            A list of ``{"question": ..., "solution": ...}`` dicts, or an empty
            list when generation or parsing fails (failures are logged).
        """
        prompt = self._build_question_prompt(chapter_title, subchapter_titles, num_questions)

        try:
            # Try the primary model first. A failed load must not fall through
            # to generate_text, which would raise or silently reuse whichever
            # model a previous call left selected.
            if self.model_manager.set_current_model(PRIMARY_MODEL):
                generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            else:
                logging.warning(f"Primary model could not be loaded for {chapter_title}.")
                generated_content = ""

            # Check if the content looks good
            if len(generated_content) < 200 or "QUESTION" not in generated_content:
                logging.warning(f"Primary model gave insufficient results for {chapter_title}. Trying secondary model.")
                if self.model_manager.set_current_model(SECONDARY_MODEL):
                    generated_content = self.model_manager.generate_text(prompt, max_length=2048)
                else:
                    # Keep whatever the primary produced instead of silently
                    # re-running the primary under the "secondary" label.
                    logging.warning(f"Secondary model could not be loaded for {chapter_title}.")

            # Parse the generated content into question objects
            questions = self.parse_questions(generated_content)
            if not questions:
                logging.warning(f"Failed to parse any questions from content for {chapter_title}")
                return []
            return questions
        except Exception as e:
            logging.error(f"Error generating questions for {chapter_title}: {e}")
            return []

    def parse_questions(self, content: str) -> list:
        """Parse the generated content into structured question objects.

        The text is split on ``"QUESTION:"``; anything before the first marker
        is ignored. Within each piece the first matching solution marker
        (``SOLUTION:``, ``Solution:``, ``STEPS:``, ``Steps:``) separates the
        problem statement from its solution. Pieces without a marker get an
        empty solution; pieces with an empty problem statement are dropped.

        Args:
            content: Raw model output.

        Returns:
            A list of ``{"question": str, "solution": str}`` dicts in order of
            appearance.
        """
        questions = []

        # The first segment is whatever preceded the first "QUESTION:".
        for part in content.split("QUESTION:")[1:]:
            for marker in self._SOLUTION_MARKERS:
                if marker in part:
                    question_text, solution = part.split(marker, 1)
                    break
            else:
                question_text, solution = part, ""

            question_text = question_text.strip()
            if not question_text:
                # A bare "QUESTION:" with no statement is not a usable entry.
                continue

            questions.append({
                "question": question_text,
                "solution": solution.strip()
            })

        return questions

    def _process_chapter(self, book_idx, chapter_idx, chapter):
        """Generate questions for one chapter (with retries) and save the state."""
        chapter_title = chapter["chapterTitle"]
        subchapters = chapter.get("subChapters", [])
        logging.info(f"Processing: {chapter_title}")

        # Try to generate questions with retries
        for attempt in range(MAX_RETRIES):
            try:
                questions = self.generate_question_set(chapter_title, subchapters, num_questions=4)
                if questions:
                    # Save the questions to the chapter
                    self.textbook_data["books"][book_idx]["chapters"][chapter_idx]["questions"] = questions
                    logging.info(f"✓ Generated {len(questions)} questions for {chapter_title}")
                    break  # Success, exit retry loop
                logging.warning(f"No questions generated for {chapter_title} on attempt {attempt + 1}")
            except Exception as e:
                logging.error(f"Attempt {attempt + 1}/{MAX_RETRIES} failed for {chapter_title}: {e}")

            # Wait before retrying, but not after the final attempt.
            if attempt < MAX_RETRIES - 1:
                time.sleep(2)

        # Save current state to file
        self.save_current_state()

    def worker_function(self, queue, results):
        """Worker thread function to process chapters from queue.

        Args:
            queue: ``queue.Queue`` yielding ``(book_idx, chapter_idx, chapter)``
                tuples; a ``None`` item tells the worker to exit.
            results: Unused; kept so existing callers' signatures still match.
        """
        while True:
            item = queue.get()
            try:
                if item is None:  # None signals to exit
                    break
                self._process_chapter(*item)
            except Exception:
                # Keep the worker alive: a dead worker would leave the
                # producer blocked in queue.join() forever.
                logging.exception("Unexpected error while processing a queued chapter")
            finally:
                # Always acknowledge the item, even on failure or exit.
                queue.task_done()

    def save_current_state(self):
        """Save the current state of the textbook generation.

        Writes a timestamped snapshot ``textbook_state_<YYYYmmdd_HHMMSS>.json``
        and refreshes ``textbook_latest.json`` in ``OUTPUT_DIR``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        snapshot_path = os.path.join(OUTPUT_DIR, f"textbook_state_{timestamp}.json")
        latest_path = os.path.join(OUTPUT_DIR, "textbook_latest.json")
        temp_path = latest_path + ".tmp"

        # The worker thread and the main thread both call this method; the
        # lock stops them from interleaving writes to the same file.
        with self._save_lock:
            with open(snapshot_path, "w", encoding="utf-8") as f:
                json.dump(self.textbook_data, f, indent=2)

            # Also save to a fixed filename for the latest state. Write to a
            # temporary file and rename so an interrupted run never leaves a
            # truncated textbook_latest.json behind.
            with open(temp_path, "w", encoding="utf-8") as f:
                json.dump(self.textbook_data, f, indent=2)
            os.replace(temp_path, latest_path)

    def process_in_batches(self):
        """Process all chapters in batches of ``BATCH_SIZE``.

        One worker thread consumes the chapters. Each batch is enqueued and
        then awaited with ``queue.join()``, so "Batch complete" is logged only
        after every chapter in the batch has actually finished processing.
        """
        # Collect all chapters for processing
        pending = [
            (book_idx, chapter_idx, chapter)
            for book_idx, book in enumerate(self.textbook_data["books"])
            for chapter_idx, chapter in enumerate(book["chapters"])
        ]
        total_chapters = len(pending)

        # Create and start worker thread
        queue = Queue()
        worker = Thread(target=self.worker_function, args=(queue, None))
        worker.daemon = True  # Allow the program to exit even if the thread is running
        worker.start()

        # Process in batches
        for start in range(0, total_chapters, BATCH_SIZE):
            batch = pending[start:start + BATCH_SIZE]
            logging.info(f"Processing batch of {len(batch)} chapters. {total_chapters - start} remaining.")

            for item in batch:
                queue.put(item)

            # Blocks until the worker has called task_done() for every item,
            # i.e. until processing has finished, not merely been dequeued.
            queue.join()

            processed = start + len(batch)
            logging.info(f"Batch complete. {processed}/{total_chapters} chapters processed.")

            # Save current state
            self.save_current_state()

        # Signal worker to exit
        queue.put(None)
        worker.join()

        # Save final state
        self.save_current_state()
        logging.info("All chapters processed. Textbook generation complete.")


def main():
    """Run the full textbook generation and log how long it took."""
    start_time = datetime.now()
    logging.info(f"Starting textbook generation at {start_time}")

    generator = CalculusTextbookGenerator()
    generator.process_in_batches()

    end_time = datetime.now()
    duration = end_time - start_time
    logging.info(f"Textbook generation completed in {duration}")
    logging.info(f"Final textbook saved to {os.path.join(OUTPUT_DIR, 'textbook_latest.json')}")


if __name__ == "__main__":
    main()