import os
import torch
import json
import time
import logging
from datetime import datetime
from threading import Thread
from queue import Queue
from transformers import AutoTokenizer, AutoModelForCausalLM

# Configuration
PRIMARY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # First model to try
SECONDARY_MODEL = "facebook/opt-1.3b"  # Larger fallback model (1.3B vs. 1.1B parameters)
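# Note: TinyLlama-1.1B-Chat is an instruction-tuned chat model. The plain-text
# prompt used below works, but output quality may improve if the tokenizer's
# chat template (tokenizer.apply_chat_template, where the installed transformers
# version provides it) is applied; this script keeps the simpler plain-prompt approach.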
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 5  # Process 5 chapters at a time
MAX_RETRIES = 3
OUTPUT_DIR = "calculus_textbook_output"
LOG_FILE = "textbook_generation.log"

# Setup logging
os.makedirs(OUTPUT_DIR, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(OUTPUT_DIR, LOG_FILE),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

class ModelManager:
    """Manages loading and switching between language models for text generation."""
    
    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.current_model = None
        
    def load_model(self, model_name):
        """Load a model and its tokenizer if not already loaded."""
        if model_name not in self.models:
            try:
                logging.info(f"Loading model: {model_name}")
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,  # half precision on GPU to save memory
                    device_map="auto" if DEVICE == "cuda" else None  # "auto" placement requires the accelerate package
                )
                model.eval()
                
                self.models[model_name] = model
                self.tokenizers[model_name] = tokenizer
                logging.info(f"Successfully loaded model: {model_name}")
                return True
            except Exception as e:
                logging.error(f"Failed to load model {model_name}: {str(e)}")
                return False
        return True
    
    def set_current_model(self, model_name):
        """Set the current model to use for generation."""
        if model_name not in self.models and not self.load_model(model_name):
            return False
        self.current_model = model_name
        return True
    
    def generate_text(self, prompt, max_length=1024):
        """Generate text using the current model."""
        if not self.current_model:
            raise ValueError("No model selected. Call set_current_model first.")
        
        model = self.models[self.current_model]
        tokenizer = self.tokenizers[self.current_model]
        
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        
        # Sample with moderate temperature/top-p for variety. Note that max_length
        # counts prompt tokens plus generated tokens, so long prompts leave fewer
        # tokens for the answer.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )
            
        # Decode only the newly generated tokens (everything after the prompt);
        # slicing token ids is more robust than trimming the decoded prompt string.
        generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
        return generated_text

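# A minimal standalone sketch of how ModelManager might be used (assumes the
# model weights above can be downloaded and fit on the selected DEVICE):
#
#   manager = ModelManager()
#   if manager.set_current_model(PRIMARY_MODEL):
#       print(manager.generate_text("Differentiate f(x) = x**2 * sin(x).", max_length=256))
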
class CalculusTextbookGenerator:
    """Generates a complete calculus textbook with questions and solutions."""
    
    def __init__(self):
        self.model_manager = ModelManager()
        self.textbook_data = self.create_initial_textbook_structure()
        
    def create_initial_textbook_structure(self):
        """Create the initial structure of the calculus textbook."""
        return {
            "books": [
                {
                    "name": "Calculus 1: Early Transcendentals",
                    "details": "Introduction to single-variable calculus including limits, derivatives, and basic integration techniques.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 6: Applications of Integration",
                            "subChapters": [
                                "6.1: Areas Between Curves", 
                                "6.2: Volumes", 
                                "6.3: Volumes by Cylindrical Shells",
                                "6.4: Work",
                                "6.5: Average Value of a Function"
                            ],
                            "questions": []  # Will be filled with generated questions
                        },
                        {
                            "chapterTitle": "Chapter 8: Further Applications of Integration",
                            "subChapters": [
                                "8.1: Arc Length",
                                "8.2: Area of a Surface of Revolution",
                                "8.3: Applications to Physics and Engineering",
                                "8.4: Applications to Economics and Biology",
                                "8.5: Probability"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 9: Differential Equations",
                            "subChapters": [
                                "9.1: Modeling with Differential Equations",
                                "9.2: Direction Fields and Euler's Method",
                                "9.3: Separable Equations",
                                "9.4: Models for Population Growth",
                                "9.5: Linear Equations",
                                "9.6: Predator–Prey Systems"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 10: Parametric Equations and Polar Coordinates",
                            "subChapters": [
                                "10.1: Curves Defined by Parametric Equations",
                                "10.2: Calculus with Parametric Curves",
                                "10.3: Polar Coordinates",
                                "10.4: Calculus in Polar Coordinates",
                                "10.5: Conic Sections",
                                "10.6: Conic Sections in Polar Coordinates"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 11: Sequences, Series, and Power Series",
                            "subChapters": [
                                "11.1: Sequences",
                                "11.2: Series",
                                "11.3: The Integral Test and Estimates of Sums",
                                "11.4: The Comparison Tests",
                                "11.5: Alternating Series and Absolute Convergence",
                                "11.6: The Ratio and Root Tests",
                                "11.7: Power Series"
                            ],
                            "questions": []
                        }
                    ]
                },
                {
                    "name": "Calculus 2: Advanced Concepts",
                    "details": "Advances into series, sequences, techniques of integration, and vector calculus.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 12: Vectors and the Geometry of Space",
                            "subChapters": [
                                "12.1: Three-Dimensional Coordinate Systems",
                                "12.2: Vectors",
                                "12.3: The Dot Product",
                                "12.4: The Cross Product",
                                "12.5: Equations of Lines and Planes",
                                "12.6: Cylinders and Quadric Surfaces"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 13: Vector Functions",
                            "subChapters": [
                                "13.1: Vector Functions and Space Curves",
                                "13.2: Derivatives and Integrals of Vector Functions",
                                "13.3: Arc Length and Curvature",
                                "13.4: Motion in Space: Velocity and Acceleration"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 14: Partial Derivatives",
                            "subChapters": [
                                "14.1: Functions of Several Variables",
                                "14.2: Limits and Continuity",
                                "14.3: Partial Derivatives",
                                "14.4: Tangent Planes and Linear Approximation",
                                "14.5: The Chain Rule"
                            ],
                            "questions": []
                        }
                    ]
                }
            ]
        }
        
    def generate_question_set(self, chapter_title, subchapter_titles, num_questions=3):
        """Generate a set of questions with step-by-step solutions for a chapter."""
        
        # Try the primary model first; fall back to the secondary model if the
        # primary cannot be loaded at all.
        if not self.model_manager.set_current_model(PRIMARY_MODEL):
            self.model_manager.set_current_model(SECONDARY_MODEL)
        
        prompt = f"""Create {num_questions} calculus questions with detailed step-by-step solutions for:
{chapter_title}

The questions should cover these subchapters:
{', '.join(subchapter_titles)}

For each question:
1. Write a clear, university-level calculus problem
2. Provide a comprehensive step-by-step solution with all math steps shown
3. Include a final answer

Format each question as:
QUESTION: [Problem statement]
SOLUTION:
Step 1: [First step with explanation]
Step 2: [Next step]
...
Final Answer: [The solution]

Make sure to use proper mathematical notation and include a variety of question types.
"""

        try:
            generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            
            # Check if the content looks good
            if len(generated_content) < 200 or "QUESTION" not in generated_content:
                # Try the secondary model if the primary one gave poor results
                logging.warning(f"Primary model gave insufficient results for {chapter_title}. Trying secondary model.")
                self.model_manager.set_current_model(SECONDARY_MODEL)
                generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            
            # Parse the generated content into question objects
            questions = self.parse_questions(generated_content)
            
            if not questions:
                logging.warning(f"Failed to parse any questions from content for {chapter_title}")
                return []
                
            return questions
            
        except Exception as e:
            logging.error(f"Error generating questions for {chapter_title}: {str(e)}")
            return []
            
    def parse_questions(self, content):
        """Parse the generated content into structured question objects."""
        questions = []
        
        # Split by "QUESTION:" or similar markers
        parts = content.split("QUESTION:")
        
        for i, part in enumerate(parts):
            if i == 0:
                continue  # Skip the first part (before the first QUESTION:)
                
            try:
                # Split into question and solution
                if "SOLUTION:" in part:
                    question_text, solution = part.split("SOLUTION:", 1)
                else:
                    # Try alternative formats
                    for marker in ["Solution:", "STEPS:", "Steps:"]:
                        if marker in part:
                            question_text, solution = part.split(marker, 1)
                            break
                    else:
                        question_text = part
                        solution = ""
                
                questions.append({
                    "question": question_text.strip(),
                    "solution": solution.strip()
                })
            except Exception as e:
                logging.error(f"Error parsing question {i}: {str(e)}")
                continue
                
        return questions
    
    def worker_function(self, queue):
        """Worker thread function to process chapters from queue."""
        while True:
            item = queue.get()
            if item is None:  # None signals to exit
                queue.task_done()
                break
            
            book_idx, chapter_idx, chapter = item
            chapter_title = chapter["chapterTitle"]
            subchapters = chapter.get("subChapters", [])
            
            logging.info(f"Processing: {chapter_title}")
            
            # Try to generate questions with retries
            for attempt in range(MAX_RETRIES):
                try:
                    questions = self.generate_question_set(chapter_title, subchapters, num_questions=4)
                    if questions:
                        # Save the questions to the chapter
                        self.textbook_data["books"][book_idx]["chapters"][chapter_idx]["questions"] = questions
                        
                        logging.info(f"✓ Generated {len(questions)} questions for {chapter_title}")
                        break  # Success, exit retry loop
                    else:
                        logging.warning(f"No questions generated for {chapter_title} on attempt {attempt+1}")
                        
                except Exception as e:
                    logging.error(f"Attempt {attempt+1}/{MAX_RETRIES} failed for {chapter_title}: {str(e)}")
                    time.sleep(2)  # Wait before retrying
            
            # Save current state to file
            self.save_current_state()
            queue.task_done()
    
    def save_current_state(self):
        """Save the current state of the textbook generation."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        with open(os.path.join(OUTPUT_DIR, f"textbook_state_{timestamp}.json"), "w") as f:
            json.dump(self.textbook_data, f, indent=2)
        
        # Also save to a fixed filename for the latest state
        with open(os.path.join(OUTPUT_DIR, "textbook_latest.json"), "w") as f:
            json.dump(self.textbook_data, f, indent=2)
    
    def process_in_batches(self):
        """Process all chapters in batches."""
        queue = Queue()
        
        # Queue all chapters for processing
        for book_idx, book in enumerate(self.textbook_data["books"]):
            for chapter_idx, chapter in enumerate(book["chapters"]):
                queue.put((book_idx, chapter_idx, chapter))
        
        # Create and start worker thread
        worker = Thread(target=self.worker_function, args=(queue,))
        worker.daemon = True  # Allow the program to exit even if the thread is running
        worker.start()
        
        # Process in batches
        total_chapters = queue.qsize()
        processed = 0
        
        while processed < total_chapters:
            start_size = queue.qsize()
            if start_size == 0:
                # The worker has already dequeued the remaining chapters; wait for
                # it to finish them instead of spinning on a zero-sized batch.
                queue.join()
                processed = total_chapters
                break

            batch_size = min(BATCH_SIZE, start_size)

            logging.info(f"Processing batch of {batch_size} chapters. {start_size} remaining.")

            # qsize() drops as soon as the worker dequeues an item, so this is an
            # approximate wait for the batch rather than an exact completion count.
            while queue.qsize() > start_size - batch_size:
                time.sleep(2)

            processed += batch_size
            logging.info(f"Batch complete. {processed}/{total_chapters} chapters processed.")

            # Save current state
            self.save_current_state()
        
        # Signal worker to exit
        queue.put(None)
        worker.join()
        
        # Save final state
        self.save_current_state()
        logging.info("All chapters processed. Textbook generation complete.")

def main():
    start_time = datetime.now()
    logging.info(f"Starting textbook generation at {start_time}")
    
    generator = CalculusTextbookGenerator()
    generator.process_in_batches()
    
    end_time = datetime.now()
    duration = end_time - start_time
    logging.info(f"Textbook generation completed in {duration}")
    logging.info(f"Final textbook saved to {os.path.join(OUTPUT_DIR, 'textbook_latest.json')}")

if __name__ == "__main__":
    main()