# Page-header residue captured with this script (not part of the program):
# Spaces: Runtime error
import json
import logging
import os
import time
from datetime import datetime
from queue import Queue
from threading import Thread

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Configuration
PRIMARY_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # First model to try
SECONDARY_MODEL = "facebook/opt-1.3b"  # More powerful backup model
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 5  # Process 5 chapters at a time
MAX_RETRIES = 3  # Attempts per chapter before giving up
OUTPUT_DIR = "calculus_textbook_output"
LOG_FILE = "textbook_generation.log"

# Setup logging
# NOTE: both statements below are import-time side effects: the output
# directory is created (the log file lives inside it) and the root logger is
# pointed at that file.
os.makedirs(OUTPUT_DIR, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(OUTPUT_DIR, LOG_FILE),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
class ModelManager:
    """Manages loading and switching between language models for text generation.

    Loaded models and tokenizers are cached by model name in ``self.models``
    and ``self.tokenizers``. ``self.current_model`` holds the name of the
    model used by ``generate_text`` and is ``None`` until
    ``set_current_model`` succeeds.
    """

    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.current_model = None

    def load_model(self, model_name):
        """Load a model and its tokenizer if not already loaded.

        Args:
            model_name: Identifier passed to ``from_pretrained``.

        Returns:
            True if the model is available in the cache after the call,
            False if loading raised (the error is logged, not re-raised).
        """
        if model_name in self.models:
            return True

        try:
            logging.info(f"Loading model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # Half precision and automatic placement only on GPU.
                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
                device_map="auto" if DEVICE == "cuda" else None
            )
            model.eval()
            # Cache both only once everything succeeded, so the two dicts
            # never disagree about which models are loaded.
            self.models[model_name] = model
            self.tokenizers[model_name] = tokenizer
            logging.info(f"Successfully loaded model: {model_name}")
            return True
        except Exception as e:
            logging.error(f"Failed to load model {model_name}: {str(e)}")
            return False

    def set_current_model(self, model_name):
        """Set the current model to use for generation.

        Loads the model first when it is not cached yet.

        Returns:
            True on success; False if the model could not be loaded, in
            which case ``current_model`` is left unchanged.
        """
        if model_name not in self.models and not self.load_model(model_name):
            return False
        self.current_model = model_name
        return True

    def generate_text(self, prompt, max_length=1024):
        """Generate text using the current model.

        Args:
            prompt: Text the model should continue.
            max_length: Forwarded to ``model.generate`` as ``max_length``
                (in transformers this bound includes the prompt tokens).

        Returns:
            The sampled continuation only (prompt removed), stripped of
            surrounding whitespace.

        Raises:
            ValueError: If no model has been selected yet.
        """
        if not self.current_model:
            raise ValueError("No model selected. Call set_current_model first.")

        model = self.models[self.current_model]
        tokenizer = self.tokenizers[self.current_model]
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        # Generate with some randomness for creativity
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # Extract only the generated part. The returned sequence starts with
        # the prompt tokens, so drop them by token count before decoding;
        # slicing the decoded string by character length breaks whenever
        # decoding does not reproduce the prompt text exactly.
        new_tokens = outputs[0][prompt_token_count:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
class CalculusTextbookGenerator:
    """Generates a complete calculus textbook with questions and solutions.

    ``self.textbook_data`` holds the book/chapter structure; each chapter's
    ``"questions"`` list is filled in by ``process_in_batches``.
    """

    # Markers that may separate a question from its solution, in priority order.
    _SOLUTION_MARKERS = ("SOLUTION:", "Solution:", "STEPS:", "Steps:")

    def __init__(self):
        self.model_manager = ModelManager()
        self.textbook_data = self.create_initial_textbook_structure()

    def create_initial_textbook_structure(self):
        """Create the initial structure of the calculus textbook.

        Returns:
            A dict ``{"books": [...]}``; every book has ``name``, ``details``
            and ``chapters``, and every chapter has ``chapterTitle``,
            ``subChapters`` and an empty ``questions`` list.
        """
        return {
            "books": [
                {
                    "name": "Calculus 1: Early Transcendentals",
                    "details": "Introduction to single-variable calculus including limits, derivatives, and basic integration techniques.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 6: Applications of Integration",
                            "subChapters": [
                                "6.1: Areas Between Curves",
                                "6.2: Volumes",
                                "6.3: Volumes by Cylindrical Shells",
                                "6.4: Work",
                                "6.5: Average Value of a Function"
                            ],
                            "questions": []  # Will be filled with generated questions
                        },
                        {
                            "chapterTitle": "Chapter 8: Further Applications of Integration",
                            "subChapters": [
                                "8.1: Arc Length",
                                "8.2: Area of a Surface of Revolution",
                                "8.3: Applications to Physics and Engineering",
                                "8.4: Applications to Economics and Biology",
                                "8.5: Probability"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 9: Differential Equations",
                            "subChapters": [
                                "9.1: Modeling with Differential Equations",
                                "9.2: Direction Fields and Euler's Method",
                                "9.3: Separable Equations",
                                "9.4: Models for Population Growth",
                                "9.5: Linear Equations",
                                "9.6: Predator–Prey Systems"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 10: Parametric Equations and Polar Coordinates",
                            "subChapters": [
                                "10.1: Curves Defined by Parametric Equations",
                                "10.2: Calculus with Parametric Curves",
                                "10.3: Polar Coordinates",
                                "10.4: Calculus in Polar Coordinates",
                                "10.5: Conic Sections",
                                "10.6: Conic Sections in Polar Coordinates"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 11: Sequences, Series, and Power Series",
                            "subChapters": [
                                "11.1: Sequences",
                                "11.2: Series",
                                "11.3: The Integral Test and Estimates of Sums",
                                "11.4: The Comparison Tests",
                                "11.5: Alternating Series and Absolute Convergence",
                                "11.6: The Ratio and Root Tests",
                                "11.7: Power Series"
                            ],
                            "questions": []
                        }
                    ]
                },
                {
                    "name": "Calculus 2: Advanced Concepts",
                    "details": "Advances into series, sequences, techniques of integration, and vector calculus.",
                    "chapters": [
                        {
                            "chapterTitle": "Chapter 12: Vectors and the Geometry of Space",
                            "subChapters": [
                                "12.1: Three-Dimensional Coordinate Systems",
                                "12.2: Vectors",
                                "12.3: The Dot Product",
                                "12.4: The Cross Product",
                                "12.5: Equations of Lines and Planes",
                                "12.6: Cylinders and Quadric Surfaces"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 13: Vector Functions",
                            "subChapters": [
                                "13.1: Vector Functions and Space Curves",
                                "13.2: Derivatives and Integrals of Vector Functions",
                                "13.3: Arc Length and Curvature",
                                "13.4: Motion in Space: Velocity and Acceleration"
                            ],
                            "questions": []
                        },
                        {
                            "chapterTitle": "Chapter 14: Partial Derivatives",
                            "subChapters": [
                                "14.1: Functions of Several Variables",
                                "14.2: Limits and Continuity",
                                "14.3: Partial Derivatives",
                                "14.4: Tangent Planes and Linear Approximation",
                                "14.5: The Chain Rule"
                            ],
                            "questions": []
                        }
                    ]
                }
            ]
        }

    def _build_prompt(self, chapter_title, subchapter_titles, num_questions):
        """Return the instruction prompt sent to the model for one chapter."""
        lines = (
            f"Create {num_questions} calculus questions with detailed step-by-step solutions for:",
            f"{chapter_title}",
            "The questions should cover these subchapters:",
            f"{', '.join(subchapter_titles)}",
            "For each question:",
            "1. Write a clear, university-level calculus problem",
            "2. Provide a comprehensive step-by-step solution with all math steps shown",
            "3. Include a final answer",
            "Format each question as:",
            "QUESTION: [Problem statement]",
            "SOLUTION:",
            "Step 1: [First step with explanation]",
            "Step 2: [Next step]",
            "...",
            "Final Answer: [The solution]",
            "Make sure to use proper mathematical notation and include a variety of question types.",
        )
        return "\n".join(lines) + "\n"

    def generate_question_set(self, chapter_title, subchapter_titles, num_questions=3):
        """Generate a set of questions with step-by-step solutions for a chapter.

        The primary model is tried first; the secondary model is used when
        the primary cannot be loaded or its output is too short / lacks a
        "QUESTION" marker.

        Args:
            chapter_title: Title of the chapter, inserted into the prompt.
            subchapter_titles: Iterable of subchapter titles to cover.
            num_questions: How many questions to ask the model for.

        Returns:
            A list of ``{"question": ..., "solution": ...}`` dicts, or an
            empty list if generation or parsing failed (failures are logged).
        """
        prompt = self._build_prompt(chapter_title, subchapter_titles, num_questions)
        try:
            generated_content = ""
            # Try the primary model first. A failed load must not be ignored:
            # otherwise generation would run with no model (or a stale one).
            if self.model_manager.set_current_model(PRIMARY_MODEL):
                generated_content = self.model_manager.generate_text(prompt, max_length=2048)
            else:
                logging.warning(f"Primary model could not be loaded for {chapter_title}.")

            # Check if the content looks good
            if len(generated_content) < 200 or "QUESTION" not in generated_content:
                # Try the secondary model if the primary one gave poor results
                logging.warning(f"Primary model gave insufficient results for {chapter_title}. Trying secondary model.")
                if self.model_manager.set_current_model(SECONDARY_MODEL):
                    generated_content = self.model_manager.generate_text(prompt, max_length=2048)
                else:
                    logging.error(f"Secondary model could not be loaded for {chapter_title}.")

            # Parse the generated content into question objects
            questions = self.parse_questions(generated_content)
            if not questions:
                logging.warning(f"Failed to parse any questions from content for {chapter_title}")
                return []
            return questions
        except Exception as e:
            logging.error(f"Error generating questions for {chapter_title}: {str(e)}")
            return []

    def parse_questions(self, content):
        """Parse the generated content into structured question objects.

        The text is split on "QUESTION:"; each piece is split once more on
        the first solution marker found (see ``_SOLUTION_MARKERS``). Pieces
        without any marker keep an empty solution; pieces whose question
        text is blank are dropped.

        Args:
            content: Raw model output.

        Returns:
            A list of ``{"question": str, "solution": str}`` dicts.
        """
        questions = []
        # Everything before the first "QUESTION:" is preamble, hence [1:].
        for part in content.split("QUESTION:")[1:]:
            question_text, solution = part, ""
            for marker in self._SOLUTION_MARKERS:
                if marker in part:
                    question_text, solution = part.split(marker, 1)
                    break

            question_text = question_text.strip()
            if not question_text:
                # e.g. two consecutive "QUESTION:" markers with nothing between
                continue
            questions.append({
                "question": question_text,
                "solution": solution.strip()
            })
        return questions

    def _process_chapter(self, book_idx, chapter_idx, chapter):
        """Generate questions for one chapter (with retries) and save the state."""
        chapter_title = chapter["chapterTitle"]
        subchapters = chapter.get("subChapters", [])
        logging.info(f"Processing: {chapter_title}")

        # Try to generate questions with retries
        for attempt in range(MAX_RETRIES):
            try:
                questions = self.generate_question_set(chapter_title, subchapters, num_questions=4)
            except Exception as e:
                logging.error(f"Attempt {attempt+1}/{MAX_RETRIES} failed for {chapter_title}: {str(e)}")
            else:
                if questions:
                    # Save the questions to the chapter
                    self.textbook_data["books"][book_idx]["chapters"][chapter_idx]["questions"] = questions
                    logging.info(f"✓ Generated {len(questions)} questions for {chapter_title}")
                    break  # Success, exit retry loop
                logging.warning(f"No questions generated for {chapter_title} on attempt {attempt+1}")

            if attempt + 1 < MAX_RETRIES:
                time.sleep(2)  # Wait before retrying

        # Save current state to file
        self.save_current_state()

    def worker_function(self, queue, results):
        """Worker thread function to process chapters from queue.

        Args:
            queue: Queue of ``(book_idx, chapter_idx, chapter)`` tuples; a
                ``None`` item tells the worker to exit.
            results: Unused; kept for interface compatibility.
        """
        while True:
            item = queue.get()
            try:
                if item is None:  # None signals to exit
                    break
                book_idx, chapter_idx, chapter = item
                self._process_chapter(book_idx, chapter_idx, chapter)
            except Exception:
                # Keep the worker alive: a dead worker would leave the
                # producer waiting on the queue forever.
                logging.exception("Unexpected error while processing a chapter")
            finally:
                # Always acknowledge the item so queue.join() can return.
                queue.task_done()

    def save_current_state(self):
        """Save the current state of the textbook generation.

        Writes the same JSON twice into ``OUTPUT_DIR``: a timestamped
        snapshot and ``textbook_latest.json`` (overwritten on every call).
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        payload = json.dumps(self.textbook_data, indent=2)  # serialize once
        for filename in (f"textbook_state_{timestamp}.json", "textbook_latest.json"):
            with open(os.path.join(OUTPUT_DIR, filename), "w", encoding="utf-8") as f:
                f.write(payload)

    def process_in_batches(self):
        """Process all chapters in batches.

        Chapters are handed to a single worker thread ``BATCH_SIZE`` at a
        time. Each batch is awaited with ``queue.join()``, which returns only
        once the worker has acknowledged every item, so the progress counter
        is exact and the state is never saved while the worker is writing.
        """
        chapters = [
            (book_idx, chapter_idx, chapter)
            for book_idx, book in enumerate(self.textbook_data["books"])
            for chapter_idx, chapter in enumerate(book["chapters"])
        ]
        total_chapters = len(chapters)

        queue = Queue()
        # Create and start worker thread
        worker = Thread(target=self.worker_function, args=(queue, None))
        worker.daemon = True  # Allow the program to exit even if the thread is running
        worker.start()

        # Process in batches
        processed = 0
        for start in range(0, total_chapters, BATCH_SIZE):
            batch = chapters[start:start + BATCH_SIZE]
            batch_size = len(batch)
            start_size = total_chapters - processed
            logging.info(f"Processing batch of {batch_size} chapters. {start_size} remaining.")
            for item in batch:
                queue.put(item)

            # Wait until this batch is done
            queue.join()
            processed += batch_size
            logging.info(f"Batch complete. {processed}/{total_chapters} chapters processed.")
            # Save current state
            self.save_current_state()

        # Signal worker to exit
        queue.put(None)
        worker.join()

        # Save final state
        self.save_current_state()
        logging.info("All chapters processed. Textbook generation complete.")
def main():
    """Run the whole textbook generation and log timing information.

    Logs the start time, builds a ``CalculusTextbookGenerator``, processes
    every chapter, then logs the elapsed time and the path of the latest
    saved JSON file.
    """
    start_time = datetime.now()
    logging.info(f"Starting textbook generation at {start_time}")

    generator = CalculusTextbookGenerator()
    generator.process_in_batches()

    end_time = datetime.now()
    duration = end_time - start_time
    logging.info(f"Textbook generation completed in {duration}")
    logging.info(f"Final textbook saved to {os.path.join(OUTPUT_DIR, 'textbook_latest.json')}")
# Run the generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()