Spaces:
Runtime error
Runtime error
import sys | |
import logging | |
from pathlib import Path | |
import os | |
import torch | |
from transformers import set_seed | |
import subprocess | |
# Set environment variables for memory optimization.
# NOTE(review): these are assigned after `import torch` above; presumably
# they are only read later (on first CUDA / tokenizer use) -- confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
# Import the necessary modules from your project.
# NOTE(review): "./scripts" is resolved against the current working
# directory, while the project imports below use the `scripts.` package
# prefix -- verify this path entry is actually what makes them importable.
sys.path.append("./scripts")
from scripts.models.model import NarrativeClassifier | |
from scripts.models.dataset import NarrativeDataset | |
from scripts.config.config import TrainingConfig | |
from scripts.data_processing.data_preparation import AdvancedNarrativeProcessor | |
from scripts.training.trainer import NarrativeTrainer | |
def setup_spacy():
    """Ensure the spaCy ``en_core_web_sm`` model is installed.

    Tries to load the model. If loading raises ``OSError`` (treated here as
    "model not installed"), the model is downloaded by running
    ``-m spacy download en_core_web_sm`` in a subprocess.

    The download is launched with ``sys.executable`` so the model is
    installed into the interpreter/environment that is running this script,
    rather than whichever ``python`` happens to be first on ``PATH`` (which
    may be a different environment, or may not exist at all).

    Raises:
        subprocess.CalledProcessError: If the download command exits with a
            non-zero status (``check=True``).
    """
    try:
        # Imported lazily so the ImportError, if any, surfaces from here
        # rather than at module import time.
        import spacy

        spacy.load("en_core_web_sm")
    except OSError:
        print("Downloading spaCy model...")
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=True,
        )
def setup_logging():
    """Configure logging and return this module's logger.

    Applies an INFO-level ``logging.basicConfig`` with a
    ``timestamp - level - message`` layout, then returns the logger named
    after the current module.
    """
    log_settings = {
        "level": logging.INFO,
        "format": '%(asctime)s - %(levelname)s - %(message)s',
        "datefmt": '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**log_settings)
    module_logger = logging.getLogger(__name__)
    return module_logger
def main():
    """Run the narrative-classifier training pipeline from start to finish.

    Sequence: configure logging, make sure the spaCy model is present, seed
    the random number generators, load and process the annotated data, build
    the model and the training configuration, run the trainer and log the
    final metrics.

    Any exception raised while loading data, building the model or training
    is logged and then re-raised. The CUDA cache is emptied afterwards
    whether or not training succeeded.
    """
    logger = setup_logging()
    logger.info("Initializing training process...")

    setup_spacy()

    # One seed for every RNG so runs are reproducible.
    seed = 42
    gpu_present = torch.cuda.is_available()
    set_seed(seed)
    torch.manual_seed(seed)
    if gpu_present:
        torch.cuda.manual_seed_all(seed)
        # Start from an empty cache and report which device is in use.
        # NOTE(review): the value logged as "Available GPU memory" is the
        # device's total_memory, not the currently free amount.
        torch.cuda.empty_cache()
        logger.info(f"CUDA available. Using GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

    try:
        # --- Data -----------------------------------------------------
        logger.info("Loading and processing dataset...")
        narrative_processor = AdvancedNarrativeProcessor(
            annotations_file="./data/subtask-2-annotations.txt",
            raw_dir="./data/raw",
        )
        splits = narrative_processor.load_and_process_data()

        train_dataset = NarrativeDataset(splits['train'])
        val_dataset = NarrativeDataset(splits['val'])
        logger.info(f"Loaded dataset with {len(train_dataset)} training samples and {len(val_dataset)} validation samples.")

        # --- Model ----------------------------------------------------
        logger.info("Initializing the model...")
        classifier = NarrativeClassifier(
            num_labels=train_dataset.get_num_labels(),
            model_name="microsoft/deberta-v3-large",
        )

        # --- Training configuration -----------------------------------
        # Small batches, gradient accumulation, mixed precision and a
        # shortened sequence length are all chosen to limit memory use.
        # NOTE(review): fp16 is enabled unconditionally, including when
        # CUDA is unavailable -- confirm the trainer copes with CPU runs.
        hyperparams = dict(
            output_dir=Path("./output"),
            num_epochs=5,
            batch_size=4,
            learning_rate=2e-5,
            warmup_ratio=0.1,
            weight_decay=0.01,
            max_grad_norm=1.0,
            eval_steps=50,
            save_steps=50,
            fp16=True,
            gradient_accumulation_steps=4,
            max_length=256,
        )
        training_config = TrainingConfig(**hyperparams)

        logger.info("Training configuration:")
        for key, value in vars(training_config).items():
            logger.info(f" {key}: {value}")

        # --- Train ----------------------------------------------------
        trainer = NarrativeTrainer(
            model=classifier,
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            config=training_config,
        )
        logger.info("Starting the training process...")
        history = trainer.train()

        # --- Report ---------------------------------------------------
        logger.info("Training completed successfully!")
        logger.info("Final metrics:")
        logger.info(f" Best validation F1: {trainer.best_val_f1:.4f}")
        logger.info(f" Final training loss: {history['train_loss'][-1]:.4f}")
    except Exception as e:
        # Record the failure, then let it propagate to the caller.
        logger.error(f"Training failed with error: {str(e)}")
        raise
    finally:
        # Release cached GPU memory regardless of the outcome.
        if gpu_present:
            torch.cuda.empty_cache()
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()