Spaces:
Runtime error
Runtime error
File size: 3,837 Bytes
3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 3ab6d8e 937a410 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import sys
import logging
from pathlib import Path
import os
import torch
from transformers import set_seed
# Environment tweaks for memory optimization, applied at import time so they
# are in place before any of the project modules below are loaded.
for _env_name, _env_value in (
    ('PYTORCH_CUDA_ALLOC_CONF', 'max_split_size_mb:128'),
    ('TOKENIZERS_PARALLELISM', 'false'),
):
    os.environ[_env_name] = _env_value
# Import the necessary modules from your project
# (the scripts directory is added to the module search path first).
sys.path.append("./scripts")
from scripts.models.model import NarrativeClassifier
from scripts.models.dataset import NarrativeDataset
from scripts.config.config import TrainingConfig
from scripts.data_processing.advanced_preprocessor import AdvancedNarrativeProcessor
from scripts.training.trainer import NarrativeTrainer
def setup_logging():
    """Configure root logging at INFO level and return this module's logger.

    Records are formatted as ``<timestamp> - <level> - <message>`` with a
    ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Returns:
        logging.Logger: the logger named after this module.
    """
    log_settings = {
        'level': logging.INFO,
        'format': '%(asctime)s - %(levelname)s - %(message)s',
        'datefmt': '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**log_settings)
    module_logger = logging.getLogger(__name__)
    return module_logger
def main():
    """Run the narrative-classifier training pipeline end to end.

    Steps, in order: configure logging, seed the random number generators,
    report the GPU (when one is present), load and preprocess the annotated
    data, build the train/validation datasets, construct the model and the
    training configuration, then hand everything to ``NarrativeTrainer`` and
    log the final metrics.

    Raises:
        Exception: any error raised while building the trainer, training, or
            logging the final metrics is logged with its traceback and then
            re-raised unchanged.
    """
    # Set up logging
    logger = setup_logging()
    logger.info("Initializing training process...")

    # Query CUDA once and reuse the answer for seeding, cache handling and
    # the mixed-precision switch below.
    cuda_available = torch.cuda.is_available()

    # Set random seeds for reproducibility
    seed = 42
    set_seed(seed)
    torch.manual_seed(seed)
    if cuda_available:
        torch.cuda.manual_seed_all(seed)
        # Clear GPU cache before anything is allocated
        torch.cuda.empty_cache()
        logger.info(f"CUDA available. Using GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    else:
        logger.warning("CUDA not available. Training on CPU with fp16 mixed precision disabled.")

    # Load and process the dataset
    annotations_file = "./data/subtask-2-annotations.txt"
    raw_dir = "./data/raw"
    logger.info("Loading and processing dataset...")
    processor = AdvancedNarrativeProcessor(
        annotations_file=annotations_file,
        raw_dir=raw_dir
    )
    processed_data = processor.load_and_process_data()

    # Create datasets
    train_dataset = NarrativeDataset(processed_data['train'])
    val_dataset = NarrativeDataset(processed_data['val'])
    logger.info(f"Loaded dataset with {len(train_dataset)} training samples and {len(val_dataset)} validation samples.")

    # Initialize model
    logger.info("Initializing the model...")
    model = NarrativeClassifier(
        num_labels=train_dataset.get_num_labels(),
        model_name="microsoft/deberta-v3-large"
    )

    # Define optimized training configuration
    config = TrainingConfig(
        output_dir=Path("./output"),
        num_epochs=5,
        batch_size=4,  # Reduced batch size for memory
        learning_rate=2e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        max_grad_norm=1.0,
        eval_steps=50,
        save_steps=50,
        # Mixed precision only when a GPU is present.
        # NOTE(review): presumably the trainer's fp16 path relies on CUDA
        # AMP -- confirm against NarrativeTrainer.
        fp16=cuda_available,
        gradient_accumulation_steps=4,  # Gradient accumulation
        max_length=256  # Reduced sequence length
    )
    logger.info("Training configuration:")
    for key, value in vars(config).items():
        logger.info(f" {key}: {value}")

    try:
        # Initialize trainer
        trainer = NarrativeTrainer(
            model=model,
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            config=config
        )
        # Start training
        logger.info("Starting the training process...")
        history = trainer.train()
        # Log final metrics
        logger.info("Training completed successfully!")
        logger.info("Final metrics:")
        logger.info(f" Best validation F1: {trainer.best_val_f1:.4f}")
        logger.info(f" Final training loss: {history['train_loss'][-1]:.4f}")
    except Exception as e:
        # logger.exception records the traceback alongside the message; the
        # error is still propagated to the caller.
        logger.exception("Training failed with error: %s", e)
        raise
    finally:
        # Clean up
        if cuda_available:
            torch.cuda.empty_cache()
# Run the training pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()