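"""Training entry point: fine-tunes a DeBERTa-v3-large narrative classifier for subtask 2."""
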
import logging
import os
import subprocess
import sys
from pathlib import Path

import torch
from transformers import set_seed

# Set environment variables for memory optimization
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
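# max_split_size_mb:128 stops the CUDA caching allocator from splitting blocks larger
# than 128 MB, which reduces fragmentation-related OOMs on long runs;
# TOKENIZERS_PARALLELISM=false silences the Hugging Face tokenizers fork warning
# when DataLoader workers are used.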

# Make the project root (which contains the scripts/ package) importable
sys.path.append(str(Path(__file__).resolve().parent))
from scripts.models.model import NarrativeClassifier
from scripts.models.dataset import NarrativeDataset
from scripts.config.config import TrainingConfig
from scripts.data_processing.data_preparation import AdvancedNarrativeProcessor
from scripts.training.trainer import NarrativeTrainer


def setup_spacy():
    """Ensure the spaCy English model is installed, downloading it if necessary."""
    try:
        import spacy
        spacy.load("en_core_web_sm")
    except OSError:
        print("Downloading spaCy model...")
        # Use the current interpreter so the model lands in the active environment
        subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)


def setup_logging():
    """Set up the logging configuration."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    return logging.getLogger(__name__)


def main():
    # Set up logging
    logger = setup_logging()
    logger.info("Initializing training process...")

    # Make sure the spaCy model is available
    setup_spacy()

    # Set random seeds for reproducibility
    set_seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(42)
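    # Note: seeds alone do not guarantee bitwise-identical GPU runs; full determinism
    # would also require torch.backends.cudnn.deterministic = True (at some speed cost).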

    # Clear GPU cache if available
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        logger.info(f"CUDA available. Using GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

    try:
        # Load and process the dataset
        annotations_file = "./data/subtask-2-annotations.txt"
        raw_dir = "./data/raw"
        logger.info("Loading and processing dataset...")
        processor = AdvancedNarrativeProcessor(
            annotations_file=annotations_file,
            raw_dir=raw_dir
        )
        processed_data = processor.load_and_process_data()

        # Create datasets
        train_dataset = NarrativeDataset(processed_data['train'])
        val_dataset = NarrativeDataset(processed_data['val'])
        logger.info(
            f"Loaded dataset with {len(train_dataset)} training samples "
            f"and {len(val_dataset)} validation samples."
        )

        # Initialize model
        logger.info("Initializing the model...")
        model = NarrativeClassifier(
            num_labels=train_dataset.get_num_labels(),
            model_name="microsoft/deberta-v3-large"
        )
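        # num_labels comes from the dataset so the classification head matches the label
        # space; the DeBERTa backbone is presumably fetched from the Hugging Face Hub
        # on first use.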

        # Define optimized training configuration
        config = TrainingConfig(
            output_dir=Path("./output"),
            num_epochs=5,
            batch_size=4,                   # Reduced batch size for memory
            learning_rate=2e-5,
            warmup_ratio=0.1,
            weight_decay=0.01,
            max_grad_norm=1.0,
            eval_steps=50,
            save_steps=50,
            fp16=True,                      # Enable mixed precision
            gradient_accumulation_steps=4,  # Gradient accumulation
            max_length=256                  # Reduced sequence length
        )
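        # Assuming the trainer applies standard gradient accumulation, batch_size=4 with
        # gradient_accumulation_steps=4 gives an effective batch size of 16 per optimizer step.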
logger.info("Training configuration:")
for key, value in vars(config).items():
logger.info(f" {key}: {value}")

        # Initialize trainer
        trainer = NarrativeTrainer(
            model=model,
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            config=config
        )

        # Start training
        logger.info("Starting the training process...")
        history = trainer.train()

        # Log final metrics
        logger.info("Training completed successfully!")
        logger.info("Final metrics:")
        logger.info(f" Best validation F1: {trainer.best_val_f1:.4f}")
        logger.info(f" Final training loss: {history['train_loss'][-1]:.4f}")
    except Exception as e:
        logger.error(f"Training failed with error: {str(e)}")
        raise
    finally:
        # Clean up
        if torch.cuda.is_available():
            torch.cuda.empty_cache()


if __name__ == "__main__":
    main()