from dataclasses import dataclass
from pathlib import Path

import torch


@dataclass
class TrainingConfig:
    """Configuration for model training.

    Every field has a default, so ``TrainingConfig()`` yields a usable
    configuration. Constructing an instance has a filesystem side effect:
    ``output_dir`` is created (including missing parents) in
    ``__post_init__``.
    """

    # Model parameters
    model_name: str = "microsoft/deberta-v3-large"
    dropout: float = 0.1

    # Training parameters
    num_epochs: int = 5
    learning_rate: float = 1e-5  # Reduced from 2e-5
    warmup_ratio: float = 0.2  # Increased from 0.1
    weight_decay: float = 0.01
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4
    fp16: bool = True

    # Data parameters
    max_length: int = 256
    batch_size: int = 4
    train_ratio: float = 0.8

    # Loss parameters
    pos_weight_multiplier: float = 5.0  # Weight multiplier for positive classes
    label_smoothing: float = 0.1  # Label smoothing factor

    # Output parameters
    output_dir: Path = Path("outputs")
    save_steps: int = 50
    eval_steps: int = 50

    # Device
    # NOTE: this default expression is evaluated once, when the class body is
    # executed (i.e. at import time), not each time an instance is created.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    def __post_init__(self):
        """Normalize ``output_dir`` to a ``Path`` and create it if missing.

        Accepting ``str`` (or any path-like) here keeps the config usable when
        values come from sources that only produce strings; calling ``.mkdir``
        on a raw ``str`` would otherwise raise ``AttributeError``.
        """
        self.output_dir = Path(self.output_dir)
        # parents=True builds intermediate directories; exist_ok=True makes
        # repeated construction with the same directory a no-op.
        self.output_dir.mkdir(parents=True, exist_ok=True)


# Test code
if __name__ == "__main__":
    # Create default config
    default_config = TrainingConfig()
    print("\n=== Default Configuration ===")
    print(f"Model name: {default_config.model_name}")
    print(f"Batch size: {default_config.batch_size}")
    print(f"Learning rate: {default_config.learning_rate}")
    print(f"Device: {default_config.device}")