# Mohammaderfan koupaei
# second
# 06ca50d
from dataclasses import dataclass
from pathlib import Path
import torch
@dataclass
class TrainingConfig:
    """Configuration for model training.

    Groups the model, training, data, loss, output and device settings in a
    single object. Every field has a default, so ``TrainingConfig()`` is
    valid and individual values can be overridden by keyword.

    Note:
        Creating an instance has a filesystem side effect: ``__post_init__``
        creates ``output_dir`` (and any missing parents) if it does not
        already exist.
    """

    # Model parameters
    model_name: str = "microsoft/deberta-v3-large"
    dropout: float = 0.1

    # Training parameters
    num_epochs: int = 5
    learning_rate: float = 1e-5  # Reduced from 2e-5
    warmup_ratio: float = 0.2  # Increased from 0.1
    weight_decay: float = 0.01
    max_grad_norm: float = 1.0
    gradient_accumulation_steps: int = 4
    fp16: bool = True

    # Data parameters
    max_length: int = 256
    batch_size: int = 4
    train_ratio: float = 0.8

    # Loss parameters
    pos_weight_multiplier: float = 5.0  # Weight multiplier for positive classes
    label_smoothing: float = 0.1  # Label smoothing factor

    # Output parameters
    output_dir: Path = Path("outputs")
    save_steps: int = 50
    eval_steps: int = 50

    # Device: this expression runs once, when the class body is executed, so
    # the default reflects CUDA availability at that moment. Pass ``device=``
    # explicitly to override it for a particular instance.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    def __post_init__(self) -> None:
        """Normalise ``output_dir`` to a ``Path`` and create it if missing."""
        # Accept ``str`` / ``os.PathLike`` values as well as ``Path``: without
        # this coercion a string argument would raise AttributeError on
        # ``.mkdir``.
        self.output_dir = Path(self.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
# Manual smoke check: run this module directly to print a few default values.
if __name__ == "__main__":
    # Instantiate with every default left untouched.
    default_config = TrainingConfig()

    # Collect the report first, then emit it in one call; each entry still
    # ends up on its own line, exactly as with separate print statements.
    report_lines = (
        "\n=== Default Configuration ===",
        f"Model name: {default_config.model_name}",
        f"Batch size: {default_config.batch_size}",
        f"Learning rate: {default_config.learning_rate}",
        f"Device: {default_config.device}",
    )
    print(*report_lines, sep="\n")