DATASET = "task-focus + sample from remain datasets" | |
DATASET_FORMAT = 'input-output' | |
PER_DEVICE_TRAIN_BATCH_SIZE = 2 | |
GRADIENT_ACCUMULATION_STEPS = 4 | |
LEARNING_RATE = 0.0003 | |
LR_SCHEDULER_TYPE = 'cosine' | |
WARMUP_RATIO = 0.03 | |
LORA_R = 192 | |
LORA_ALPHA = 64 | |
LORA_DROPOUT = 0.1 | |
TRAIN_ON_SOURCE = False | |
SOURCE_MAX_LENGTH = 1024 | |
TARGET_MAX_LENGTH = 1024 | |
LOGGING_STEPS = 20 | |
SAVE_STEPS = 100 | |
SAVE_TOTAL_LIMIT = 4 |