#!/usr/bin/env bash
# Launch LoRA fine-tuning of an IndicTrans2 English->Indic distilled model.
#
# Usage: ./train_lora.sh [data_dir] [model_name] [output_dir] [src_lang_list] [tgt_lang_list]
#   $1  data_dir       training data directory        (default: en-indic-exp)
#   $2  model_name     HF model id or local path      (default: ai4bharat/indictrans2-en-indic-dist-200M)
#   $3  output_dir     checkpoint/output directory    (default: output)
#   $4  src_lang_list  comma-separated source langs   (default: eng_Latn)
#   $5  tgt_lang_list  comma-separated target langs   (default: 13 Indic languages)
set -euo pipefail

# Pin training to the first GPU.
export CUDA_VISIBLE_DEVICES=0

data_dir=${1:-"en-indic-exp"}
model_name=${2:-"ai4bharat/indictrans2-en-indic-dist-200M"}
output_dir=${3:-"output"}
src_lang_list=${4:-"eng_Latn"}
tgt_lang_list=${5:-"asm_Beng,ben_Beng,guj_Gujr,hin_Deva,kan_Knda,mal_Mlym,mar_Deva,npi_Deva,ory_Orya,pan_Guru,tam_Taml,tel_Telu,urd_Arab"}

# Expansions are quoted so paths/ids containing spaces survive word-splitting.
python3 train_lora.py \
    --data_dir "$data_dir" \
    --model_name "$model_name" \
    --output_dir "$output_dir" \
    --src_lang_list "$src_lang_list" \
    --tgt_lang_list "$tgt_lang_list" \
    --save_steps 1000 \
    --max_steps 1000000 \
    --batch_size 32 \
    --grad_accum_steps 4 \
    --warmup_steps 4000 \
    --max_grad_norm 1.0 \
    --learning_rate 2e-4 \
    --adam_beta1 0.9 \
    --adam_beta2 0.98 \
    --optimizer adamw_torch \
    --lr_scheduler inverse_sqrt \
    --num_workers 16 \
    --metric_for_best_model eval_BLEU \
    --greater_is_better \
    --patience 10 \
    --weight_decay 0.01 \
    --lora_target_modules "q_proj,k_proj" \
    --lora_dropout 0.1 \
    --lora_r 16 \
    --lora_alpha 32 \
    --print_samples