File size: 1,388 Bytes
79a1e74
681429a
 
ab8e9a5
73ff168
165fac3
ec71a92
a58e975
1
2
3
4
5
6
7
8
9
'teacher', (0, 13436)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (13824, 448)
'logis/teacher', (27648, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (41472, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (41984, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (42496, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (43008, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (43520, 448)