Llama3.2-Mamba-3B-distill / all_results.json
Jingyu6
add models
2b07191
{
"epoch": 0.999990383783212,
"eval_loss": 375.7167663574219,
"eval_runtime": 60.6518,
"eval_samples": 4096,
"eval_samples_per_second": 11.475,
"eval_steps_per_second": 0.363,
"total_flos": 4.760752445875814e+16,
"train_loss": 199.8116063359082,
"train_runtime": 218479.6594,
"train_samples": 19473081,
"train_samples_per_second": 15.231,
"train_steps_per_second": 0.238
}