|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9767729182110205, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8518019318580627, |
|
"learning_rate": 5.6012058970266934e-05, |
|
"loss": 1.7421, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.8390570878982544, |
|
"learning_rate": 7.287336883921704e-05, |
|
"loss": 1.5281, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.3700675666332245, |
|
"learning_rate": 8.273660282559241e-05, |
|
"loss": 1.3485, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.3168916404247284, |
|
"learning_rate": 8.973467870816715e-05, |
|
"loss": 1.2968, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.24861344695091248, |
|
"learning_rate": 9.516280807158375e-05, |
|
"loss": 1.2689, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.22251686453819275, |
|
"learning_rate": 9.959791269454252e-05, |
|
"loss": 1.2434, |
|
"max_memory_allocated (GB)": 91.86, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.23426611721515656, |
|
"learning_rate": 9.959204487506375e-05, |
|
"loss": 1.2152, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.45850667357444763, |
|
"learning_rate": 9.908210096889343e-05, |
|
"loss": 1.2108, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4196653366088867, |
|
"learning_rate": 9.85721570627231e-05, |
|
"loss": 1.1913, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5248636603355408, |
|
"learning_rate": 9.806221315655279e-05, |
|
"loss": 1.1924, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.3434283137321472, |
|
"learning_rate": 9.755226925038246e-05, |
|
"loss": 1.1558, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.47737815976142883, |
|
"learning_rate": 9.704232534421214e-05, |
|
"loss": 1.1492, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.47788286209106445, |
|
"learning_rate": 9.653238143804181e-05, |
|
"loss": 1.1486, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.45408132672309875, |
|
"learning_rate": 9.60224375318715e-05, |
|
"loss": 1.1456, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4091607630252838, |
|
"learning_rate": 9.551249362570118e-05, |
|
"loss": 1.1365, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.5064594745635986, |
|
"learning_rate": 9.500254971953085e-05, |
|
"loss": 1.137, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4288266897201538, |
|
"learning_rate": 9.449260581336054e-05, |
|
"loss": 1.1181, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.3854447901248932, |
|
"learning_rate": 9.398266190719021e-05, |
|
"loss": 1.1091, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.4143249988555908, |
|
"learning_rate": 9.347271800101989e-05, |
|
"loss": 1.1156, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.521230161190033, |
|
"learning_rate": 9.296277409484956e-05, |
|
"loss": 1.1117, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.487106055021286, |
|
"learning_rate": 9.245283018867925e-05, |
|
"loss": 1.1003, |
|
"max_memory_allocated (GB)": 91.87, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.4616335928440094, |
|
"learning_rate": 9.194288628250894e-05, |
|
"loss": 1.0992, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3908211290836334, |
|
"learning_rate": 9.14329423763386e-05, |
|
"loss": 1.1074, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.4411673843860626, |
|
"learning_rate": 9.092299847016829e-05, |
|
"loss": 1.1055, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4123621881008148, |
|
"learning_rate": 9.041305456399796e-05, |
|
"loss": 1.0883, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5461438298225403, |
|
"learning_rate": 8.990311065782764e-05, |
|
"loss": 1.0928, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4907448887825012, |
|
"learning_rate": 8.939316675165733e-05, |
|
"loss": 1.0912, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.45152169466018677, |
|
"learning_rate": 8.8883222845487e-05, |
|
"loss": 1.0891, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.41472557187080383, |
|
"learning_rate": 8.837327893931669e-05, |
|
"loss": 1.0864, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.45566004514694214, |
|
"learning_rate": 8.786333503314635e-05, |
|
"loss": 1.0776, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3909231424331665, |
|
"learning_rate": 8.735339112697604e-05, |
|
"loss": 1.0801, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.39705774188041687, |
|
"learning_rate": 8.684344722080571e-05, |
|
"loss": 1.0746, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.4257935881614685, |
|
"learning_rate": 8.633350331463539e-05, |
|
"loss": 1.0738, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.41336777806282043, |
|
"learning_rate": 8.582355940846507e-05, |
|
"loss": 1.0811, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3621828854084015, |
|
"learning_rate": 8.531361550229475e-05, |
|
"loss": 1.0762, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.398189902305603, |
|
"learning_rate": 8.480367159612444e-05, |
|
"loss": 1.0622, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.37738627195358276, |
|
"learning_rate": 8.42937276899541e-05, |
|
"loss": 1.06, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.40790703892707825, |
|
"learning_rate": 8.378378378378379e-05, |
|
"loss": 1.0768, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.35230451822280884, |
|
"learning_rate": 8.327383987761347e-05, |
|
"loss": 1.0631, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.37737661600112915, |
|
"learning_rate": 8.276389597144315e-05, |
|
"loss": 1.0665, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.39823117852211, |
|
"learning_rate": 8.225395206527282e-05, |
|
"loss": 1.0739, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.38277310132980347, |
|
"learning_rate": 8.17440081591025e-05, |
|
"loss": 1.07, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34220802783966064, |
|
"learning_rate": 8.123406425293219e-05, |
|
"loss": 1.0698, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3858403265476227, |
|
"learning_rate": 8.072412034676186e-05, |
|
"loss": 1.0488, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.36855727434158325, |
|
"learning_rate": 8.021417644059154e-05, |
|
"loss": 1.0612, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.4122312664985657, |
|
"learning_rate": 7.970423253442122e-05, |
|
"loss": 1.0566, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.38682645559310913, |
|
"learning_rate": 7.91942886282509e-05, |
|
"loss": 1.0575, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.38858598470687866, |
|
"learning_rate": 7.868434472208057e-05, |
|
"loss": 1.0579, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3749813139438629, |
|
"learning_rate": 7.817440081591025e-05, |
|
"loss": 1.0531, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.36404120922088623, |
|
"learning_rate": 7.766445690973994e-05, |
|
"loss": 1.0447, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.4445250332355499, |
|
"learning_rate": 7.715451300356961e-05, |
|
"loss": 1.0526, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3644183278083801, |
|
"learning_rate": 7.664456909739929e-05, |
|
"loss": 1.0494, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.34624868631362915, |
|
"learning_rate": 7.613462519122897e-05, |
|
"loss": 1.0572, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.3788256347179413, |
|
"learning_rate": 7.562468128505865e-05, |
|
"loss": 1.0502, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.3667903542518616, |
|
"learning_rate": 7.511473737888832e-05, |
|
"loss": 1.0505, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.37510526180267334, |
|
"learning_rate": 7.460479347271801e-05, |
|
"loss": 1.045, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.3509054183959961, |
|
"learning_rate": 7.409484956654769e-05, |
|
"loss": 1.0504, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.3294220268726349, |
|
"learning_rate": 7.358490566037736e-05, |
|
"loss": 1.0573, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.34325262904167175, |
|
"learning_rate": 7.307496175420703e-05, |
|
"loss": 1.0445, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.3679581880569458, |
|
"learning_rate": 7.256501784803672e-05, |
|
"loss": 1.0445, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.3220757246017456, |
|
"learning_rate": 7.20550739418664e-05, |
|
"loss": 1.0458, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.3660426139831543, |
|
"learning_rate": 7.154513003569607e-05, |
|
"loss": 1.0447, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.32533150911331177, |
|
"learning_rate": 7.103518612952576e-05, |
|
"loss": 1.0471, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.33115923404693604, |
|
"learning_rate": 7.052524222335543e-05, |
|
"loss": 1.0431, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.337576299905777, |
|
"learning_rate": 7.001529831718512e-05, |
|
"loss": 1.057, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.3337574303150177, |
|
"learning_rate": 6.950535441101478e-05, |
|
"loss": 1.0408, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.35560840368270874, |
|
"learning_rate": 6.899541050484447e-05, |
|
"loss": 1.0363, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3454528748989105, |
|
"learning_rate": 6.848546659867415e-05, |
|
"loss": 1.0412, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.3608352541923523, |
|
"learning_rate": 6.797552269250382e-05, |
|
"loss": 1.0578, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.332792729139328, |
|
"learning_rate": 6.746557878633351e-05, |
|
"loss": 1.0426, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4059067666530609, |
|
"learning_rate": 6.695563488016318e-05, |
|
"loss": 1.0441, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.3741580843925476, |
|
"learning_rate": 6.644569097399287e-05, |
|
"loss": 1.0525, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.3646301329135895, |
|
"learning_rate": 6.593574706782255e-05, |
|
"loss": 1.0302, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.35956060886383057, |
|
"learning_rate": 6.542580316165222e-05, |
|
"loss": 1.0439, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3517419099807739, |
|
"learning_rate": 6.491585925548191e-05, |
|
"loss": 1.0314, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.33927640318870544, |
|
"learning_rate": 6.440591534931157e-05, |
|
"loss": 1.042, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.3502146005630493, |
|
"learning_rate": 6.389597144314126e-05, |
|
"loss": 1.0416, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.37221387028694153, |
|
"learning_rate": 6.338602753697093e-05, |
|
"loss": 1.0453, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3718739449977875, |
|
"learning_rate": 6.287608363080062e-05, |
|
"loss": 1.0392, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.35249418020248413, |
|
"learning_rate": 6.23661397246303e-05, |
|
"loss": 1.0413, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.35906028747558594, |
|
"learning_rate": 6.185619581845997e-05, |
|
"loss": 1.0375, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.33932170271873474, |
|
"learning_rate": 6.134625191228966e-05, |
|
"loss": 1.0286, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.33107632398605347, |
|
"learning_rate": 6.0836308006119326e-05, |
|
"loss": 1.0319, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.32848185300827026, |
|
"learning_rate": 6.032636409994901e-05, |
|
"loss": 1.0329, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.33085334300994873, |
|
"learning_rate": 5.981642019377869e-05, |
|
"loss": 1.0326, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.3043057322502136, |
|
"learning_rate": 5.930647628760837e-05, |
|
"loss": 1.0379, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3407464027404785, |
|
"learning_rate": 5.879653238143804e-05, |
|
"loss": 1.0336, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.34069886803627014, |
|
"learning_rate": 5.8286588475267726e-05, |
|
"loss": 1.0294, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.4303439259529114, |
|
"learning_rate": 5.777664456909741e-05, |
|
"loss": 1.0223, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.3378705382347107, |
|
"learning_rate": 5.7266700662927075e-05, |
|
"loss": 1.042, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.4227118194103241, |
|
"learning_rate": 5.6756756756756757e-05, |
|
"loss": 1.0301, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.36343687772750854, |
|
"learning_rate": 5.624681285058644e-05, |
|
"loss": 1.0249, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.37661993503570557, |
|
"learning_rate": 5.573686894441612e-05, |
|
"loss": 1.0201, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.38610127568244934, |
|
"learning_rate": 5.5226925038245794e-05, |
|
"loss": 1.0351, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.32767486572265625, |
|
"learning_rate": 5.4716981132075475e-05, |
|
"loss": 1.0364, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.36714789271354675, |
|
"learning_rate": 5.4207037225905157e-05, |
|
"loss": 1.0252, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.34143558144569397, |
|
"learning_rate": 5.369709331973484e-05, |
|
"loss": 1.0266, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3287556767463684, |
|
"learning_rate": 5.3187149413564506e-05, |
|
"loss": 1.0265, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.33613139390945435, |
|
"learning_rate": 5.267720550739419e-05, |
|
"loss": 1.0215, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.32623520493507385, |
|
"learning_rate": 5.216726160122387e-05, |
|
"loss": 1.0213, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3506806790828705, |
|
"learning_rate": 5.165731769505354e-05, |
|
"loss": 1.0302, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.3803856074810028, |
|
"learning_rate": 5.1147373788883224e-05, |
|
"loss": 1.021, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.3483767807483673, |
|
"learning_rate": 5.0637429882712906e-05, |
|
"loss": 1.0302, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.32279789447784424, |
|
"learning_rate": 5.012748597654259e-05, |
|
"loss": 1.0283, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.3579857349395752, |
|
"learning_rate": 4.961754207037226e-05, |
|
"loss": 1.0245, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.34313324093818665, |
|
"learning_rate": 4.910759816420194e-05, |
|
"loss": 1.0252, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.3254711925983429, |
|
"learning_rate": 4.859765425803162e-05, |
|
"loss": 1.0208, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.31281670928001404, |
|
"learning_rate": 4.80877103518613e-05, |
|
"loss": 1.0265, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.34415584802627563, |
|
"learning_rate": 4.7577766445690974e-05, |
|
"loss": 1.0241, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.3235465884208679, |
|
"learning_rate": 4.7067822539520655e-05, |
|
"loss": 1.0134, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.33159807324409485, |
|
"learning_rate": 4.655787863335033e-05, |
|
"loss": 1.0299, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.38500943779945374, |
|
"learning_rate": 4.604793472718002e-05, |
|
"loss": 1.0233, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.40598344802856445, |
|
"learning_rate": 4.553799082100969e-05, |
|
"loss": 1.0148, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.3900890350341797, |
|
"learning_rate": 4.5028046914839374e-05, |
|
"loss": 1.0104, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.34100133180618286, |
|
"learning_rate": 4.451810300866905e-05, |
|
"loss": 1.0137, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.3399648666381836, |
|
"learning_rate": 4.400815910249872e-05, |
|
"loss": 1.0157, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.3029673397541046, |
|
"learning_rate": 4.3498215196328404e-05, |
|
"loss": 1.0307, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.36426523327827454, |
|
"learning_rate": 4.2988271290158086e-05, |
|
"loss": 1.0277, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.33748626708984375, |
|
"learning_rate": 4.247832738398777e-05, |
|
"loss": 1.0119, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.31815487146377563, |
|
"learning_rate": 4.196838347781744e-05, |
|
"loss": 1.0277, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.32347097992897034, |
|
"learning_rate": 4.145843957164712e-05, |
|
"loss": 1.025, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.33658409118652344, |
|
"learning_rate": 4.09484956654768e-05, |
|
"loss": 1.0213, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.34646546840667725, |
|
"learning_rate": 4.043855175930648e-05, |
|
"loss": 1.0226, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.3330092430114746, |
|
"learning_rate": 3.992860785313616e-05, |
|
"loss": 1.0261, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.33180761337280273, |
|
"learning_rate": 3.9418663946965835e-05, |
|
"loss": 1.0199, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.33087068796157837, |
|
"learning_rate": 3.8908720040795516e-05, |
|
"loss": 1.0162, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.3305363059043884, |
|
"learning_rate": 3.839877613462519e-05, |
|
"loss": 1.0144, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.31946706771850586, |
|
"learning_rate": 3.788883222845487e-05, |
|
"loss": 1.0321, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.3293123245239258, |
|
"learning_rate": 3.737888832228455e-05, |
|
"loss": 1.0158, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.34182894229888916, |
|
"learning_rate": 3.6868944416114235e-05, |
|
"loss": 1.013, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.39147132635116577, |
|
"learning_rate": 3.635900050994391e-05, |
|
"loss": 1.0301, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.3073470890522003, |
|
"learning_rate": 3.5849056603773584e-05, |
|
"loss": 1.0097, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.3350085914134979, |
|
"learning_rate": 3.5339112697603265e-05, |
|
"loss": 1.0195, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.34246358275413513, |
|
"learning_rate": 3.482916879143294e-05, |
|
"loss": 1.0134, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.35482534766197205, |
|
"learning_rate": 3.431922488526262e-05, |
|
"loss": 1.0189, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.4100535809993744, |
|
"learning_rate": 3.38092809790923e-05, |
|
"loss": 1.023, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.3244258761405945, |
|
"learning_rate": 3.3299337072921984e-05, |
|
"loss": 1.0234, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.36571004986763, |
|
"learning_rate": 3.278939316675166e-05, |
|
"loss": 1.0238, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.3075992166996002, |
|
"learning_rate": 3.227944926058134e-05, |
|
"loss": 1.0118, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.338121622800827, |
|
"learning_rate": 3.1769505354411015e-05, |
|
"loss": 1.0036, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.34049445390701294, |
|
"learning_rate": 3.1259561448240696e-05, |
|
"loss": 0.9942, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.31676411628723145, |
|
"learning_rate": 3.074961754207038e-05, |
|
"loss": 1.0095, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.338091641664505, |
|
"learning_rate": 3.0239673635900052e-05, |
|
"loss": 1.0196, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.3421221673488617, |
|
"learning_rate": 2.9729729729729733e-05, |
|
"loss": 1.0077, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.31057775020599365, |
|
"learning_rate": 2.9219785823559408e-05, |
|
"loss": 0.9983, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.3217770755290985, |
|
"learning_rate": 2.8709841917389093e-05, |
|
"loss": 1.0079, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.31658121943473816, |
|
"learning_rate": 2.8199898011218767e-05, |
|
"loss": 1.0098, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.33192959427833557, |
|
"learning_rate": 2.7689954105048445e-05, |
|
"loss": 1.0187, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.3255631923675537, |
|
"learning_rate": 2.7180010198878126e-05, |
|
"loss": 0.9976, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.3432871699333191, |
|
"learning_rate": 2.66700662927078e-05, |
|
"loss": 1.009, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.32943135499954224, |
|
"learning_rate": 2.6160122386537482e-05, |
|
"loss": 1.0208, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.3141104578971863, |
|
"learning_rate": 2.565017848036716e-05, |
|
"loss": 1.0127, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.33610546588897705, |
|
"learning_rate": 2.5140234574196842e-05, |
|
"loss": 1.0109, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.3454563021659851, |
|
"learning_rate": 2.4630290668026516e-05, |
|
"loss": 1.0021, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.31666409969329834, |
|
"learning_rate": 2.4120346761856198e-05, |
|
"loss": 1.0061, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.3174314498901367, |
|
"learning_rate": 2.3610402855685876e-05, |
|
"loss": 1.0262, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.30132123827934265, |
|
"learning_rate": 2.3100458949515554e-05, |
|
"loss": 1.009, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.3597942292690277, |
|
"learning_rate": 2.2590515043345235e-05, |
|
"loss": 1.0138, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.3302927017211914, |
|
"learning_rate": 2.2080571137174913e-05, |
|
"loss": 0.9999, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.3189152181148529, |
|
"learning_rate": 2.157062723100459e-05, |
|
"loss": 1.0107, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.36191776394844055, |
|
"learning_rate": 2.106068332483427e-05, |
|
"loss": 1.0055, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.34229838848114014, |
|
"learning_rate": 2.0550739418663947e-05, |
|
"loss": 1.0216, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.38762491941452026, |
|
"learning_rate": 2.0040795512493625e-05, |
|
"loss": 1.0092, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.3563317656517029, |
|
"learning_rate": 1.9530851606323306e-05, |
|
"loss": 1.0001, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.3130199611186981, |
|
"learning_rate": 1.9020907700152984e-05, |
|
"loss": 1.0054, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.30740854144096375, |
|
"learning_rate": 1.8510963793982662e-05, |
|
"loss": 1.0125, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.3427772521972656, |
|
"learning_rate": 1.8001019887812343e-05, |
|
"loss": 1.0116, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.325802743434906, |
|
"learning_rate": 1.7491075981642018e-05, |
|
"loss": 1.0003, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.35025614500045776, |
|
"learning_rate": 1.69811320754717e-05, |
|
"loss": 1.0209, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.3218349814414978, |
|
"learning_rate": 1.6471188169301377e-05, |
|
"loss": 1.0133, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.31123262643814087, |
|
"learning_rate": 1.5961244263131055e-05, |
|
"loss": 1.0102, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.3084145784378052, |
|
"learning_rate": 1.5451300356960737e-05, |
|
"loss": 1.0113, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.3246035575866699, |
|
"learning_rate": 1.4941356450790415e-05, |
|
"loss": 1.0147, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.32198068499565125, |
|
"learning_rate": 1.4431412544620093e-05, |
|
"loss": 0.9947, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.32064589858055115, |
|
"learning_rate": 1.3921468638449772e-05, |
|
"loss": 1.0074, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.3246444761753082, |
|
"learning_rate": 1.3411524732279452e-05, |
|
"loss": 1.0125, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.33215099573135376, |
|
"learning_rate": 1.2901580826109128e-05, |
|
"loss": 1.0049, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.32475972175598145, |
|
"learning_rate": 1.2391636919938808e-05, |
|
"loss": 1.0132, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.3086671829223633, |
|
"learning_rate": 1.1881693013768486e-05, |
|
"loss": 0.9977, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.313253790140152, |
|
"learning_rate": 1.1371749107598164e-05, |
|
"loss": 0.9992, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.34922242164611816, |
|
"learning_rate": 1.0861805201427844e-05, |
|
"loss": 1.012, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.3331577777862549, |
|
"learning_rate": 1.0351861295257523e-05, |
|
"loss": 1.0024, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.32964372634887695, |
|
"learning_rate": 9.841917389087201e-06, |
|
"loss": 1.0143, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.3171873390674591, |
|
"learning_rate": 9.33197348291688e-06, |
|
"loss": 1.0094, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.3105911314487457, |
|
"learning_rate": 8.822029576746559e-06, |
|
"loss": 1.0078, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.29468920826911926, |
|
"learning_rate": 8.312085670576237e-06, |
|
"loss": 1.0091, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.3044416606426239, |
|
"learning_rate": 7.802141764405915e-06, |
|
"loss": 1.0013, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.3163166046142578, |
|
"learning_rate": 7.2921978582355945e-06, |
|
"loss": 0.9948, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.31333300471305847, |
|
"learning_rate": 6.782253952065273e-06, |
|
"loss": 1.0095, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.3076794147491455, |
|
"learning_rate": 6.272310045894952e-06, |
|
"loss": 1.0081, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.31765928864479065, |
|
"learning_rate": 5.762366139724631e-06, |
|
"loss": 1.0119, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.31293946504592896, |
|
"learning_rate": 5.252422233554309e-06, |
|
"loss": 1.0115, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.30405518412590027, |
|
"learning_rate": 4.742478327383989e-06, |
|
"loss": 1.009, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.30555757880210876, |
|
"learning_rate": 4.2325344212136666e-06, |
|
"loss": 1.0074, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.2989470362663269, |
|
"learning_rate": 3.7225905150433454e-06, |
|
"loss": 1.0065, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.2925046384334564, |
|
"learning_rate": 3.2126466088730238e-06, |
|
"loss": 1.0149, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.2972151041030884, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 1.0044, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.31795012950897217, |
|
"learning_rate": 2.192758796532382e-06, |
|
"loss": 0.9969, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.29080063104629517, |
|
"learning_rate": 1.6828148903620604e-06, |
|
"loss": 1.0104, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 1990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.3263821005821228, |
|
"learning_rate": 1.1728709841917388e-06, |
|
"loss": 1.0022, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 24.39, |
|
"step": 2000, |
|
"total_memory_available (GB)": 94.62 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2022, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"total_flos": 2.238964389820498e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|