{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.686635944700461,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04608294930875576,
"grad_norm": 5.774722576141357,
"learning_rate": 4.999970619519374e-05,
"loss": 0.9942,
"num_input_tokens_seen": 6072,
"step": 5
},
{
"epoch": 0.09216589861751152,
"grad_norm": 4.252433776855469,
"learning_rate": 4.9998824787680656e-05,
"loss": 0.6317,
"num_input_tokens_seen": 11928,
"step": 10
},
{
"epoch": 0.1382488479262673,
"grad_norm": 1.8097654581069946,
"learning_rate": 4.999735579817769e-05,
"loss": 0.305,
"num_input_tokens_seen": 18040,
"step": 15
},
{
"epoch": 0.18433179723502305,
"grad_norm": 2.511850118637085,
"learning_rate": 4.9995299261212536e-05,
"loss": 0.1734,
"num_input_tokens_seen": 24016,
"step": 20
},
{
"epoch": 0.2304147465437788,
"grad_norm": 4.347301006317139,
"learning_rate": 4.999265522512283e-05,
"loss": 0.2654,
"num_input_tokens_seen": 29848,
"step": 25
},
{
"epoch": 0.2764976958525346,
"grad_norm": 1.3125089406967163,
"learning_rate": 4.998942375205502e-05,
"loss": 0.1326,
"num_input_tokens_seen": 35440,
"step": 30
},
{
"epoch": 0.3225806451612903,
"grad_norm": 2.6110188961029053,
"learning_rate": 4.998560491796287e-05,
"loss": 0.0996,
"num_input_tokens_seen": 41360,
"step": 35
},
{
"epoch": 0.3686635944700461,
"grad_norm": 2.0633397102355957,
"learning_rate": 4.998119881260576e-05,
"loss": 0.1085,
"num_input_tokens_seen": 47016,
"step": 40
},
{
"epoch": 0.4147465437788018,
"grad_norm": 1.6892552375793457,
"learning_rate": 4.997620553954645e-05,
"loss": 0.1384,
"num_input_tokens_seen": 53056,
"step": 45
},
{
"epoch": 0.4608294930875576,
"grad_norm": 2.2047786712646484,
"learning_rate": 4.997062521614876e-05,
"loss": 0.1176,
"num_input_tokens_seen": 58952,
"step": 50
},
{
"epoch": 0.5069124423963134,
"grad_norm": 2.8315374851226807,
"learning_rate": 4.996445797357477e-05,
"loss": 0.0888,
"num_input_tokens_seen": 64760,
"step": 55
},
{
"epoch": 0.5529953917050692,
"grad_norm": 2.423229932785034,
"learning_rate": 4.995770395678171e-05,
"loss": 0.0812,
"num_input_tokens_seen": 70672,
"step": 60
},
{
"epoch": 0.5990783410138248,
"grad_norm": 2.2459795475006104,
"learning_rate": 4.9950363324518584e-05,
"loss": 0.1233,
"num_input_tokens_seen": 76608,
"step": 65
},
{
"epoch": 0.6451612903225806,
"grad_norm": 2.267782211303711,
"learning_rate": 4.9942436249322444e-05,
"loss": 0.1446,
"num_input_tokens_seen": 82336,
"step": 70
},
{
"epoch": 0.6912442396313364,
"grad_norm": 2.2236413955688477,
"learning_rate": 4.993392291751431e-05,
"loss": 0.0736,
"num_input_tokens_seen": 88120,
"step": 75
},
{
"epoch": 0.7373271889400922,
"grad_norm": 1.833316683769226,
"learning_rate": 4.99248235291948e-05,
"loss": 0.1129,
"num_input_tokens_seen": 93808,
"step": 80
},
{
"epoch": 0.783410138248848,
"grad_norm": 2.8768177032470703,
"learning_rate": 4.991513829823945e-05,
"loss": 0.1426,
"num_input_tokens_seen": 99928,
"step": 85
},
{
"epoch": 0.8294930875576036,
"grad_norm": 2.096641778945923,
"learning_rate": 4.990486745229364e-05,
"loss": 0.1401,
"num_input_tokens_seen": 106288,
"step": 90
},
{
"epoch": 0.8755760368663594,
"grad_norm": 2.371711492538452,
"learning_rate": 4.9894011232767294e-05,
"loss": 0.1127,
"num_input_tokens_seen": 112136,
"step": 95
},
{
"epoch": 0.9216589861751152,
"grad_norm": 0.6439377665519714,
"learning_rate": 4.9882569894829144e-05,
"loss": 0.0524,
"num_input_tokens_seen": 117936,
"step": 100
},
{
"epoch": 0.967741935483871,
"grad_norm": 2.412799835205078,
"learning_rate": 4.987054370740083e-05,
"loss": 0.0728,
"num_input_tokens_seen": 124152,
"step": 105
},
{
"epoch": 1.0138248847926268,
"grad_norm": 2.985877513885498,
"learning_rate": 4.9857932953150465e-05,
"loss": 0.0449,
"num_input_tokens_seen": 129872,
"step": 110
},
{
"epoch": 1.0599078341013826,
"grad_norm": 3.5899670124053955,
"learning_rate": 4.984473792848607e-05,
"loss": 0.0506,
"num_input_tokens_seen": 136080,
"step": 115
},
{
"epoch": 1.1059907834101383,
"grad_norm": 2.940323829650879,
"learning_rate": 4.983095894354858e-05,
"loss": 0.0431,
"num_input_tokens_seen": 141712,
"step": 120
},
{
"epoch": 1.1520737327188941,
"grad_norm": 2.948409080505371,
"learning_rate": 4.981659632220455e-05,
"loss": 0.0318,
"num_input_tokens_seen": 147544,
"step": 125
},
{
"epoch": 1.1981566820276497,
"grad_norm": 1.2344759702682495,
"learning_rate": 4.9801650402038555e-05,
"loss": 0.0593,
"num_input_tokens_seen": 153344,
"step": 130
},
{
"epoch": 1.2442396313364055,
"grad_norm": 3.271662473678589,
"learning_rate": 4.9786121534345265e-05,
"loss": 0.0617,
"num_input_tokens_seen": 159160,
"step": 135
},
{
"epoch": 1.2903225806451613,
"grad_norm": 3.5447471141815186,
"learning_rate": 4.977001008412113e-05,
"loss": 0.0456,
"num_input_tokens_seen": 164760,
"step": 140
},
{
"epoch": 1.336405529953917,
"grad_norm": 4.001632213592529,
"learning_rate": 4.9753316430055894e-05,
"loss": 0.068,
"num_input_tokens_seen": 170624,
"step": 145
},
{
"epoch": 1.3824884792626728,
"grad_norm": 1.6239632368087769,
"learning_rate": 4.973604096452361e-05,
"loss": 0.0383,
"num_input_tokens_seen": 176480,
"step": 150
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.19192902743816376,
"learning_rate": 4.9718184093573475e-05,
"loss": 0.0351,
"num_input_tokens_seen": 182056,
"step": 155
},
{
"epoch": 1.4746543778801844,
"grad_norm": 3.1995279788970947,
"learning_rate": 4.969974623692023e-05,
"loss": 0.1139,
"num_input_tokens_seen": 188032,
"step": 160
},
{
"epoch": 1.52073732718894,
"grad_norm": 2.926187038421631,
"learning_rate": 4.9680727827934354e-05,
"loss": 0.0738,
"num_input_tokens_seen": 194112,
"step": 165
},
{
"epoch": 1.5668202764976957,
"grad_norm": 1.8316713571548462,
"learning_rate": 4.966112931363185e-05,
"loss": 0.0567,
"num_input_tokens_seen": 200368,
"step": 170
},
{
"epoch": 1.6129032258064515,
"grad_norm": 2.283552885055542,
"learning_rate": 4.964095115466373e-05,
"loss": 0.0793,
"num_input_tokens_seen": 206520,
"step": 175
},
{
"epoch": 1.6589861751152073,
"grad_norm": 2.575171709060669,
"learning_rate": 4.962019382530521e-05,
"loss": 0.0564,
"num_input_tokens_seen": 212328,
"step": 180
},
{
"epoch": 1.705069124423963,
"grad_norm": 3.201814651489258,
"learning_rate": 4.959885781344452e-05,
"loss": 0.0677,
"num_input_tokens_seen": 218456,
"step": 185
},
{
"epoch": 1.7511520737327189,
"grad_norm": 2.607283592224121,
"learning_rate": 4.9576943620571507e-05,
"loss": 0.0474,
"num_input_tokens_seen": 224240,
"step": 190
},
{
"epoch": 1.7972350230414746,
"grad_norm": 1.2559226751327515,
"learning_rate": 4.9554451761765766e-05,
"loss": 0.053,
"num_input_tokens_seen": 230456,
"step": 195
},
{
"epoch": 1.8433179723502304,
"grad_norm": 1.7003673315048218,
"learning_rate": 4.953138276568462e-05,
"loss": 0.0477,
"num_input_tokens_seen": 236448,
"step": 200
},
{
"epoch": 1.8894009216589862,
"grad_norm": 2.2908496856689453,
"learning_rate": 4.950773717455061e-05,
"loss": 0.0581,
"num_input_tokens_seen": 242536,
"step": 205
},
{
"epoch": 1.935483870967742,
"grad_norm": 3.7818005084991455,
"learning_rate": 4.948351554413879e-05,
"loss": 0.0583,
"num_input_tokens_seen": 248352,
"step": 210
},
{
"epoch": 1.9815668202764978,
"grad_norm": 1.7777669429779053,
"learning_rate": 4.945871844376369e-05,
"loss": 0.0245,
"num_input_tokens_seen": 253928,
"step": 215
},
{
"epoch": 2.0276497695852536,
"grad_norm": 1.5329607725143433,
"learning_rate": 4.94333464562659e-05,
"loss": 0.0354,
"num_input_tokens_seen": 259904,
"step": 220
},
{
"epoch": 2.0737327188940093,
"grad_norm": 2.1847078800201416,
"learning_rate": 4.940740017799833e-05,
"loss": 0.0352,
"num_input_tokens_seen": 265656,
"step": 225
},
{
"epoch": 2.119815668202765,
"grad_norm": 1.6991156339645386,
"learning_rate": 4.938088021881233e-05,
"loss": 0.0416,
"num_input_tokens_seen": 271576,
"step": 230
},
{
"epoch": 2.165898617511521,
"grad_norm": 3.675462484359741,
"learning_rate": 4.935378720204319e-05,
"loss": 0.047,
"num_input_tokens_seen": 277464,
"step": 235
},
{
"epoch": 2.2119815668202767,
"grad_norm": 1.8100879192352295,
"learning_rate": 4.9326121764495596e-05,
"loss": 0.0454,
"num_input_tokens_seen": 283592,
"step": 240
},
{
"epoch": 2.258064516129032,
"grad_norm": 2.6867613792419434,
"learning_rate": 4.929788455642864e-05,
"loss": 0.0647,
"num_input_tokens_seen": 289544,
"step": 245
},
{
"epoch": 2.3041474654377883,
"grad_norm": 0.8391556143760681,
"learning_rate": 4.9269076241540505e-05,
"loss": 0.0283,
"num_input_tokens_seen": 295480,
"step": 250
},
{
"epoch": 2.3502304147465436,
"grad_norm": 3.0261058807373047,
"learning_rate": 4.92396974969529e-05,
"loss": 0.0301,
"num_input_tokens_seen": 301680,
"step": 255
},
{
"epoch": 2.3963133640552994,
"grad_norm": 1.320726990699768,
"learning_rate": 4.920974901319515e-05,
"loss": 0.0416,
"num_input_tokens_seen": 307768,
"step": 260
},
{
"epoch": 2.442396313364055,
"grad_norm": 3.625941753387451,
"learning_rate": 4.917923149418792e-05,
"loss": 0.0504,
"num_input_tokens_seen": 313376,
"step": 265
},
{
"epoch": 2.488479262672811,
"grad_norm": 1.6434024572372437,
"learning_rate": 4.914814565722671e-05,
"loss": 0.0499,
"num_input_tokens_seen": 319592,
"step": 270
},
{
"epoch": 2.5345622119815667,
"grad_norm": 2.392733573913574,
"learning_rate": 4.911649223296499e-05,
"loss": 0.03,
"num_input_tokens_seen": 325392,
"step": 275
},
{
"epoch": 2.5806451612903225,
"grad_norm": 3.644463539123535,
"learning_rate": 4.9084271965397014e-05,
"loss": 0.0416,
"num_input_tokens_seen": 331264,
"step": 280
},
{
"epoch": 2.6267281105990783,
"grad_norm": 1.160701870918274,
"learning_rate": 4.905148561184033e-05,
"loss": 0.0639,
"num_input_tokens_seen": 337584,
"step": 285
},
{
"epoch": 2.672811059907834,
"grad_norm": 2.189955711364746,
"learning_rate": 4.901813394291801e-05,
"loss": 0.048,
"num_input_tokens_seen": 343632,
"step": 290
},
{
"epoch": 2.71889400921659,
"grad_norm": 2.4015591144561768,
"learning_rate": 4.898421774254051e-05,
"loss": 0.0465,
"num_input_tokens_seen": 349392,
"step": 295
},
{
"epoch": 2.7649769585253456,
"grad_norm": 1.5051063299179077,
"learning_rate": 4.894973780788722e-05,
"loss": 0.0512,
"num_input_tokens_seen": 355104,
"step": 300
},
{
"epoch": 2.8110599078341014,
"grad_norm": 2.82021427154541,
"learning_rate": 4.891469494938781e-05,
"loss": 0.0412,
"num_input_tokens_seen": 360824,
"step": 305
},
{
"epoch": 2.857142857142857,
"grad_norm": 1.578285574913025,
"learning_rate": 4.887908999070308e-05,
"loss": 0.0292,
"num_input_tokens_seen": 366792,
"step": 310
},
{
"epoch": 2.903225806451613,
"grad_norm": 2.392061710357666,
"learning_rate": 4.884292376870567e-05,
"loss": 0.0264,
"num_input_tokens_seen": 372536,
"step": 315
},
{
"epoch": 2.9493087557603688,
"grad_norm": 0.46661177277565,
"learning_rate": 4.880619713346039e-05,
"loss": 0.0402,
"num_input_tokens_seen": 378472,
"step": 320
},
{
"epoch": 2.9953917050691246,
"grad_norm": 2.8422093391418457,
"learning_rate": 4.876891094820417e-05,
"loss": 0.0214,
"num_input_tokens_seen": 383984,
"step": 325
},
{
"epoch": 3.0414746543778803,
"grad_norm": 1.581181526184082,
"learning_rate": 4.873106608932585e-05,
"loss": 0.0536,
"num_input_tokens_seen": 389896,
"step": 330
},
{
"epoch": 3.087557603686636,
"grad_norm": 2.1210877895355225,
"learning_rate": 4.869266344634556e-05,
"loss": 0.0295,
"num_input_tokens_seen": 395848,
"step": 335
},
{
"epoch": 3.133640552995392,
"grad_norm": 0.6473267078399658,
"learning_rate": 4.8653703921893766e-05,
"loss": 0.0209,
"num_input_tokens_seen": 401784,
"step": 340
},
{
"epoch": 3.1797235023041477,
"grad_norm": 3.651721239089966,
"learning_rate": 4.8614188431690125e-05,
"loss": 0.0348,
"num_input_tokens_seen": 407656,
"step": 345
},
{
"epoch": 3.225806451612903,
"grad_norm": 1.2039521932601929,
"learning_rate": 4.85741179045219e-05,
"loss": 0.0167,
"num_input_tokens_seen": 413344,
"step": 350
},
{
"epoch": 3.271889400921659,
"grad_norm": 2.5414485931396484,
"learning_rate": 4.853349328222219e-05,
"loss": 0.0567,
"num_input_tokens_seen": 419112,
"step": 355
},
{
"epoch": 3.3179723502304146,
"grad_norm": 0.12772409617900848,
"learning_rate": 4.849231551964771e-05,
"loss": 0.0483,
"num_input_tokens_seen": 425104,
"step": 360
},
{
"epoch": 3.3640552995391704,
"grad_norm": 1.686139702796936,
"learning_rate": 4.845058558465645e-05,
"loss": 0.0379,
"num_input_tokens_seen": 430920,
"step": 365
},
{
"epoch": 3.410138248847926,
"grad_norm": 1.3768738508224487,
"learning_rate": 4.840830445808483e-05,
"loss": 0.0447,
"num_input_tokens_seen": 436864,
"step": 370
},
{
"epoch": 3.456221198156682,
"grad_norm": 1.8329992294311523,
"learning_rate": 4.836547313372471e-05,
"loss": 0.0417,
"num_input_tokens_seen": 442880,
"step": 375
},
{
"epoch": 3.5023041474654377,
"grad_norm": 2.0811145305633545,
"learning_rate": 4.832209261830002e-05,
"loss": 0.0368,
"num_input_tokens_seen": 448536,
"step": 380
},
{
"epoch": 3.5483870967741935,
"grad_norm": 0.14438925683498383,
"learning_rate": 4.827816393144305e-05,
"loss": 0.0269,
"num_input_tokens_seen": 454664,
"step": 385
},
{
"epoch": 3.5944700460829493,
"grad_norm": 2.131544828414917,
"learning_rate": 4.823368810567056e-05,
"loss": 0.0387,
"num_input_tokens_seen": 460584,
"step": 390
},
{
"epoch": 3.640552995391705,
"grad_norm": 2.2278664112091064,
"learning_rate": 4.818866618635947e-05,
"loss": 0.0365,
"num_input_tokens_seen": 466352,
"step": 395
},
{
"epoch": 3.686635944700461,
"grad_norm": 0.2778138220310211,
"learning_rate": 4.814309923172227e-05,
"loss": 0.0394,
"num_input_tokens_seen": 472288,
"step": 400
}
],
"logging_steps": 5,
"max_steps": 3240,
"num_input_tokens_seen": 472288,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5598346769498112.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}