TongZheng1999
/

alma-13b-sft-group-3-max-tokens-512

Model card Files Files and versions Community

alma-13b-sft-group-3-max-tokens-512 / checkpoint-360 /trainer_state.json

TongZheng1999's picture

Upload folder using huggingface_hub

9bb0c22 verified 8 days ago

history blame contribute delete

4.21 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9982668977469671,
	"eval_steps": 18,
	"global_step": 360,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.04991334488734835,
	"grad_norm": 0.4083130955696106,
	"learning_rate": 0.002,
	"loss": 1.2589,
	"step": 18
	},
	{
	"epoch": 0.0998266897746967,
	"grad_norm": 0.3094785809516907,
	"learning_rate": 0.0008944271909999159,
	"loss": 1.0636,
	"step": 36
	},
	{
	"epoch": 0.14974003466204505,
	"grad_norm": 0.37615087628364563,
	"learning_rate": 0.0006488856845230502,
	"loss": 0.8574,
	"step": 54
	},
	{
	"epoch": 0.1996533795493934,
	"grad_norm": 0.2839493453502655,
	"learning_rate": 0.0005345224838248488,
	"loss": 0.8292,
	"step": 72
	},
	{
	"epoch": 0.24956672443674177,
	"grad_norm": 0.2941131889820099,
	"learning_rate": 0.00046499055497527714,
	"loss": 0.7934,
	"step": 90
	},
	{
	"epoch": 0.2994800693240901,
	"grad_norm": 0.3659161627292633,
	"learning_rate": 0.0004170288281141495,
	"loss": 0.7727,
	"step": 108
	},
	{
	"epoch": 0.3493934142114385,
	"grad_norm": 0.3437303304672241,
	"learning_rate": 0.00038138503569823694,
	"loss": 0.7557,
	"step": 126
	},
	{
	"epoch": 0.3993067590987868,
	"grad_norm": 0.2811639904975891,
	"learning_rate": 0.00035355339059327376,
	"loss": 0.7285,
	"step": 144
	},
	{
	"epoch": 0.44922010398613516,
	"grad_norm": 0.35479724407196045,
	"learning_rate": 0.00033104235544094716,
	"loss": 0.7107,
	"step": 162
	},
	{
	"epoch": 0.49913344887348354,
	"grad_norm": 0.3011772036552429,
	"learning_rate": 0.0003123475237772121,
	"loss": 0.7186,
	"step": 180
	},
	{
	"epoch": 0.5490467937608319,
	"grad_norm": 0.29623347520828247,
	"learning_rate": 0.00029649972666444046,
	"loss": 0.6818,
	"step": 198
	},
	{
	"epoch": 0.5989601386481802,
	"grad_norm": 0.3092997074127197,
	"learning_rate": 0.000282842712474619,
	"loss": 0.6701,
	"step": 216
	},
	{
	"epoch": 0.6488734835355287,
	"grad_norm": 0.32858991622924805,
	"learning_rate": 0.00027091418459143857,
	"loss": 0.6733,
	"step": 234
	},
	{
	"epoch": 0.698786828422877,
	"grad_norm": 0.3046702742576599,
	"learning_rate": 0.0002603778219616478,
	"loss": 0.6523,
	"step": 252
	},
	{
	"epoch": 0.7487001733102253,
	"grad_norm": 0.41049444675445557,
	"learning_rate": 0.00025098232205526344,
	"loss": 0.6473,
	"step": 270
	},
	{
	"epoch": 0.7986135181975736,
	"grad_norm": 0.35009312629699707,
	"learning_rate": 0.00024253562503633296,
	"loss": 0.6309,
	"step": 288
	},
	{
	"epoch": 0.848526863084922,
	"grad_norm": 0.3388204276561737,
	"learning_rate": 0.0002348880878058814,
	"loss": 0.6264,
	"step": 306
	},
	{
	"epoch": 0.8984402079722703,
	"grad_norm": 0.28809213638305664,
	"learning_rate": 0.0002279211529192759,
	"loss": 0.6046,
	"step": 324
	},
	{
	"epoch": 0.9483535528596188,
	"grad_norm": 0.3333686292171478,
	"learning_rate": 0.0002215395102486845,
	"loss": 0.5891,
	"step": 342
	},
	{
	"epoch": 0.9982668977469671,
	"grad_norm": 0.3479894697666168,
	"learning_rate": 0.00021566554640687683,
	"loss": 0.5952,
	"step": 360
	}
	],
	"logging_steps": 18,
	"max_steps": 360,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 18,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.656326962122588e+18,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}