tlsdm65376
/

krx_Llama3.1_8b_instruct_M2_all_data_sg

Model card Files Files and versions

krx_Llama3.1_8b_instruct_M2_all_data_sg / checkpoint-948 /trainer_state.json

rememz's picture

Upload folder using huggingface_hub

2dbd225 verified 8 months ago

history blame contribute delete

3.87 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 1000,
	"global_step": 948,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.15822784810126583,
	"grad_norm": 0.22299839556217194,
	"learning_rate": 9.987121632940603e-05,
	"loss": 1.2749,
	"step": 50
	},
	{
	"epoch": 0.31645569620253167,
	"grad_norm": 0.29688388109207153,
	"learning_rate": 9.853447789039338e-05,
	"loss": 1.112,
	"step": 100
	},
	{
	"epoch": 0.47468354430379744,
	"grad_norm": 0.34126031398773193,
	"learning_rate": 9.578324417078541e-05,
	"loss": 1.0716,
	"step": 150
	},
	{
	"epoch": 0.6329113924050633,
	"grad_norm": 0.3432747423648834,
	"learning_rate": 9.169769749212656e-05,
	"loss": 1.0426,
	"step": 200
	},
	{
	"epoch": 0.7911392405063291,
	"grad_norm": 0.35207387804985046,
	"learning_rate": 8.639690757035497e-05,
	"loss": 1.0167,
	"step": 250
	},
	{
	"epoch": 0.9493670886075949,
	"grad_norm": 0.3663095533847809,
	"learning_rate": 8.003536133216028e-05,
	"loss": 0.9973,
	"step": 300
	},
	{
	"epoch": 1.1075949367088607,
	"grad_norm": 0.4503985643386841,
	"learning_rate": 7.279846052743947e-05,
	"loss": 0.9599,
	"step": 350
	},
	{
	"epoch": 1.2658227848101267,
	"grad_norm": 0.43278568983078003,
	"learning_rate": 6.489711835603232e-05,
	"loss": 0.9332,
	"step": 400
	},
	{
	"epoch": 1.4240506329113924,
	"grad_norm": 0.44808298349380493,
	"learning_rate": 5.6561612585365554e-05,
	"loss": 0.9252,
	"step": 450
	},
	{
	"epoch": 1.5822784810126582,
	"grad_norm": 0.45410802960395813,
	"learning_rate": 4.8034874304563326e-05,
	"loss": 0.9253,
	"step": 500
	},
	{
	"epoch": 1.740506329113924,
	"grad_norm": 0.47317686676979065,
	"learning_rate": 3.9565407908466436e-05,
	"loss": 0.9377,
	"step": 550
	},
	{
	"epoch": 1.8987341772151898,
	"grad_norm": 0.4796537756919861,
	"learning_rate": 3.140004865248845e-05,
	"loss": 0.9257,
	"step": 600
	},
	{
	"epoch": 2.0569620253164556,
	"grad_norm": 0.5124043822288513,
	"learning_rate": 2.3776768853093317e-05,
	"loss": 0.9008,
	"step": 650
	},
	{
	"epoch": 2.2151898734177213,
	"grad_norm": 0.5075053572654724,
	"learning_rate": 1.6917742390944913e-05,
	"loss": 0.8689,
	"step": 700
	},
	{
	"epoch": 2.3734177215189876,
	"grad_norm": 0.5190396904945374,
	"learning_rate": 1.1022869645771195e-05,
	"loss": 0.8626,
	"step": 750
	},
	{
	"epoch": 2.5316455696202533,
	"grad_norm": 0.5119550824165344,
	"learning_rate": 6.263951573102922e-06,
	"loss": 0.8686,
	"step": 800
	},
	{
	"epoch": 2.689873417721519,
	"grad_norm": 0.5137431025505066,
	"learning_rate": 2.7796827143696634e-06,
	"loss": 0.8754,
	"step": 850
	},
	{
	"epoch": 2.848101265822785,
	"grad_norm": 0.5047231912612915,
	"learning_rate": 6.716090647324702e-07,
	"loss": 0.8636,
	"step": 900
	}
	],
	"logging_steps": 50,
	"max_steps": 948,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.516545844872479e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}