CocoRoF
/

KoModernBERT-base-mlm-v04-retry-model-chp10

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Community

KoModernBERT-base-mlm-v04-retry-model-chp10 / last-checkpoint /trainer_state.json

CocoRoF's picture

Training in progress, step 188, checkpoint

a9bcdd1 verified 19 days ago

history blame contribute delete

3.93 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9972648155822628,
	"eval_steps": 100,
	"global_step": 188,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.05304600082884376,
	"grad_norm": 44.375,
	"learning_rate": 9.998963956030293e-07,
	"loss": 60.1259,
	"step": 10
	},
	{
	"epoch": 0.10609200165768752,
	"grad_norm": 18.46875,
	"learning_rate": 9.997927912060587e-07,
	"loss": 32.1406,
	"step": 20
	},
	{
	"epoch": 0.1591380024865313,
	"grad_norm": 14.609375,
	"learning_rate": 9.99689186809088e-07,
	"loss": 29.5838,
	"step": 30
	},
	{
	"epoch": 0.21218400331537504,
	"grad_norm": 14.71875,
	"learning_rate": 9.995855824121175e-07,
	"loss": 28.0137,
	"step": 40
	},
	{
	"epoch": 0.2652300041442188,
	"grad_norm": 21.984375,
	"learning_rate": 9.994819780151468e-07,
	"loss": 26.8252,
	"step": 50
	},
	{
	"epoch": 0.3182760049730626,
	"grad_norm": 12.4296875,
	"learning_rate": 9.993783736181764e-07,
	"loss": 26.4244,
	"step": 60
	},
	{
	"epoch": 0.37132200580190633,
	"grad_norm": 12.203125,
	"learning_rate": 9.992747692212056e-07,
	"loss": 26.1259,
	"step": 70
	},
	{
	"epoch": 0.4243680066307501,
	"grad_norm": 10.3828125,
	"learning_rate": 9.991711648242352e-07,
	"loss": 25.4777,
	"step": 80
	},
	{
	"epoch": 0.4774140074595939,
	"grad_norm": 14.0546875,
	"learning_rate": 9.990675604272645e-07,
	"loss": 25.3432,
	"step": 90
	},
	{
	"epoch": 0.5304600082884376,
	"grad_norm": 13.203125,
	"learning_rate": 9.98963956030294e-07,
	"loss": 24.9725,
	"step": 100
	},
	{
	"epoch": 0.5304600082884376,
	"eval_loss": 0.3865843713283539,
	"eval_runtime": 12.0567,
	"eval_samples_per_second": 821.122,
	"eval_steps_per_second": 25.712,
	"step": 100
	},
	{
	"epoch": 0.5835060091172813,
	"grad_norm": 13.484375,
	"learning_rate": 9.988603516333233e-07,
	"loss": 24.8423,
	"step": 110
	},
	{
	"epoch": 0.6365520099461252,
	"grad_norm": 12.78125,
	"learning_rate": 9.987567472363527e-07,
	"loss": 24.5701,
	"step": 120
	},
	{
	"epoch": 0.689598010774969,
	"grad_norm": 13.5625,
	"learning_rate": 9.98653142839382e-07,
	"loss": 24.5008,
	"step": 130
	},
	{
	"epoch": 0.7426440116038127,
	"grad_norm": 10.8046875,
	"learning_rate": 9.985495384424114e-07,
	"loss": 24.1657,
	"step": 140
	},
	{
	"epoch": 0.7956900124326565,
	"grad_norm": 15.5078125,
	"learning_rate": 9.984459340454408e-07,
	"loss": 24.2864,
	"step": 150
	},
	{
	"epoch": 0.8487360132615002,
	"grad_norm": 11.609375,
	"learning_rate": 9.983423296484702e-07,
	"loss": 24.0425,
	"step": 160
	},
	{
	"epoch": 0.901782014090344,
	"grad_norm": 11.1171875,
	"learning_rate": 9.982387252514998e-07,
	"loss": 24.1412,
	"step": 170
	},
	{
	"epoch": 0.9548280149191878,
	"grad_norm": 15.390625,
	"learning_rate": 9.98135120854529e-07,
	"loss": 23.766,
	"step": 180
	}
	],
	"logging_steps": 10,
	"max_steps": 188,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.1908991489828454e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}