akahana
/

wikipedia-gpt2

Model card Files Files and versions Metrics Training metrics Community

wikipedia-gpt2 / trainer_state.json

akahana's picture

Upload folder using huggingface_hub

86b39d6 verified about 1 month ago

history blame contribute delete

3.09 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 1000,
	"global_step": 12141,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.08236553825879252,
	"grad_norm": 2.806833267211914,
	"learning_rate": 1.835268923482415e-05,
	"loss": 7.3233,
	"step": 1000
	},
	{
	"epoch": 0.16473107651758503,
	"grad_norm": 2.414405345916748,
	"learning_rate": 1.67053784696483e-05,
	"loss": 6.428,
	"step": 2000
	},
	{
	"epoch": 0.24709661477637757,
	"grad_norm": 2.7336931228637695,
	"learning_rate": 1.505806770447245e-05,
	"loss": 6.121,
	"step": 3000
	},
	{
	"epoch": 0.32946215303517007,
	"grad_norm": 2.707063674926758,
	"learning_rate": 1.3410756939296599e-05,
	"loss": 5.8761,
	"step": 4000
	},
	{
	"epoch": 0.4118276912939626,
	"grad_norm": 2.918213129043579,
	"learning_rate": 1.176344617412075e-05,
	"loss": 5.7252,
	"step": 5000
	},
	{
	"epoch": 0.49419322955275513,
	"grad_norm": 2.722219944000244,
	"learning_rate": 1.0116135408944898e-05,
	"loss": 5.5951,
	"step": 6000
	},
	{
	"epoch": 0.5765587678115477,
	"grad_norm": 3.1989521980285645,
	"learning_rate": 8.470471954534224e-06,
	"loss": 5.4935,
	"step": 7000
	},
	{
	"epoch": 0.6589243060703401,
	"grad_norm": 3.0108842849731445,
	"learning_rate": 6.823161189358373e-06,
	"loss": 5.4158,
	"step": 8000
	},
	{
	"epoch": 0.7412898443291327,
	"grad_norm": 2.9963932037353516,
	"learning_rate": 5.177497734947698e-06,
	"loss": 5.3553,
	"step": 9000
	},
	{
	"epoch": 0.8236553825879253,
	"grad_norm": 3.4173614978790283,
	"learning_rate": 3.530186969771848e-06,
	"loss": 5.2982,
	"step": 10000
	},
	{
	"epoch": 0.9060209208467177,
	"grad_norm": 2.9771502017974854,
	"learning_rate": 1.884523515361173e-06,
	"loss": 5.2726,
	"step": 11000
	},
	{
	"epoch": 0.9883864591055103,
	"grad_norm": 3.5910282135009766,
	"learning_rate": 2.3721275018532246e-07,
	"loss": 5.2688,
	"step": 12000
	},
	{
	"epoch": 1.0,
	"step": 12141,
	"total_flos": 2.5377466023936e+16,
	"train_loss": 5.757713494451266,
	"train_runtime": 5411.3319,
	"train_samples_per_second": 17.948,
	"train_steps_per_second": 2.244
	}
	],
	"logging_steps": 1000,
	"max_steps": 12141,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 5000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2.5377466023936e+16,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}