gemma-7b-sql / nemo / checkpoints / megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last
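
The directory name appears to follow NeMo's PyTorch Lightning checkpoint template, which embeds the run's metrics (validation loss, global step, consumed samples, epoch) directly in the name. A minimal stdlib-only sketch for recovering those fields from the name string; the parsing logic here is an illustration, not part of NeMo:

```python
# Parse the metric fields NeMo embeds in a checkpoint directory name.
import re

NAME = (
    "megatron_gpt_sft--validation_loss=0.000-step=613"
    "-consumed_samples=78464-epoch=1-last"
)

# Match key=value pairs; values are ints unless they contain a decimal point.
fields = re.findall(r"([a-z_]+)=([0-9.]+)", NAME)
meta = {k: (float(v) if "." in v else int(v)) for k, v in fields}
print(meta)
# {'validation_loss': 0.0, 'step': 613, 'consumed_samples': 78464, 'epoch': 1}
```

The top-level entries of that checkpoint directory are listed below.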
- model.decoder.final_layernorm.weight
- model.decoder.layers.mlp.linear_fc1._extra_state
- model.decoder.layers.mlp.linear_fc1.layer_norm_weight
- model.decoder.layers.mlp.linear_fc1.weight
- model.decoder.layers.mlp.linear_fc2._extra_state
- model.decoder.layers.mlp.linear_fc2.weight
- model.decoder.layers.self_attention.linear_proj._extra_state
- model.decoder.layers.self_attention.linear_proj.weight
- model.decoder.layers.self_attention.linear_qkv._extra_state
- model.decoder.layers.self_attention.linear_qkv.layer_norm_weight
- model.decoder.layers.self_attention.linear_qkv.weight
- model.embedding.word_embeddings.weight
- optimizer.state.exp_avg.model.decoder.final_layernorm.weight
- optimizer.state.exp_avg.model.decoder.layers.mlp.linear_fc1.layer_norm_weight
- optimizer.state.exp_avg.model.decoder.layers.mlp.linear_fc1.weight
- optimizer.state.exp_avg.model.decoder.layers.mlp.linear_fc2.weight
- optimizer.state.exp_avg.model.decoder.layers.self_attention.linear_proj.weight
- optimizer.state.exp_avg.model.decoder.layers.self_attention.linear_qkv.layer_norm_weight
- optimizer.state.exp_avg.model.decoder.layers.self_attention.linear_qkv.weight
- optimizer.state.exp_avg.model.embedding.word_embeddings.weight
- optimizer.state.exp_avg_sq.model.decoder.final_layernorm.weight
- optimizer.state.exp_avg_sq.model.decoder.layers.mlp.linear_fc1.layer_norm_weight
- optimizer.state.exp_avg_sq.model.decoder.layers.mlp.linear_fc1.weight
- optimizer.state.exp_avg_sq.model.decoder.layers.mlp.linear_fc2.weight
- optimizer.state.exp_avg_sq.model.decoder.layers.self_attention.linear_proj.weight
- optimizer.state.exp_avg_sq.model.decoder.layers.self_attention.linear_qkv.layer_norm_weight
- optimizer.state.exp_avg_sq.model.decoder.layers.self_attention.linear_qkv.weight
- optimizer.state.exp_avg_sq.model.embedding.word_embeddings.weight
- optimizer.state.fp32_param.model.decoder.final_layernorm.weight
- optimizer.state.fp32_param.model.decoder.layers.mlp.linear_fc1.layer_norm_weight
- optimizer.state.fp32_param.model.decoder.layers.mlp.linear_fc2.weight
- optimizer.state.fp32_param.model.decoder.layers.self_attention.linear_proj.weight
- optimizer.state.fp32_param.model.decoder.layers.self_attention.linear_qkv.layer_norm_weight
- optimizer.state.fp32_param.model.decoder.layers.self_attention.linear_qkv.weight
- optimizer.state.fp32_param.model.embedding.word_embeddings.weight
- optimizer.state.param.model.decoder.layers.mlp.linear_fc1.weight
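
Each entry above names one sharded tensor in a Megatron-Core distributed checkpoint: the `model.*` entries are the network weights, while `optimizer.state.exp_avg.*` and `optimizer.state.exp_avg_sq.*` hold the Adam first and second moment estimates and `optimizer.state.fp32_param.*` the FP32 master copies kept by the mixed-precision distributed optimizer. A minimal stdlib-only sketch that groups a local copy of such a checkpoint the same way; the checkpoint path is an assumption, so point it at wherever the directory lives on disk:

```python
# Bucket the top-level entries of a Megatron-Core distributed checkpoint
# into model weights vs. per-optimizer-state groups.
from collections import defaultdict
from pathlib import Path

# Hypothetical local path; adjust to your download location.
CKPT = Path(
    "gemma-7b-sql/nemo/checkpoints/"
    "megatron_gpt_sft--validation_loss=0.000-step=613"
    "-consumed_samples=78464-epoch=1-last"
)

def group_entries(ckpt_dir: Path) -> dict[str, list[str]]:
    """Group top-level checkpoint entries by the kind of state they store."""
    groups: dict[str, list[str]] = defaultdict(list)
    for entry in sorted(p.name for p in ckpt_dir.iterdir()):
        if entry.startswith("optimizer.state."):
            # e.g. "optimizer.state.exp_avg.model.decoder..." -> bucket "exp_avg"
            bucket = entry.removeprefix("optimizer.state.").split(".model.")[0]
            groups[f"optimizer/{bucket}"].append(entry)
        elif entry.startswith("model."):
            groups["model"].append(entry)
        else:
            groups["other"].append(entry)  # metadata files, e.g. common.pt
    return groups

if __name__ == "__main__":
    for bucket, names in group_entries(CKPT).items():
        print(f"{bucket}: {len(names)} entries")
```

Note that actually materializing the tensors typically requires `megatron.core.dist_checkpointing.load` with a matching sharded state dict (or NeMo's checkpoint conversion tooling); a plain `torch.load` on the directory will not work.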