atsuki-yamaguchi committed on
Commit
b1a7f79
1 Parent(s): de6faf8

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +35 -35
config.json CHANGED
@@ -1,36 +1,36 @@
1
  {
2
- "_name_or_path": "/jmain02/home/J2AD003/txk102/shared/models/gemma-2-9b-si_2^15_50000-mean",
3
- "architectures": [
4
- "Gemma2ForMultiCausalLM"
5
- ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "attn_logit_softcapping": 50.0,
9
- "bos_token_id": 2,
10
- "cache_implementation": "hybrid",
11
- "copy_lm_head": true,
12
- "eos_token_id": 1,
13
- "final_logit_softcapping": 30.0,
14
- "head_dim": 256,
15
- "hidden_act": "gelu_pytorch_tanh",
16
- "hidden_activation": "gelu_pytorch_tanh",
17
- "hidden_size": 3584,
18
- "initializer_range": 0.02,
19
- "intermediate_size": 14336,
20
- "max_position_embeddings": 8192,
21
- "model_type": "gemma2",
22
- "num_attention_heads": 16,
23
- "num_hidden_layers": 42,
24
- "num_key_value_heads": 8,
25
- "num_lm_heads": 1,
26
- "pad_token_id": 0,
27
- "query_pre_attn_scalar": 256,
28
- "rms_norm_eps": 1e-06,
29
- "rope_theta": 10000.0,
30
- "sliding_window": 4096,
31
- "sliding_window_size": 4096,
32
- "torch_dtype": "float16",
33
- "transformers_version": "4.44.0.dev0",
34
- "use_cache": true,
35
- "vocab_size": 256104
36
- }
 
1
  {
2
+ "_name_or_path": "google/gemma-2-9b",
3
+ "architectures": [
4
+ "Gemma2ForMultiCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": 50.0,
9
+ "bos_token_id": 2,
10
+ "cache_implementation": "hybrid",
11
+ "copy_lm_head": true,
12
+ "eos_token_id": 1,
13
+ "final_logit_softcapping": 30.0,
14
+ "head_dim": 256,
15
+ "hidden_act": "gelu_pytorch_tanh",
16
+ "hidden_activation": "gelu_pytorch_tanh",
17
+ "hidden_size": 3584,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 14336,
20
+ "max_position_embeddings": 8192,
21
+ "model_type": "gemma2",
22
+ "num_attention_heads": 16,
23
+ "num_hidden_layers": 42,
24
+ "num_key_value_heads": 8,
25
+ "num_lm_heads": 1,
26
+ "pad_token_id": 0,
27
+ "query_pre_attn_scalar": 256,
28
+ "rms_norm_eps": 1e-06,
29
+ "rope_theta": 10000.0,
30
+ "sliding_window": 4096,
31
+ "sliding_window_size": 4096,
32
+ "torch_dtype": "float16",
33
+ "transformers_version": "4.44.0.dev0",
34
+ "use_cache": true,
35
+ "vocab_size": 256104
36
+ }