ayjays132 committed
Commit 13604d8
1 Parent(s): e311507

Update config.json

Files changed (1)
  1. config.json +104 -9
config.json CHANGED
@@ -1,23 +1,54 @@
 {
   "activation_function": "gelu_new",
+  "adaptation_rate": 0.05,
+  "additional_special_tokens": [
+    "<greeting>",
+    "<farewell>",
+    "<thank>",
+    "<apology>"
+  ],
   "architectures": [
-    "GPT2LMHeadModel"
+    "GPT2Model"
   ],
   "attn_pdrop": 0.1,
-  "bos_token_id": 50256,
+  "bos_token_id": -1,
+  "contextual_embedding_dim": 2048,
+  "device": "cuda",
+  "dropout_rate": 0.1,
   "embd_pdrop": 0.1,
-  "eos_token_id": 50256,
+  "embedding_dim": 2048,
+  "eos_token_id": -1,
+  "hidden_dim": 2048,
   "initializer_range": 0.02,
+  "innovative_growth_capacity": 50000,
+  "integration_settings": {
+    "config_name": "config.json",
+    "load_from_transformers": true,
+    "pytorch_dump_folder_path": "./model_save",
+    "pytorch_model_bin_name": "pytorch_model.bin"
+  },
   "layer_norm_epsilon": 1e-05,
+  "lstm_hidden_dim": 2048,
+  "max_memory_size": 100000,
+  "max_neurons": 100,
+  "meta_learning_rate": 0.001,
   "model_type": "gpt2",
-  "n_ctx": 2048,
-  "n_embd": 2048,
+  "n_embd": 768,
   "n_head": 16,
+  "n_inner": null,
   "n_layer": 24,
   "n_positions": 2048,
-  "n_special": 0,
-  "predict_special_tokens": true,
+  "num_embeddings": 50268,
+  "num_heads": 64,
+  "num_layers": 24,
+  "output_attentions": true,
+  "output_hidden_states": true,
+  "pad_token_id": -100,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "sep_token_id": -1,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
@@ -26,8 +57,72 @@
   "task_specific_params": {
     "text-generation": {
       "do_sample": true,
-      "max_length": 50
+      "length_penalty": 1.0,
+      "max_length": 50,
+      "no_repeat_ngram_size": 2,
+      "repetition_penalty": 1.2,
+      "temperature": 0.9,
+      "top_k": 50,
+      "top_p": 0.95
     }
   },
-  "vocab_size": 50257
+  "return_dict": true,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": true,
+  "chunk_size_feed_forward": 0,
+  "is_encoder_decoder": false,
+  "is_decoder": false,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "decoder_start_token_id": null,
+  "problem_type": null,
+  "transformers_version": null,
+  "vocab_size": 50281,
+  "context_window": 20,
+  "env": null,
+  "state_shape": null,
+  "action_size": 50257,
+  "q_model": null,
+  "target_q_model": null
 }
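
The new file mixes GPT-2's standard fields with custom keys (adaptation_rate, integration_settings, lstm_hidden_dim, and so on). Below is a minimal sketch of how such a config behaves when loaded with transformers; the local file path and the inspected keys are illustrative assumptions, not part of the commit:

```python
from transformers import GPT2Config

# Load the updated config straight from the JSON file
# (assumes the repo's config.json was downloaded next to this script).
config = GPT2Config.from_json_file("config.json")

# Standard GPT-2 fields are parsed into typed attributes.
print(config.n_embd, config.n_layer, config.n_positions)  # 768 24 2048

# Keys that GPT2Config does not define are not dropped:
# PretrainedConfig keeps unknown kwargs as plain attributes.
print(config.adaptation_rate)       # 0.05
print(config.integration_settings)  # {'config_name': 'config.json', ...}

# Per-task overrides live under task_specific_params; pipelines
# apply them on top of the flat generation defaults.
gen_kwargs = config.task_specific_params["text-generation"]
print(gen_kwargs["temperature"], gen_kwargs["top_p"])  # 0.9 0.95
```

Note that the updated file carries two sets of sampling knobs: the tuned values under task_specific_params (temperature 0.9, top_p 0.95, repetition_penalty 1.2) and the stock top-level defaults (temperature 1.0, top_p 1.0, max_length 20). In recent transformers versions a plain generate() call reads the top-level defaults; the task-specific block only takes effect when a pipeline or caller applies it explicitly.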