ayjays132 committed
Commit 5161b97
1 Parent(s): 8849d96

Create config.json

Files changed (1): config.json (+154, -0)
config.json ADDED
@@ -0,0 +1,154 @@
+ {
+   "_name_or_path": "ayjays132/CustomGPT2Conversational",
+   "activation_function": "gelu_new",
+   "advanced_model_options": {
+     "contextual_embeddings": {
+       "approaches": [
+         "contextual_attention_mechanisms",
+         "semantic_embedding_regularization"
+       ],
+       "enable": true
+     },
+     "dynamic_adaptation": {
+       "enable": true,
+       "techniques": [
+         "adaptive_layer_dropping",
+         "dynamic_context_window"
+       ]
+     },
+     "innovative_neuron_growth": {
+       "enable": true,
+       "strategies": [
+         "selective_neuron_pruning",
+         "progressive_neuron_expansion"
+       ]
+     },
+     "memory_optimization": {
+       "enable": true,
+       "methods": [
+         "gradient_checkpointing",
+         "memory-efficient_attention"
+       ]
+     },
+     "meta_learning": {
+       "approaches": [
+         "meta_learning_rate_adjustment",
+         "online_adaptation"
+       ],
+       "enable": true
+     },
+     "secret_advanced_options": {
+       "adaptive_token_embedding": {
+         "enable": true,
+         "strategies": [
+           "dynamic_embedding_resizing",
+           "contextual_embedding_scaling"
+         ]
+       },
+       "future_context_prediction": {
+         "enable": true,
+         "techniques": [
+           "lookahead_context_integration",
+           "predictive_attention_mechanisms"
+         ]
+       },
+       "multi_modal_integration": {
+         "enable": true,
+         "methods": [
+           "text_image_alignment",
+           "cross_modal_attention"
+         ]
+       }
+     }
+   },
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50267,
+   "context_window": 20,
+   "contextual_embedding_dim": 1024,
+   "device": "cuda",
+   "dropout_rate": 0.1,
+   "early_stopping": true,
+   "embd_pdrop": 0.1,
+   "embedding_dim": 1024,
+   "eos_token_id": 50267,
+   "hidden_dim": 1024,
+   "initializer_range": 0.02,
+   "innovative_growth_capacity": 50000,
+   "integration_settings": {
+     "config_name": "config.json",
+     "load_from_transformers": true,
+     "pytorch_dump_folder_path": "./model_save",
+     "pytorch_model_bin_name": "pytorch_model.bin"
+   },
+   "layer_norm_epsilon": 1e-05,
+   "max_length": 1024,
+   "max_memory_size": 100000,
+   "max_neurons": 100,
+   "meta_learning_rate": 0.001,
+   "min_length": 50,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 1024,
+   "n_head": 16,
+   "n_inner": null,
+   "n_layer": 24,
+   "n_positions": 1024,
+   "no_repeat_ngram_size": 2,
+   "num_beams": 5,
+   "num_embeddings": 50257,
+   "num_heads": 64,
+   "num_layers": 24,
+   "output_attentions": true,
+   "output_hidden_states": true,
+   "pad_token_id": 50267,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "sep_token_id": 50267,
+   "special_tokens": {
+     "additional_special_tokens": [
+       "<greeting>",
+       "<farewell>",
+       "<thank>",
+       "<apology>"
+     ],
+     "bos_token": "<bos>",
+     "cls_token": "<cls>",
+     "eos_token": "<eos>",
+     "mask_token": "<mask>",
+     "pad_token": "<pad>",
+     "sep_token": "<sep>",
+     "unk_token": "<unk>"
+   },
+   "state_shape": null,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "target_q_model": null,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "early_stopping": true,
+       "length_penalty": 1.0,
+       "max_length": 2048,
+       "min_length": 64,
+       "no_repeat_ngram_size": 2,
+       "num_beams": 8,
+       "num_return_sequences": 3,
+       "repetition_penalty": 1.2,
+       "temperature": 0.9,
+       "top_k": 50,
+       "top_p": 0.95
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
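
The "integration_settings" block indicates the checkpoint is meant to be loaded through transformers. A minimal sketch, assuming the Hub repo "ayjays132/CustomGPT2Conversational" resolves and its weights match the declared GPT2LMHeadModel architecture:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "ayjays132/CustomGPT2Conversational"  # from "_name_or_path" above

config = AutoConfig.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, config=config)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Keys that GPT2Config does not define ("advanced_model_options",
# "integration_settings", "special_tokens", ...) are not interpreted by
# GPT2LMHeadModel; transformers keeps them as plain attributes on the config
# object, so only custom training code that reads them explicitly would use them.
print(config.model_type, config.n_layer, config.n_head)   # gpt2 24 16
print(getattr(config, "advanced_model_options", None))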
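Note that "special_tokens" is not a standard config.json field (special tokens normally live in tokenizer_config.json and special_tokens_map.json), and the declared bos/eos/pad/sep token id of 50267 lies past the declared vocab_size of 50257, which only works if the embedding matrix is resized after the tokens are added. A hedged sketch of how tokens like these are typically registered, starting from the base gpt2 checkpoint purely for illustration:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Registering 7 named tokens plus 4 additional ones assigns ids 50257..50267,
# consistent with the 50267 used for bos/eos/pad/sep in the config above.
tokenizer.add_special_tokens({
    "bos_token": "<bos>",
    "eos_token": "<eos>",
    "pad_token": "<pad>",
    "sep_token": "<sep>",
    "cls_token": "<cls>",
    "mask_token": "<mask>",
    "unk_token": "<unk>",
    "additional_special_tokens": ["<greeting>", "<farewell>", "<thank>", "<apology>"],
})
model.resize_token_embeddings(len(tokenizer))  # grow wte/lm_head to the new vocab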
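The "text-generation" entry under "task_specific_params" is a set of decoding defaults; model.generate reads a GenerationConfig rather than task_specific_params (historically only pipelines applied them), so a caller would pass these values explicitly. A sketch under that assumption; note the block's max_length of 2048 exceeds n_positions (1024), so this caps generation at the model's context window:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "ayjays132/CustomGPT2Conversational"  # assumes the repo ships a usable tokenizer
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

inputs = tokenizer("Hello there! How has your day been?", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,
    num_beams=8,
    num_return_sequences=3,
    temperature=0.9,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    no_repeat_ngram_size=2,
    length_penalty=1.0,
    early_stopping=True,
    max_length=1024,   # config asks for 2048, but n_positions limits GPT-2 to 1024
    min_length=64,
    pad_token_id=tokenizer.pad_token_id,
)
for seq in outputs:
    print(tokenizer.decode(seq, skip_special_tokens=True))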