Upload 2 files

Files changed:
- config.json (+3 -1)
- gpt_config.py (+6 -1)
config.json CHANGED

@@ -1,7 +1,9 @@
 {
+  "activation_function": "gelu",
   "architectures": [
     "XttsGPT"
   ],
+  "attn_pdrop": 0.1,
   "audio_config": {
     "mel_channels": 80,
     "output_sample_rate": 24000,
@@ -16,7 +18,6 @@
   "gpt_batch_size": 1,
   "gpt_max_audio_tokens": 605,
   "hidden_size": 1024,
-  "n_inner": 4098,
   "initializer_range": 0.02,
   "kv_cache": true,
   "layer_norm_epsilon": 1e-05,
@@ -24,6 +25,7 @@
   "max_prompt_tokens": 70,
   "max_text_tokens": 402,
   "model_type": "xtts_gpt",
+  "n_inner": 4098,
   "num_attention_heads": 16,
   "num_audio_tokens": 1026,
   "num_hidden_layers": 30,
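Net effect of the config.json hunks: two new GPT-2-style keys, "activation_function" and "attn_pdrop", are added, and "n_inner" moves to its alphabetical position among the keys (the only removal is its old location). A minimal sketch of loading the updated config and reading the touched fields, assuming the AstraMindAI/xtts2-gpt repo id taken from the auto_map strings in the diff below and a transformers version that supports trust_remote_code:

from transformers import AutoConfig

# trust_remote_code is required because auto_map points at the custom
# XTTSGPTConfig class shipped in gpt_config.py alongside this config.
config = AutoConfig.from_pretrained(
    "AstraMindAI/xtts2-gpt",
    trust_remote_code=True,
)

# The three fields touched by this commit:
print(config.activation_function)  # "gelu"
print(config.attn_pdrop)           # 0.1
print(config.n_inner)              # 4098, the GPT feed-forward inner width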
gpt_config.py CHANGED

@@ -36,6 +36,7 @@ class XTTSGPTConfig(PretrainedConfig):
         self,
         # Model architecture
         hidden_size: int = 1024,  # gpt_n_model_channels in original
+        n_inner: int = 4098,
         num_hidden_layers: int = 30,  # gpt_layers in original
         num_attention_heads: int = 16,  # gpt_n_heads in original
 
@@ -82,6 +83,8 @@ class XTTSGPTConfig(PretrainedConfig):
             "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
             "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
         },
+        activation_function: str = "gelu",
+        attn_pdrop: float = 0.1,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -90,8 +93,10 @@ class XTTSGPTConfig(PretrainedConfig):
         self.audio_config = GPTAudioConfig(
             **audio_config if audio_config is not None else {}
         )
-
+        self.activation_function = activation_function
+        self.attn_pdrop = attn_pdrop
         self.hidden_size = hidden_size
+        self.n_inner = n_inner
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
 
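The gpt_config.py hunks mirror the JSON change: n_inner, activation_function, and attn_pdrop become __init__ keyword arguments with the same defaults committed to config.json, and each is stored as an attribute after super().__init__(**kwargs). A minimal sketch of direct construction, assuming gpt_config.py (and the GPTAudioConfig it references) is importable from the current directory:

from gpt_config import XTTSGPTConfig

# Defaults now agree with the values committed to config.json above.
config = XTTSGPTConfig()
assert config.activation_function == "gelu"
assert config.attn_pdrop == 0.1
assert config.n_inner == 4098

# The new fields are plain keyword arguments, so they can be overridden
# per instance without editing config.json.
dropless = XTTSGPTConfig(attn_pdrop=0.0)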