Upload 7 files
- .gitattributes +1 -0
- D_0.pth +3 -0
- G_0.pth +3 -0
- README.md +43 -5
- config.json +48 -0
- model +3 -0
- model_0.pt +3 -0
- rmvpe.pt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model filter=lfs diff=lfs merge=lfs -text
D_0.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:273a1da965da0f3b51c7f630c3aa1bf0ef4739da4ab367a9f063a6e12058e8ce
+size 187027770
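The .pth entries in this commit are Git LFS pointers: the repository stores only the `oid sha256:` and `size` fields, while the binary itself lives in LFS storage. Below is a minimal sketch, using only the Python standard library, for checking that a locally downloaded copy of D_0.pth matches the pointer above; the local file path is an assumption.

```python
import hashlib
from pathlib import Path

# Expected values copied from the D_0.pth LFS pointer in this commit.
EXPECTED_OID = "273a1da965da0f3b51c7f630c3aa1bf0ef4739da4ab367a9f063a6e12058e8ce"
EXPECTED_SIZE = 187027770


def verify_lfs_download(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against its Git LFS pointer (size + sha256)."""
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid


if __name__ == "__main__":
    # Assumes D_0.pth has already been downloaded into the working directory.
    print(verify_lfs_download("D_0.pth", EXPECTED_OID, EXPECTED_SIZE))
```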
G_0.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da86273856084312fcae6c6adc50f7149baab67693ea9f896117ad20c076dd2e
+size 209268661
README.md
CHANGED
@@ -1,5 +1,43 @@
----
-license: apache-2.0
-
-
-
+---
+license: apache-2.0
+tags:
+- Hibernates
+- HVC-Audio-Convert
+
+pipeline_tag: audio-to-audio
+---
+
+# HVC-Audio-Convert Base Models
+
+## Overview
+These models serve as the foundational components for HVC-Audio-Convert (Soft-VC Voice Conversion), an advanced voice conversion framework that combines SoftVC feature extraction with the VITS (Conditional Variational Autoencoder with Adversarial Learning) architecture.
+
+## Key Features
+- High-quality voice conversion capabilities
+- Pre-trained on diverse vocal datasets
+- Supports cross-lingual voice conversion
+- Compatible with HVC-Audio-Convert v4.0 and newer
+
+## Technical Details
+- **Architecture**: Based on VITS (Conditional Variational Autoencoder)
+- **Feature Extraction**: Hibernates content encoder
+- **Training Data**: Curated multi-speaker datasets
+- **Model Format**: PyTorch checkpoints
+
+## Usage
+1. Download the desired base model
+2. Use with the HVC-Audio-Convert framework
+3. Fine-tune on target voice data
+4. Perform voice conversion
+
+## Requirements
+- HVC-Audio-Convert framework
+- Python 3.8+
+- PyTorch 1.13.0+
+- CUDA-compatible GPU (recommended)
+
+## License
+This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
+
+## Citation
+If you use these models in your research, please cite:
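The README's Usage section lists downloading and loading the base models before fine-tuning. As a hedged sketch of that first step only: the snippet below loads the generator and discriminator checkpoints with plain PyTorch and lists their top-level keys. The internal checkpoint layout is defined by the HVC-Audio-Convert training code and is not documented in this commit, so nothing beyond a dict-like structure is assumed.

```python
import torch

# Inspect the base checkpoints on CPU; no GPU is needed just to look at them.
for name in ("G_0.pth", "D_0.pth"):
    ckpt = torch.load(name, map_location="cpu")
    # Only list top-level keys rather than assuming a specific schema
    # (state_dict, optimizer state, iteration counter, etc.).
    if isinstance(ckpt, dict):
        print(name, "->", list(ckpt.keys()))
    else:
        print(name, "->", type(ckpt))
```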
config.json
ADDED
@@ -0,0 +1,48 @@
+{
+  "resblock": "2",
+  "num_gpus": 1,
+  "batch_size": 24,
+  "learning_rate": 0.00015,
+  "adam_b1": 0.85,
+  "adam_b2": 0.995,
+  "adam_eps": 1e-8,
+  "lr_decay": 0.9995,
+  "seed": 42,
+
+  "upsample_rates": [6, 6, 4, 2, 2],
+  "upsample_kernel_sizes": [12, 12, 8, 4, 4],
+  "upsample_initial_channel": 384,
+  "resblock_kernel_sizes": [3, 5, 7, 11],
+  "resblock_dilation_sizes": [[1,2,4], [1,2,4], [1,2,4], [1,2,4]],
+  "discriminator_periods": [2, 3, 5, 7, 11, 17, 23, 31],
+
+  "segment_size": 8192,
+  "num_mels": 100,
+  "num_freq": 2049,
+  "n_fft": 4096,
+  "hop_size": 256,
+  "win_size": 4096,
+
+  "sampling_rate": 48000,
+
+  "fmin": 10,
+  "fmax": 24000,
+  "fmax_for_loss": 18000,
+
+  "num_workers": 6,
+
+  "training": {
+    "epochs": 2000,
+    "save_interval": 10,
+    "validation_interval": 2,
+    "grad_clip": 4.0,
+    "warmup_steps": 1000,
+    "decay_steps": 50000
+  },
+
+  "dist_config": {
+    "dist_backend": "nccl",
+    "dist_url": "tcp://localhost:54322",
+    "world_size": 1
+  }
+}
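config.json resembles a HiFi-GAN-style vocoder/training configuration (resblock settings, upsample rates, STFT parameters, distributed-training block). A small sanity-check sketch, assuming config.json sits in the working directory, that reads the file and derives two values implied by it: the STFT bin count (4096 // 2 + 1 = 2049, matching `num_freq`) and the feature frame rate (48000 / 256 = 187.5 frames per second).

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

# STFT bins: n_fft // 2 + 1 should equal num_freq (4096 // 2 + 1 = 2049).
assert cfg["num_freq"] == cfg["n_fft"] // 2 + 1

# Frame rate of the feature sequence: sampling_rate / hop_size = 187.5 frames/s.
frames_per_second = cfg["sampling_rate"] / cfg["hop_size"]
print(f"{frames_per_second} frames/s, {cfg['num_mels']} mel bins, fmax {cfg['fmax']} Hz")
```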
model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
+size 56825430
model_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:409452a27ab310f7a5897844d003d372a7357cc91c4a43562584a1714518cdf9
+size 220895384
rmvpe.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19dc1809cf4cdb0a18db93441816bc327e14e5644b72eeaae5220560c6736fe2
+size 368492925
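Since every binary in this commit is LFS-tracked, the usual way to fetch them locally is through huggingface_hub. A sketch with a placeholder repository ID (the actual repo name is not shown in this diff and must be substituted):

```python
from huggingface_hub import hf_hub_download

# NOTE: "your-namespace/your-repo" is a placeholder for the repository
# that hosts these files; replace it with the real repo ID.
for filename in ("G_0.pth", "D_0.pth", "model", "model_0.pt", "rmvpe.pt", "config.json"):
    local_path = hf_hub_download(repo_id="your-namespace/your-repo", filename=filename)
    print(local_path)
```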