hibernatesai committed on
Commit
d3912b3
·
verified ·
1 Parent(s): d65a438

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. D_0.pth +3 -0
  3. G_0.pth +3 -0
  4. README.md +43 -5
  5. config.json +48 -0
  6. model +3 -0
  7. model_0.pt +3 -0
  8. rmvpe.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model filter=lfs diff=lfs merge=lfs -text
D_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:273a1da965da0f3b51c7f630c3aa1bf0ef4739da4ab367a9f063a6e12058e8ce
3
+ size 187027770
G_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da86273856084312fcae6c6adc50f7149baab67693ea9f896117ad20c076dd2e
3
+ size 209268661
README.md CHANGED
@@ -1,5 +1,43 @@
1
- ---
2
- license: apache-2.0
3
- license_name: apache2.0
4
- license_link: LICENSE
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - Hibernates
5
+ - HVC-Audio-Convert
6
+
7
+ pipeline_tag: audio-to-audio
8
+ ---
9
+
10
+ # HVC-Audio-Convert Base Models
11
+
12
+ ## Overview
13
+ These models serve as the foundational components for HVC-Audio-Convert (Soft-VC Voice Conversion), an advanced voice conversion framework that combines SoftVC feature extraction with the VITS (Conditional Variational Autoencoder with Adversarial Learning) architecture.
14
+
15
+ ## Key Features
16
+ - High-quality voice conversion capabilities
17
+ - Pre-trained on diverse vocal datasets
18
+ - Supports cross-lingual voice conversion
19
+ - Compatible with HVC-Audio-Convert v4.0 and newer
20
+
21
+ ## Technical Details
22
+ - **Architecture**: Based on VITS (Conditional Variational Autoencoder with Adversarial Learning)
23
+ - **Feature Extraction**: Hibernates content encoder
24
+ - **Training Data**: Curated multi-speaker datasets
25
+ - **Model Format**: PyTorch checkpoints
26
+
27
+ ## Usage
28
+ 1. Download the desired base model
29
+ 2. Use with HVC-Audio-Convert framework
30
+ 3. Fine-tune on target voice data
31
+ 4. Perform voice conversion
32
+
33
+ ## Requirements
34
+ - HVC-Audio-Convert framework
35
+ - Python 3.8+
36
+ - PyTorch 1.13.0+
37
+ - CUDA-compatible GPU (recommended)
38
+
39
+ ## License
40
+ This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
41
+
42
+ ## Citation
43
+ If you use these models in your research, please cite this repository (HVC-Audio-Convert Base Models, Hibernates).
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "2",
3
+ "num_gpus": 1,
4
+ "batch_size": 24,
5
+ "learning_rate": 0.00015,
6
+ "adam_b1": 0.85,
7
+ "adam_b2": 0.995,
8
+ "adam_eps": 1e-8,
9
+ "lr_decay": 0.9995,
10
+ "seed": 42,
11
+
12
+ "upsample_rates": [6, 6, 4, 2, 2],
13
+ "upsample_kernel_sizes": [12, 12, 8, 4, 4],
14
+ "upsample_initial_channel": 384,
15
+ "resblock_kernel_sizes": [3, 5, 7, 11],
16
+ "resblock_dilation_sizes": [[1,2,4], [1,2,4], [1,2,4], [1,2,4]],
17
+ "discriminator_periods": [2, 3, 5, 7, 11, 17, 23, 31],
18
+
19
+ "segment_size": 8192,
20
+ "num_mels": 100,
21
+ "num_freq": 2049,
22
+ "n_fft": 4096,
23
+ "hop_size": 256,
24
+ "win_size": 4096,
25
+
26
+ "sampling_rate": 48000,
27
+
28
+ "fmin": 10,
29
+ "fmax": 24000,
30
+ "fmax_for_loss": 18000,
31
+
32
+ "num_workers": 6,
33
+
34
+ "training": {
35
+ "epochs": 2000,
36
+ "save_interval": 10,
37
+ "validation_interval": 2,
38
+ "grad_clip": 4.0,
39
+ "warmup_steps": 1000,
40
+ "decay_steps": 50000
41
+ },
42
+
43
+ "dist_config": {
44
+ "dist_backend": "nccl",
45
+ "dist_url": "tcp://localhost:54322",
46
+ "world_size": 1
47
+ }
48
+ }
model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
3
+ size 56825430
model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409452a27ab310f7a5897844d003d372a7357cc91c4a43562584a1714518cdf9
3
+ size 220895384
rmvpe.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19dc1809cf4cdb0a18db93441816bc327e14e5644b72eeaae5220560c6736fe2
3
+ size 368492925