HighCWu committed on
Commit b51a9f3 · 1 Parent(s): b2ce270

init commit.

.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
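The added rule routes `*.wav` files through Git LFS (they are stored as pointer files, with `-text` unsetting text conversion). A minimal sketch, in plain Python with a hypothetical helper name, of how such a `.gitattributes` line breaks down:

```python
def parse_gitattributes_line(line):
    """Split a .gitattributes line into (pattern, {attr: value}).

    Illustrative parser: handles 'key=value' attributes and bare
    '-flag' entries (which unset an attribute). Not a full
    implementation of gitattributes semantics.
    """
    pattern, *attrs = line.split()
    parsed = {}
    for attr in attrs:
        if "=" in attr:
            key, value = attr.split("=", 1)
            parsed[key] = value
        elif attr.startswith("-"):
            parsed[attr[1:]] = False  # '-text': unset the 'text' attribute
        else:
            parsed[attr] = True
    return pattern, parsed

pattern, attrs = parse_gitattributes_line(
    "*.wav filter=lfs diff=lfs merge=lfs -text"
)
print(pattern, attrs)
```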
Configs/config.yml ADDED
@@ -0,0 +1,54 @@
+log_dir: "Models/VCTK20"
+save_freq: 2
+epochs: 150
+batch_size: 5
+pretrained_model: ""
+load_only_params: false
+fp16_run: true
+
+train_data: "Data/train_list.txt"
+val_data: "Data/val_list.txt"
+
+F0_path: "Utils/JDC/bst.pd"
+ASR_config: "Utils/ASR/config.yml"
+ASR_path: "Utils/ASR/epoch_00100.pd"
+
+preprocess_params:
+  sr: 24000
+  spect_params:
+    n_fft: 2048
+    win_length: 1200
+    hop_length: 300
+
+model_params:
+  dim_in: 64
+  style_dim: 64
+  latent_dim: 16
+  num_domains: 20
+  max_conv_dim: 512
+  n_repeat: 4
+  w_hpf: 0
+  F0_channel: 256
+
+loss_params:
+  g_loss:
+    lambda_sty: 1.
+    lambda_cyc: 5.
+    lambda_ds: 1.
+    lambda_norm: 1.
+    lambda_asr: 10.
+    lambda_f0: 5.
+    lambda_f0_sty: 0.1
+    lambda_adv: 2.
+    lambda_adv_cls: 0.5
+    norm_bias: 0.5
+  d_loss:
+    lambda_reg: 1.
+    lambda_adv_cls: 0.1
+    lambda_con_reg: 10.
+
+  adv_cls_epoch: 50
+  con_reg_epoch: 30
+
+optimizer_params:
+  lr: 0.0001
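The `preprocess_params` above fix the spectrogram geometry for all training audio. A quick arithmetic check in plain Python (variable names are illustrative) of what these values imply:

```python
# Spectrogram geometry implied by preprocess_params above
# (sr: 24000, n_fft: 2048, win_length: 1200, hop_length: 300).
sr = 24000
n_fft = 2048
win_length = 1200
hop_length = 300

hop_ms = 1000 * hop_length / sr    # time between STFT frames, in ms
frames_per_sec = sr / hop_length   # spectrogram frame rate
n_freq_bins = n_fft // 2 + 1       # bins of a one-sided real STFT

print(hop_ms, frames_per_sec, n_freq_bins)
```

So frames are spaced 12.5 ms apart (80 frames per second), with a 50 ms analysis window zero-padded into a 2048-point FFT.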
Data/train_list.txt ADDED
@@ -0,0 +1,2725 @@
+./Data/p225/22.wav|0
+./Data/p239/163.wav|7
+./Data/p227/144.wav|11
+./Data/p258/67.wav|16
+./Data/p259/74.wav|17
+./Data/p230/103.wav|3
+./Data/p225/7.wav|0
+./Data/p226/67.wav|10
+./Data/p228/27.wav|1
+./Data/p243/23.wav|13
+./Data/p228/3.wav|1
+./Data/p244/61.wav|9
+./Data/p230/178.wav|3
+./Data/p240/120.wav|8
+./Data/p228/154.wav|1
+./Data/p230/146.wav|3
+./Data/p240/149.wav|8
+./Data/p254/56.wav|14
+./Data/p240/81.wav|8
+./Data/p226/27.wav|10
+./Data/p256/59.wav|15
+./Data/p231/74.wav|4
+./Data/p231/9.wav|4
+./Data/p240/1.wav|8
+./Data/p236/135.wav|6
+./Data/p232/85.wav|12
+./Data/p230/69.wav|3
+./Data/p256/35.wav|15
+./Data/p239/6.wav|7
+./Data/p254/70.wav|14
+./Data/p244/135.wav|9
+./Data/p254/114.wav|14
+./Data/p236/117.wav|6
+./Data/p225/78.wav|0
+./Data/p236/66.wav|6
+./Data/p228/155.wav|1
+./Data/p239/83.wav|7
+./Data/p240/22.wav|8
+./Data/p225/2.wav|0
+./Data/p230/75.wav|3
+./Data/p239/17.wav|7
+./Data/p239/147.wav|7
+./Data/p273/125.wav|19
+./Data/p270/17.wav|18
+./Data/p233/74.wav|5
+./Data/p233/112.wav|5
+./Data/p228/1.wav|1
+./Data/p258/125.wav|16
+./Data/p231/56.wav|4
+./Data/p227/101.wav|11
+./Data/p232/108.wav|12
+./Data/p239/82.wav|7
+./Data/p270/160.wav|18
+./Data/p227/58.wav|11
+./Data/p233/111.wav|5
+./Data/p259/146.wav|17
+./Data/p230/77.wav|3
+./Data/p256/104.wav|15
+./Data/p228/140.wav|1
+./Data/p231/143.wav|4
+./Data/p270/20.wav|18
+./Data/p225/62.wav|0
+./Data/p229/11.wav|2
+./Data/p259/66.wav|17
+./Data/p239/53.wav|7
+./Data/p239/23.wav|7
+./Data/p240/115.wav|8
+./Data/p233/41.wav|5
+./Data/p270/61.wav|18
+./Data/p232/50.wav|12
+./Data/p239/56.wav|7
+./Data/p244/117.wav|9
+./Data/p233/6.wav|5
+./Data/p227/31.wav|11
+./Data/p231/134.wav|4
+./Data/p243/12.wav|13
+./Data/p226/14.wav|10
+./Data/p240/45.wav|8
+./Data/p231/91.wav|4
+./Data/p259/163.wav|17
+./Data/p236/41.wav|6
+./Data/p231/66.wav|4
+./Data/p233/122.wav|5
+./Data/p244/8.wav|9
+./Data/p232/41.wav|12
+./Data/p232/101.wav|12
+./Data/p273/70.wav|19
+./Data/p270/168.wav|18
+./Data/p226/80.wav|10
+./Data/p270/178.wav|18
+./Data/p225/39.wav|0
+./Data/p258/29.wav|16
+./Data/p231/46.wav|4
+./Data/p244/41.wav|9
+./Data/p227/115.wav|11
+./Data/p228/14.wav|1
+./Data/p239/116.wav|7
+./Data/p259/13.wav|17
+./Data/p254/51.wav|14
+./Data/p256/36.wav|15
+./Data/p254/108.wav|14
+./Data/p226/46.wav|10
+./Data/p258/39.wav|16
+./Data/p273/106.wav|19
+./Data/p228/104.wav|1
+./Data/p256/1.wav|15
+./Data/p258/109.wav|16
+./Data/p259/123.wav|17
+./Data/p258/99.wav|16
+./Data/p256/61.wav|15
+./Data/p231/17.wav|4
+./Data/p227/74.wav|11
+./Data/p256/21.wav|15
+./Data/p226/1.wav|10
+./Data/p231/129.wav|4
+./Data/p231/79.wav|4
+./Data/p226/74.wav|10
+./Data/p233/69.wav|5
+./Data/p227/44.wav|11
+./Data/p239/141.wav|7
+./Data/p228/28.wav|1
+./Data/p239/101.wav|7
+./Data/p258/106.wav|16
+./Data/p236/49.wav|6
+./Data/p230/46.wav|3
+./Data/p244/118.wav|9
+./Data/p227/25.wav|11
+./Data/p243/25.wav|13
+./Data/p270/96.wav|18
+./Data/p228/130.wav|1
+./Data/p230/62.wav|3
+./Data/p227/114.wav|11
+./Data/p228/116.wav|1
+./Data/p233/40.wav|5
+./Data/p230/147.wav|3
+./Data/p240/71.wav|8
+./Data/p233/29.wav|5
+./Data/p230/13.wav|3
+./Data/p239/63.wav|7
+./Data/p228/19.wav|1
+./Data/p233/63.wav|5
+./Data/p227/50.wav|11
+./Data/p270/6.wav|18
+./Data/p228/106.wav|1
+./Data/p236/86.wav|6
+./Data/p240/129.wav|8
+./Data/p273/21.wav|19
+./Data/p256/83.wav|15
+./Data/p240/85.wav|8
+./Data/p258/43.wav|16
+./Data/p273/41.wav|19
+./Data/p270/192.wav|18
+./Data/p230/134.wav|3
+./Data/p228/44.wav|1
+./Data/p231/102.wav|4
+./Data/p270/73.wav|18
+./Data/p239/153.wav|7
+./Data/p270/36.wav|18
+./Data/p273/145.wav|19
+./Data/p228/107.wav|1
+./Data/p244/12.wav|9
+./Data/p270/181.wav|18
+./Data/p231/35.wav|4
+./Data/p233/132.wav|5
+./Data/p226/19.wav|10
+./Data/p239/91.wav|7
+./Data/p225/66.wav|0
+./Data/p229/40.wav|2
+./Data/p227/48.wav|11
+./Data/p225/44.wav|0
+./Data/p229/108.wav|2
+./Data/p227/47.wav|11
+./Data/p270/78.wav|18
+./Data/p259/77.wav|17
+./Data/p239/51.wav|7
+./Data/p230/58.wav|3
+./Data/p233/81.wav|5
+./Data/p230/150.wav|3
+./Data/p227/141.wav|11
+./Data/p243/121.wav|13
+./Data/p244/11.wav|9
+./Data/p270/63.wav|18
+./Data/p236/81.wav|6
+./Data/p229/89.wav|2
+./Data/p231/83.wav|4
+./Data/p233/33.wav|5
+./Data/p227/107.wav|11
+./Data/p228/48.wav|1
+./Data/p259/44.wav|17
+./Data/p228/131.wav|1
+./Data/p227/20.wav|11
+./Data/p256/98.wav|15
+./Data/p273/45.wav|19
+./Data/p239/137.wav|7
+./Data/p232/93.wav|12
+./Data/p239/38.wav|7
+./Data/p243/161.wav|13
+./Data/p258/3.wav|16
+./Data/p273/132.wav|19
+./Data/p230/15.wav|3
+./Data/p259/155.wav|17
+./Data/p256/125.wav|15
+./Data/p256/7.wav|15
+./Data/p231/58.wav|4
+./Data/p256/75.wav|15
+./Data/p236/116.wav|6
+./Data/p233/10.wav|5
+./Data/p270/39.wav|18
+./Data/p254/48.wav|14
+./Data/p270/121.wav|18
+./Data/p240/139.wav|8
+./Data/p240/121.wav|8
+./Data/p244/13.wav|9
+./Data/p243/148.wav|13
+./Data/p240/125.wav|8
+./Data/p259/14.wav|17
+./Data/p273/139.wav|19
+./Data/p233/123.wav|5
+./Data/p225/34.wav|0
+./Data/p244/10.wav|9
+./Data/p258/4.wav|16
+./Data/p236/111.wav|6
+./Data/p259/59.wav|17
+./Data/p258/85.wav|16
+./Data/p227/73.wav|11
+./Data/p273/107.wav|19
+./Data/p231/94.wav|4
+./Data/p231/34.wav|4
+./Data/p270/200.wav|18
+./Data/p273/74.wav|19
+./Data/p232/45.wav|12
+./Data/p227/37.wav|11
+./Data/p256/101.wav|15
+./Data/p233/64.wav|5
+./Data/p228/52.wav|1
+./Data/p254/104.wav|14
+./Data/p236/103.wav|6
+./Data/p233/73.wav|5
+./Data/p243/146.wav|13
+./Data/p258/60.wav|16
+./Data/p254/8.wav|14
+./Data/p226/64.wav|10
+./Data/p243/63.wav|13
+./Data/p226/70.wav|10
+./Data/p233/37.wav|5
+./Data/p254/78.wav|14
+./Data/p227/123.wav|11
+./Data/p240/55.wav|8
+./Data/p229/126.wav|2
+./Data/p254/54.wav|14
+./Data/p243/163.wav|13
+./Data/p273/79.wav|19
+./Data/p230/9.wav|3
+./Data/p270/50.wav|18
+./Data/p243/64.wav|13
+./Data/p229/100.wav|2
+./Data/p240/100.wav|8
+./Data/p239/139.wav|7
+./Data/p236/65.wav|6
+./Data/p243/97.wav|13
+./Data/p258/37.wav|16
+./Data/p233/9.wav|5
+./Data/p243/10.wav|13
+./Data/p244/53.wav|9
+./Data/p259/162.wav|17
+./Data/p236/131.wav|6
+./Data/p227/134.wav|11
+./Data/p228/5.wav|1
+./Data/p273/18.wav|19
+./Data/p243/115.wav|13
+./Data/p256/113.wav|15
+./Data/p243/103.wav|13
+./Data/p273/133.wav|19
+./Data/p244/90.wav|9
+./Data/p258/45.wav|16
+./Data/p229/16.wav|2
+./Data/p244/25.wav|9
+./Data/p225/95.wav|0
+./Data/p230/18.wav|3
+./Data/p270/196.wav|18
+./Data/p229/58.wav|2
+./Data/p239/125.wav|7
+./Data/p225/27.wav|0
+./Data/p239/13.wav|7
+./Data/p259/121.wav|17
+./Data/p240/12.wav|8
+./Data/p270/40.wav|18
+./Data/p258/17.wav|16
+./Data/p270/123.wav|18
+./Data/p258/93.wav|16
+./Data/p229/43.wav|2
+./Data/p243/152.wav|13
+./Data/p236/11.wav|6
+./Data/p232/38.wav|12
+./Data/p225/9.wav|0
+./Data/p270/128.wav|18
+./Data/p258/22.wav|16
+./Data/p227/113.wav|11
+./Data/p228/128.wav|1
+./Data/p228/56.wav|1
+./Data/p239/19.wav|7
+./Data/p273/134.wav|19
+./Data/p231/144.wav|4
+./Data/p231/16.wav|4
+./Data/p259/141.wav|17
+./Data/p227/9.wav|11
+./Data/p273/114.wav|19
+./Data/p225/94.wav|0
+./Data/p273/42.wav|19
+./Data/p236/72.wav|6
+./Data/p240/58.wav|8
+./Data/p258/78.wav|16
+./Data/p227/129.wav|11
+./Data/p254/9.wav|14
+./Data/p226/43.wav|10
+./Data/p228/103.wav|1
+./Data/p232/114.wav|12
+./Data/p254/80.wav|14
+./Data/p240/144.wav|8
+./Data/p227/55.wav|11
+./Data/p254/2.wav|14
+./Data/p273/101.wav|19
+./Data/p243/67.wav|13
+./Data/p227/94.wav|11
+./Data/p227/121.wav|11
+./Data/p259/153.wav|17
+./Data/p258/40.wav|16
+./Data/p239/32.wav|7
+./Data/p270/83.wav|18
+./Data/p226/103.wav|10
+./Data/p258/18.wav|16
+./Data/p243/155.wav|13
+./Data/p229/117.wav|2
+./Data/p231/127.wav|4
+./Data/p256/30.wav|15
+./Data/p240/90.wav|8
+./Data/p254/133.wav|14
+./Data/p240/51.wav|8
+./Data/p239/105.wav|7
+./Data/p226/85.wav|10
+./Data/p254/31.wav|14
+./Data/p258/57.wav|16
+./Data/p230/95.wav|3
+./Data/p226/52.wav|10
+./Data/p258/79.wav|16
+./Data/p273/49.wav|19
+./Data/p259/82.wav|17
+./Data/p227/126.wav|11
+./Data/p243/158.wav|13
+./Data/p273/130.wav|19
+./Data/p243/7.wav|13
+./Data/p228/137.wav|1
+./Data/p233/103.wav|5
+./Data/p254/37.wav|14
+./Data/p240/39.wav|8
+./Data/p225/28.wav|0
+./Data/p227/139.wav|11
+./Data/p244/130.wav|9
+./Data/p243/22.wav|13
+./Data/p228/69.wav|1
+./Data/p231/64.wav|4
+./Data/p233/129.wav|5
+./Data/p232/68.wav|12
+./Data/p231/87.wav|4
+./Data/p240/83.wav|8
+./Data/p232/55.wav|12
+./Data/p259/54.wav|17
+./Data/p270/125.wav|18
+./Data/p239/169.wav|7
+./Data/p229/39.wav|2
+./Data/p273/110.wav|19
+./Data/p233/93.wav|5
+./Data/p225/79.wav|0
+./Data/p230/140.wav|3
+./Data/p228/36.wav|1
+./Data/p230/4.wav|3
+./Data/p259/88.wav|17
+./Data/p243/86.wav|13
+./Data/p227/90.wav|11
+./Data/p254/83.wav|14
+./Data/p240/150.wav|8
+./Data/p232/118.wav|12
+./Data/p270/35.wav|18
+./Data/p231/126.wav|4
+./Data/p239/59.wav|7
+./Data/p243/47.wav|13
+./Data/p254/105.wav|14
+./Data/p258/72.wav|16
+./Data/p228/72.wav|1
+./Data/p270/136.wav|18
+./Data/p230/51.wav|3
+./Data/p227/67.wav|11
+./Data/p259/151.wav|17
+./Data/p232/66.wav|12
+./Data/p254/40.wav|14
+./Data/p273/12.wav|19
+./Data/p229/130.wav|2
+./Data/p270/156.wav|18
+./Data/p230/177.wav|3
+./Data/p270/169.wav|18
+./Data/p258/1.wav|16
+./Data/p229/103.wav|2
+./Data/p270/127.wav|18
+./Data/p226/72.wav|10
+./Data/p229/99.wav|2
+./Data/p232/8.wav|12
+./Data/p236/1.wav|6
+./Data/p230/85.wav|3
+./Data/p236/99.wav|6
+./Data/p231/139.wav|4
+./Data/p256/67.wav|15
+./Data/p240/38.wav|8
+./Data/p233/16.wav|5
+./Data/p243/13.wav|13
+./Data/p227/86.wav|11
+./Data/p233/110.wav|5
+./Data/p243/77.wav|13
+./Data/p227/77.wav|11
+./Data/p230/7.wav|3
+./Data/p270/175.wav|18
+./Data/p254/38.wav|14
+./Data/p227/71.wav|11
+./Data/p229/104.wav|2
+./Data/p231/101.wav|4
+./Data/p229/105.wav|2
+./Data/p225/49.wav|0
+./Data/p230/137.wav|3
+./Data/p226/42.wav|10
+./Data/p233/92.wav|5
+./Data/p243/58.wav|13
+./Data/p239/45.wav|7
+./Data/p233/135.wav|5
+./Data/p244/89.wav|9
+./Data/p243/166.wav|13
+./Data/p240/59.wav|8
+./Data/p254/86.wav|14
+./Data/p243/60.wav|13
+./Data/p227/19.wav|11
+./Data/p231/45.wav|4
+./Data/p227/140.wav|11
+./Data/p236/129.wav|6
+./Data/p240/67.wav|8
+./Data/p227/61.wav|11
+./Data/p228/77.wav|1
+./Data/p236/52.wav|6
+./Data/p258/33.wav|16
+./Data/p244/104.wav|9
+./Data/p259/84.wav|17
+./Data/p236/127.wav|6
+./Data/p228/150.wav|1
+./Data/p233/85.wav|5
+./Data/p270/147.wav|18
+./Data/p229/83.wav|2
+./Data/p226/68.wav|10
+./Data/p229/94.wav|2
+./Data/p270/46.wav|18
+./Data/p258/129.wav|16
+./Data/p270/191.wav|18
+./Data/p227/106.wav|11
+./Data/p239/136.wav|7
+./Data/p239/14.wav|7
+./Data/p239/71.wav|7
+./Data/p232/74.wav|12
+./Data/p225/75.wav|0
+./Data/p244/143.wav|9
+./Data/p259/173.wav|17
+./Data/p243/140.wav|13
+./Data/p273/48.wav|19
+./Data/p230/111.wav|3
+./Data/p240/94.wav|8
+./Data/p258/20.wav|16
+./Data/p227/52.wav|11
+./Data/p244/4.wav|9
+./Data/p227/109.wav|11
+./Data/p230/55.wav|3
+./Data/p232/92.wav|12
+./Data/p240/75.wav|8
+./Data/p229/82.wav|2
+./Data/p270/103.wav|18
+./Data/p254/87.wav|14
+./Data/p259/38.wav|17
+./Data/p240/147.wav|8
+./Data/p227/111.wav|11
+./Data/p228/2.wav|1
+./Data/p230/82.wav|3
+./Data/p239/33.wav|7
+./Data/p259/65.wav|17
+./Data/p273/102.wav|19
+./Data/p227/116.wav|11
+./Data/p258/61.wav|16
+./Data/p228/68.wav|1
+./Data/p244/116.wav|9
+./Data/p240/9.wav|8
+./Data/p273/64.wav|19
+./Data/p273/9.wav|19
+./Data/p230/8.wav|3
+./Data/p230/172.wav|3
+./Data/p243/32.wav|13
+./Data/p258/117.wav|16
+./Data/p236/43.wav|6
+./Data/p243/29.wav|13
+./Data/p231/86.wav|4
+./Data/p231/6.wav|4
+./Data/p236/166.wav|6
+./Data/p270/174.wav|18
+./Data/p229/123.wav|2
+./Data/p243/132.wav|13
+./Data/p228/91.wav|1
+./Data/p273/100.wav|19
+./Data/p243/61.wav|13
+./Data/p233/14.wav|5
+./Data/p256/5.wav|15
+./Data/p228/135.wav|1
+./Data/p254/21.wav|14
+./Data/p230/96.wav|3
+./Data/p240/142.wav|8
+./Data/p259/63.wav|17
+./Data/p243/37.wav|13
+./Data/p228/136.wav|1
+./Data/p254/126.wav|14
+./Data/p225/51.wav|0
+./Data/p258/9.wav|16
+./Data/p270/85.wav|18
+./Data/p228/149.wav|1
+./Data/p236/152.wav|6
+./Data/p259/124.wav|17
+./Data/p244/1.wav|9
+./Data/p259/104.wav|17
+./Data/p227/64.wav|11
+./Data/p230/70.wav|3
+./Data/p256/122.wav|15
+./Data/p258/30.wav|16
+./Data/p244/54.wav|9
+./Data/p270/198.wav|18
+./Data/p258/15.wav|16
+./Data/p254/52.wav|14
+./Data/p228/85.wav|1
+./Data/p230/1.wav|3
+./Data/p230/71.wav|3
+./Data/p259/147.wav|17
+./Data/p243/68.wav|13
+./Data/p226/79.wav|10
+./Data/p243/123.wav|13
+./Data/p229/85.wav|2
+./Data/p270/5.wav|18
+./Data/p226/12.wav|10
+./Data/p231/82.wav|4
+./Data/p230/120.wav|3
+./Data/p225/31.wav|0
+./Data/p236/130.wav|6
+./Data/p239/111.wav|7
+./Data/p230/60.wav|3
+./Data/p232/121.wav|12
+./Data/p259/27.wav|17
+./Data/p228/65.wav|1
+./Data/p231/92.wav|4
+./Data/p236/160.wav|6
+./Data/p258/145.wav|16
+./Data/p231/20.wav|4
+./Data/p226/47.wav|10
+./Data/p258/110.wav|16
+./Data/p231/93.wav|4
+./Data/p270/30.wav|18
+./Data/p227/97.wav|11
+./Data/p231/31.wav|4
+./Data/p273/55.wav|19
+./Data/p239/12.wav|7
+./Data/p240/63.wav|8
+./Data/p254/57.wav|14
+./Data/p244/35.wav|9
+./Data/p239/127.wav|7
+./Data/p226/130.wav|10
+./Data/p225/83.wav|0
+./Data/p259/56.wav|17
+./Data/p273/85.wav|19
+./Data/p244/129.wav|9
+./Data/p273/83.wav|19
+./Data/p270/45.wav|18
+./Data/p273/23.wav|19
+./Data/p233/15.wav|5
+./Data/p256/34.wav|15
+./Data/p273/38.wav|19
+./Data/p244/73.wav|9
+./Data/p243/43.wav|13
+./Data/p270/26.wav|18
+./Data/p239/87.wav|7
+./Data/p233/120.wav|5
+./Data/p236/14.wav|6
+./Data/p227/5.wav|11
+./Data/p258/104.wav|16
+./Data/p227/45.wav|11
+./Data/p229/35.wav|2
+./Data/p273/36.wav|19
+./Data/p240/82.wav|8
+./Data/p254/20.wav|14
+./Data/p232/128.wav|12
+./Data/p254/47.wav|14
+./Data/p270/102.wav|18
+./Data/p230/41.wav|3
+./Data/p225/23.wav|0
+./Data/p258/38.wav|16
+./Data/p233/137.wav|5
+./Data/p254/94.wav|14
+./Data/p244/122.wav|9
+./Data/p229/51.wav|2
+./Data/p244/96.wav|9
+./Data/p273/119.wav|19
+./Data/p227/80.wav|11
+./Data/p225/1.wav|0
+./Data/p244/80.wav|9
+./Data/p233/108.wav|5
+./Data/p259/119.wav|17
+./Data/p226/8.wav|10
+./Data/p228/60.wav|1
+./Data/p233/71.wav|5
+./Data/p243/168.wav|13
+./Data/p226/136.wav|10
+./Data/p236/110.wav|6
+./Data/p228/23.wav|1
+./Data/p244/137.wav|9
+./Data/p240/33.wav|8
+./Data/p256/13.wav|15
+./Data/p243/6.wav|13
+./Data/p227/30.wav|11
+./Data/p244/28.wav|9
+./Data/p228/24.wav|1
+./Data/p243/147.wav|13
+./Data/p231/39.wav|4
+./Data/p254/93.wav|14
+./Data/p256/29.wav|15
+./Data/p258/119.wav|16
+./Data/p240/69.wav|8
+./Data/p232/102.wav|12
+./Data/p233/101.wav|5
+./Data/p270/29.wav|18
+./Data/p233/47.wav|5
+./Data/p259/17.wav|17
+./Data/p228/94.wav|1
+./Data/p231/21.wav|4
+./Data/p230/143.wav|3
+./Data/p270/204.wav|18
+./Data/p229/71.wav|2
+./Data/p232/13.wav|12
+./Data/p227/127.wav|11
+./Data/p258/105.wav|16
+./Data/p227/112.wav|11
+./Data/p270/59.wav|18
+./Data/p232/47.wav|12
+./Data/p236/112.wav|6
+./Data/p273/115.wav|19
+./Data/p236/20.wav|6
+./Data/p258/115.wav|16
+./Data/p256/24.wav|15
+./Data/p273/76.wav|19
+./Data/p231/3.wav|4
+./Data/p225/56.wav|0
+./Data/p259/150.wav|17
+./Data/p227/70.wav|11
+./Data/p230/2.wav|3
+./Data/p226/22.wav|10
+./Data/p243/127.wav|13
+./Data/p258/31.wav|16
+./Data/p233/89.wav|5
+./Data/p259/64.wav|17
+./Data/p259/96.wav|17
+./Data/p227/57.wav|11
+./Data/p232/132.wav|12
+./Data/p236/46.wav|6
+./Data/p232/70.wav|12
+./Data/p273/138.wav|19
+./Data/p244/99.wav|9
+./Data/p240/18.wav|8
+./Data/p243/145.wav|13
+./Data/p230/125.wav|3
+./Data/p243/49.wav|13
+./Data/p256/71.wav|15
+./Data/p258/133.wav|16
+./Data/p236/50.wav|6
+./Data/p270/122.wav|18
+./Data/p230/25.wav|3
+./Data/p236/124.wav|6
+./Data/p273/35.wav|19
+./Data/p258/98.wav|16
+./Data/p270/51.wav|18
+./Data/p229/121.wav|2
+./Data/p270/15.wav|18
+./Data/p270/193.wav|18
+./Data/p239/138.wav|7
+./Data/p273/108.wav|19
+./Data/p254/139.wav|14
+./Data/p256/23.wav|15
+./Data/p243/84.wav|13
+./Data/p273/93.wav|19
+./Data/p240/21.wav|8
+./Data/p240/109.wav|8
+./Data/p230/76.wav|3
+./Data/p232/61.wav|12
+./Data/p233/48.wav|5
+./Data/p233/133.wav|5
+./Data/p239/28.wav|7
+./Data/p230/149.wav|3
+./Data/p240/46.wav|8
+./Data/p243/74.wav|13
+./Data/p256/88.wav|15
+./Data/p228/61.wav|1
+./Data/p236/87.wav|6
+./Data/p236/2.wav|6
+./Data/p239/159.wav|7
+./Data/p231/44.wav|4
+./Data/p236/161.wav|6
+./Data/p256/19.wav|15
+./Data/p258/5.wav|16
+./Data/p243/83.wav|13
+./Data/p228/30.wav|1
+./Data/p226/65.wav|10
+./Data/p258/127.wav|16
+./Data/p254/60.wav|14
+./Data/p273/97.wav|19
+./Data/p228/50.wav|1
+./Data/p243/135.wav|13
+./Data/p228/111.wav|1
+./Data/p229/7.wav|2
+./Data/p229/3.wav|2
+./Data/p258/11.wav|16
+./Data/p258/6.wav|16
+./Data/p259/148.wav|17
+./Data/p232/30.wav|12
+./Data/p256/70.wav|15
+./Data/p259/160.wav|17
+./Data/p239/113.wav|7
+./Data/p229/109.wav|2
+./Data/p231/29.wav|4
+./Data/p258/25.wav|16
+./Data/p239/148.wav|7
+./Data/p239/78.wav|7
+./Data/p239/107.wav|7
+./Data/p239/99.wav|7
+./Data/p259/32.wav|17
+./Data/p239/11.wav|7
+./Data/p226/139.wav|10
+./Data/p229/88.wav|2
+./Data/p239/9.wav|7
+./Data/p229/26.wav|2
+./Data/p229/128.wav|2
+./Data/p244/119.wav|9
+./Data/p259/76.wav|17
+./Data/p239/129.wav|7
+./Data/p256/115.wav|15
+./Data/p230/102.wav|3
+./Data/p236/42.wav|6
+./Data/p225/16.wav|0
+./Data/p240/140.wav|8
+./Data/p226/36.wav|10
+./Data/p226/78.wav|10
+./Data/p225/37.wav|0
+./Data/p256/51.wav|15
+./Data/p254/112.wav|14
+./Data/p236/24.wav|6
+./Data/p228/164.wav|1
+./Data/p225/63.wav|0
+./Data/p259/25.wav|17
+./Data/p226/133.wav|10
+./Data/p244/107.wav|9
+./Data/p270/32.wav|18
+./Data/p270/56.wav|18
+./Data/p226/62.wav|10
+./Data/p228/95.wav|1
+./Data/p259/112.wav|17
+./Data/p229/114.wav|2
+./Data/p273/16.wav|19
+./Data/p236/60.wav|6
+./Data/p256/128.wav|15
+./Data/p273/144.wav|19
+./Data/p236/142.wav|6
+./Data/p231/130.wav|4
+./Data/p258/7.wav|16
+./Data/p225/96.wav|0
+./Data/p225/91.wav|0
+./Data/p232/115.wav|12
+./Data/p270/157.wav|18
+./Data/p273/104.wav|19
+./Data/p233/136.wav|5
+./Data/p240/78.wav|8
+./Data/p243/17.wav|13
+./Data/p240/62.wav|8
+./Data/p243/48.wav|13
+./Data/p232/29.wav|12
+./Data/p244/42.wav|9
+./Data/p259/93.wav|17
+./Data/p240/136.wav|8
+./Data/p226/117.wav|10
+./Data/p239/131.wav|7
+./Data/p270/54.wav|18
+./Data/p228/98.wav|1
+./Data/p270/166.wav|18
+./Data/p240/145.wav|8
+./Data/p270/14.wav|18
+./Data/p240/43.wav|8
+./Data/p258/107.wav|16
+./Data/p270/167.wav|18
+./Data/p259/62.wav|17
+./Data/p231/65.wav|4
+./Data/p240/5.wav|8
+./Data/p230/50.wav|3
+./Data/p256/3.wav|15
+./Data/p231/27.wav|4
+./Data/p229/27.wav|2
+./Data/p240/96.wav|8
+./Data/p225/82.wav|0
+./Data/p236/125.wav|6
+./Data/p254/71.wav|14
+./Data/p244/138.wav|9
+./Data/p254/89.wav|14
+./Data/p236/91.wav|6
+./Data/p244/38.wav|9
+./Data/p232/116.wav|12
+./Data/p270/11.wav|18
+./Data/p236/162.wav|6
+./Data/p228/127.wav|1
+./Data/p227/96.wav|11
+./Data/p226/98.wav|10
+./Data/p270/155.wav|18
+./Data/p236/143.wav|6
+./Data/p254/77.wav|14
+./Data/p273/26.wav|19
+./Data/p270/1.wav|18
+./Data/p273/51.wav|19
+./Data/p243/21.wav|13
+./Data/p231/68.wav|4
+./Data/p230/169.wav|3
+./Data/p226/56.wav|10
+./Data/p233/79.wav|5
+./Data/p273/58.wav|19
+./Data/p231/70.wav|4
+./Data/p228/42.wav|1
+./Data/p273/141.wav|19
+./Data/p256/91.wav|15
+./Data/p259/70.wav|17
+./Data/p236/69.wav|6
+./Data/p228/16.wav|1
+./Data/p270/44.wav|18
+./Data/p230/16.wav|3
+./Data/p244/97.wav|9
+./Data/p254/42.wav|14
+./Data/p225/53.wav|0
+./Data/p230/59.wav|3
+./Data/p226/140.wav|10
+./Data/p232/7.wav|12
+./Data/p229/47.wav|2
+./Data/p231/13.wav|4
+./Data/p258/49.wav|16
+./Data/p226/92.wav|10
+./Data/p227/81.wav|11
+./Data/p230/162.wav|3
+./Data/p240/20.wav|8
+./Data/p236/88.wav|6
+./Data/p236/79.wav|6
+./Data/p236/39.wav|6
+./Data/p233/97.wav|5
+./Data/p232/96.wav|12
+./Data/p273/82.wav|19
+./Data/p230/123.wav|3
+./Data/p230/126.wav|3
+./Data/p258/75.wav|16
+./Data/p232/78.wav|12
+./Data/p231/48.wav|4
+./Data/p244/110.wav|9
+./Data/p258/71.wav|16
+./Data/p256/116.wav|15
+./Data/p231/63.wav|4
+./Data/p258/26.wav|16
+./Data/p243/18.wav|13
+./Data/p243/55.wav|13
+./Data/p270/162.wav|18
+./Data/p244/33.wav|9
+./Data/p226/77.wav|10
+./Data/p270/98.wav|18
+./Data/p230/121.wav|3
+./Data/p226/94.wav|10
+./Data/p270/84.wav|18
+./Data/p270/53.wav|18
+./Data/p243/124.wav|13
+./Data/p228/86.wav|1
+./Data/p229/25.wav|2
+./Data/p230/68.wav|3
+./Data/p240/29.wav|8
+./Data/p236/63.wav|6
+./Data/p270/129.wav|18
+./Data/p229/79.wav|2
+./Data/p233/102.wav|5
+./Data/p228/34.wav|1
+./Data/p230/163.wav|3
+./Data/p228/64.wav|1
+./Data/p233/115.wav|5
+./Data/p243/88.wav|13
+./Data/p244/14.wav|9
+./Data/p243/174.wav|13
+./Data/p229/74.wav|2
+./Data/p258/27.wav|16
+./Data/p259/86.wav|17
901
+ ./Data/p273/92.wav|19
902
+ ./Data/p239/81.wav|7
903
+ ./Data/p254/109.wav|14
904
+ ./Data/p232/103.wav|12
905
+ ./Data/p230/21.wav|3
906
+ ./Data/p226/10.wav|10
907
+ ./Data/p240/2.wav|8
908
+ ./Data/p256/102.wav|15
909
+ ./Data/p240/127.wav|8
910
+ ./Data/p259/138.wav|17
911
+ ./Data/p254/123.wav|14
912
+ ./Data/p270/92.wav|18
913
+ ./Data/p254/30.wav|14
914
+ ./Data/p273/86.wav|19
915
+ ./Data/p244/106.wav|9
916
+ ./Data/p226/107.wav|10
917
+ ./Data/p240/4.wav|8
918
+ ./Data/p228/97.wav|1
919
+ ./Data/p258/32.wav|16
920
+ ./Data/p232/79.wav|12
921
+ ./Data/p259/154.wav|17
922
+ ./Data/p231/19.wav|4
923
+ ./Data/p259/91.wav|17
924
+ ./Data/p244/45.wav|9
925
+ ./Data/p240/97.wav|8
926
+ ./Data/p259/45.wav|17
927
+ ./Data/p270/197.wav|18
928
+ ./Data/p229/1.wav|2
929
+ ./Data/p259/11.wav|17
930
+ ./Data/p228/29.wav|1
931
+ ./Data/p230/72.wav|3
932
+ ./Data/p228/145.wav|1
933
+ ./Data/p244/71.wav|9
934
+ ./Data/p230/66.wav|3
935
+ ./Data/p226/51.wav|10
936
+ ./Data/p270/10.wav|18
937
+ ./Data/p254/96.wav|14
938
+ ./Data/p256/64.wav|15
939
+ ./Data/p243/65.wav|13
940
+ ./Data/p228/148.wav|1
941
+ ./Data/p243/41.wav|13
942
+ ./Data/p228/57.wav|1
943
+ ./Data/p239/92.wav|7
944
+ ./Data/p256/124.wav|15
945
+ ./Data/p259/116.wav|17
946
+ ./Data/p233/70.wav|5
947
+ ./Data/p227/1.wav|11
948
+ ./Data/p231/59.wav|4
949
+ ./Data/p243/30.wav|13
950
+ ./Data/p254/41.wav|14
951
+ ./Data/p228/123.wav|1
952
+ ./Data/p239/20.wav|7
953
+ ./Data/p229/77.wav|2
954
+ ./Data/p239/132.wav|7
955
+ ./Data/p243/144.wav|13
956
+ ./Data/p227/137.wav|11
957
+ ./Data/p239/134.wav|7
958
+ ./Data/p240/108.wav|8
959
+ ./Data/p256/118.wav|15
960
+ ./Data/p256/126.wav|15
961
+ ./Data/p226/110.wav|10
962
+ ./Data/p236/29.wav|6
963
+ ./Data/p236/74.wav|6
964
+ ./Data/p231/77.wav|4
965
+ ./Data/p256/45.wav|15
966
+ ./Data/p256/39.wav|15
967
+ ./Data/p228/66.wav|1
968
+ ./Data/p232/35.wav|12
969
+ ./Data/p273/37.wav|19
970
+ ./Data/p240/135.wav|8
971
+ ./Data/p236/73.wav|6
972
+ ./Data/p256/38.wav|15
973
+ ./Data/p243/109.wav|13
974
+ ./Data/p227/33.wav|11
975
+ ./Data/p259/87.wav|17
976
+ ./Data/p225/55.wav|0
977
+ ./Data/p243/138.wav|13
978
+ ./Data/p227/3.wav|11
979
+ ./Data/p254/74.wav|14
980
+ ./Data/p254/137.wav|14
981
+ ./Data/p228/43.wav|1
982
+ ./Data/p270/71.wav|18
983
+ ./Data/p243/56.wav|13
984
+ ./Data/p228/119.wav|1
985
+ ./Data/p244/136.wav|9
986
+ ./Data/p259/94.wav|17
987
+ ./Data/p259/120.wav|17
988
+ ./Data/p230/74.wav|3
989
+ ./Data/p227/100.wav|11
990
+ ./Data/p228/143.wav|1
991
+ ./Data/p225/98.wav|0
992
+ ./Data/p256/2.wav|15
993
+ ./Data/p273/146.wav|19
994
+ ./Data/p230/99.wav|3
995
+ ./Data/p243/20.wav|13
996
+ ./Data/p258/96.wav|16
997
+ ./Data/p226/87.wav|10
998
+ ./Data/p240/64.wav|8
999
+ ./Data/p243/114.wav|13
1000
+ ./Data/p273/77.wav|19
1001
+ ./Data/p256/48.wav|15
1002
+ ./Data/p258/120.wav|16
1003
+ ./Data/p240/111.wav|8
1004
+ ./Data/p226/73.wav|10
1005
+ ./Data/p229/15.wav|2
1006
+ ./Data/p270/165.wav|18
1007
+ ./Data/p226/124.wav|10
1008
+ ./Data/p254/53.wav|14
1009
+ ./Data/p239/97.wav|7
1010
+ ./Data/p236/71.wav|6
1011
+ ./Data/p243/66.wav|13
1012
+ ./Data/p230/26.wav|3
1013
+ ./Data/p233/17.wav|5
1014
+ ./Data/p273/143.wav|19
1015
+ ./Data/p229/6.wav|2
1016
+ ./Data/p258/41.wav|16
1017
+ ./Data/p240/10.wav|8
1018
+ ./Data/p244/115.wav|9
1019
+ ./Data/p256/8.wav|15
1020
+ ./Data/p243/133.wav|13
1021
+ ./Data/p236/145.wav|6
1022
+ ./Data/p240/110.wav|8
1023
+ ./Data/p270/100.wav|18
1024
+ ./Data/p230/167.wav|3
1025
+ ./Data/p270/27.wav|18
1026
+ ./Data/p243/149.wav|13
1027
+ ./Data/p228/139.wav|1
1028
+ ./Data/p256/96.wav|15
1029
+ ./Data/p230/61.wav|3
1030
+ ./Data/p258/42.wav|16
1031
+ ./Data/p236/94.wav|6
1032
+ ./Data/p230/42.wav|3
1033
+ ./Data/p270/144.wav|18
1034
+ ./Data/p228/141.wav|1
1035
+ ./Data/p232/4.wav|12
1036
+ ./Data/p229/8.wav|2
1037
+ ./Data/p230/39.wav|3
1038
+ ./Data/p256/47.wav|15
1039
+ ./Data/p229/54.wav|2
1040
+ ./Data/p239/168.wav|7
1041
+ ./Data/p227/7.wav|11
1042
+ ./Data/p227/93.wav|11
1043
+ ./Data/p240/13.wav|8
1044
+ ./Data/p270/172.wav|18
1045
+ ./Data/p243/45.wav|13
1046
+ ./Data/p259/30.wav|17
1047
+ ./Data/p270/116.wav|18
1048
+ ./Data/p240/48.wav|8
1049
+ ./Data/p227/24.wav|11
1050
+ ./Data/p229/80.wav|2
1051
+ ./Data/p233/2.wav|5
1052
+ ./Data/p228/87.wav|1
1053
+ ./Data/p240/105.wav|8
1054
+ ./Data/p239/60.wav|7
1055
+ ./Data/p244/39.wav|9
1056
+ ./Data/p240/124.wav|8
1057
+ ./Data/p259/145.wav|17
1058
+ ./Data/p227/76.wav|11
1059
+ ./Data/p254/58.wav|14
1060
+ ./Data/p230/156.wav|3
1061
+ ./Data/p229/42.wav|2
1062
+ ./Data/p273/68.wav|19
1063
+ ./Data/p228/146.wav|1
1064
+ ./Data/p236/165.wav|6
1065
+ ./Data/p229/34.wav|2
1066
+ ./Data/p239/123.wav|7
1067
+ ./Data/p273/121.wav|19
1068
+ ./Data/p270/176.wav|18
1069
+ ./Data/p258/74.wav|16
1070
+ ./Data/p254/84.wav|14
1071
+ ./Data/p259/157.wav|17
1072
+ ./Data/p258/130.wav|16
1073
+ ./Data/p244/18.wav|9
1074
+ ./Data/p229/59.wav|2
1075
+ ./Data/p229/10.wav|2
1076
+ ./Data/p273/89.wav|19
1077
+ ./Data/p259/23.wav|17
1078
+ ./Data/p256/6.wav|15
1079
+ ./Data/p227/8.wav|11
1080
+ ./Data/p258/59.wav|16
1081
+ ./Data/p232/91.wav|12
1082
+ ./Data/p258/137.wav|16
1083
+ ./Data/p258/122.wav|16
1084
+ ./Data/p230/89.wav|3
1085
+ ./Data/p232/58.wav|12
1086
+ ./Data/p231/11.wav|4
1087
+ ./Data/p273/120.wav|19
1088
+ ./Data/p232/39.wav|12
1089
+ ./Data/p236/44.wav|6
1090
+ ./Data/p254/12.wav|14
1091
+ ./Data/p270/95.wav|18
1092
+ ./Data/p270/153.wav|18
1093
+ ./Data/p230/164.wav|3
1094
+ ./Data/p225/30.wav|0
1095
+ ./Data/p240/126.wav|8
1096
+ ./Data/p230/54.wav|3
1097
+ ./Data/p270/87.wav|18
1098
+ ./Data/p225/14.wav|0
1099
+ ./Data/p231/145.wav|4
1100
+ ./Data/p254/81.wav|14
1101
+ ./Data/p244/55.wav|9
1102
+ ./Data/p259/3.wav|17
1103
+ ./Data/p273/50.wav|19
1104
+ ./Data/p228/84.wav|1
1105
+ ./Data/p244/3.wav|9
1106
+ ./Data/p239/55.wav|7
1107
+ ./Data/p232/5.wav|12
1108
+ ./Data/p229/111.wav|2
1109
+ ./Data/p236/141.wav|6
1110
+ ./Data/p233/54.wav|5
1111
+ ./Data/p240/88.wav|8
1112
+ ./Data/p236/16.wav|6
1113
+ ./Data/p239/154.wav|7
1114
+ ./Data/p240/72.wav|8
1115
+ ./Data/p236/75.wav|6
1116
+ ./Data/p230/166.wav|3
1117
+ ./Data/p231/122.wav|4
1118
+ ./Data/p273/24.wav|19
1119
+ ./Data/p233/30.wav|5
1120
+ ./Data/p226/9.wav|10
1121
+ ./Data/p240/65.wav|8
1122
+ ./Data/p228/80.wav|1
1123
+ ./Data/p232/46.wav|12
1124
+ ./Data/p239/109.wav|7
1125
+ ./Data/p231/67.wav|4
1126
+ ./Data/p233/67.wav|5
1127
+ ./Data/p228/162.wav|1
1128
+ ./Data/p229/134.wav|2
1129
+ ./Data/p239/27.wav|7
1130
+ ./Data/p227/145.wav|11
1131
+ ./Data/p225/67.wav|0
1132
+ ./Data/p232/99.wav|12
1133
+ ./Data/p270/140.wav|18
1134
+ ./Data/p225/70.wav|0
1135
+ ./Data/p259/21.wav|17
1136
+ ./Data/p230/28.wav|3
1137
+ ./Data/p230/80.wav|3
1138
+ ./Data/p243/34.wav|13
1139
+ ./Data/p254/61.wav|14
1140
+ ./Data/p236/58.wav|6
1141
+ ./Data/p239/21.wav|7
1142
+ ./Data/p230/91.wav|3
1143
+ ./Data/p256/68.wav|15
1144
+ ./Data/p225/21.wav|0
1145
+ ./Data/p233/49.wav|5
1146
+ ./Data/p236/114.wav|6
1147
+ ./Data/p228/134.wav|1
1148
+ ./Data/p231/114.wav|4
1149
+ ./Data/p239/18.wav|7
1150
+ ./Data/p227/132.wav|11
1151
+ ./Data/p236/115.wav|6
1152
+ ./Data/p254/99.wav|14
1153
+ ./Data/p243/143.wav|13
1154
+ ./Data/p270/49.wav|18
1155
+ ./Data/p239/152.wav|7
1156
+ ./Data/p232/120.wav|12
1157
+ ./Data/p256/25.wav|15
1158
+ ./Data/p229/116.wav|2
1159
+ ./Data/p239/130.wav|7
1160
+ ./Data/p254/124.wav|14
1161
+ ./Data/p270/118.wav|18
1162
+ ./Data/p244/46.wav|9
1163
+ ./Data/p231/105.wav|4
1164
+ ./Data/p231/115.wav|4
1165
+ ./Data/p239/144.wav|7
1166
+ ./Data/p226/39.wav|10
1167
+ ./Data/p233/78.wav|5
1168
+ ./Data/p227/53.wav|11
1169
+ ./Data/p239/146.wav|7
1170
+ ./Data/p256/77.wav|15
1171
+ ./Data/p259/37.wav|17
1172
+ ./Data/p258/36.wav|16
1173
+ ./Data/p254/13.wav|14
1174
+ ./Data/p229/69.wav|2
1175
+ ./Data/p231/90.wav|4
1176
+ ./Data/p226/84.wav|10
1177
+ ./Data/p259/48.wav|17
1178
+ ./Data/p233/88.wav|5
1179
+ ./Data/p228/153.wav|1
1180
+ ./Data/p254/43.wav|14
1181
+ ./Data/p231/97.wav|4
1182
+ ./Data/p273/44.wav|19
1183
+ ./Data/p233/27.wav|5
1184
+ ./Data/p232/90.wav|12
1185
+ ./Data/p254/36.wav|14
1186
+ ./Data/p232/27.wav|12
1187
+ ./Data/p230/113.wav|3
1188
+ ./Data/p254/130.wav|14
1189
+ ./Data/p254/62.wav|14
1190
+ ./Data/p239/118.wav|7
1191
+ ./Data/p230/109.wav|3
1192
+ ./Data/p227/102.wav|11
1193
+ ./Data/p226/48.wav|10
1194
+ ./Data/p230/175.wav|3
1195
+ ./Data/p231/60.wav|4
1196
+ ./Data/p259/105.wav|17
1197
+ ./Data/p233/28.wav|5
1198
+ ./Data/p229/36.wav|2
1199
+ ./Data/p256/111.wav|15
1200
+ ./Data/p230/133.wav|3
1201
+ ./Data/p233/125.wav|5
1202
+ ./Data/p228/59.wav|1
1203
+ ./Data/p239/58.wav|7
1204
+ ./Data/p273/116.wav|19
1205
+ ./Data/p230/97.wav|3
1206
+ ./Data/p273/88.wav|19
1207
+ ./Data/p228/93.wav|1
1208
+ ./Data/p259/81.wav|17
1209
+ ./Data/p228/144.wav|1
1210
+ ./Data/p230/32.wav|3
1211
+ ./Data/p240/6.wav|8
1212
+ ./Data/p230/17.wav|3
1213
+ ./Data/p259/98.wav|17
1214
+ ./Data/p227/75.wav|11
1215
+ ./Data/p231/26.wav|4
1216
+ ./Data/p231/103.wav|4
1217
+ ./Data/p236/67.wav|6
1218
+ ./Data/p270/107.wav|18
1219
+ ./Data/p226/24.wav|10
1220
+ ./Data/p273/34.wav|19
1221
+ ./Data/p236/90.wav|6
1222
+ ./Data/p256/14.wav|15
1223
+ ./Data/p236/140.wav|6
1224
+ ./Data/p273/39.wav|19
1225
+ ./Data/p270/163.wav|18
1226
+ ./Data/p239/77.wav|7
1227
+ ./Data/p230/148.wav|3
1228
+ ./Data/p273/113.wav|19
1229
+ ./Data/p254/140.wav|14
1230
+ ./Data/p239/46.wav|7
1231
+ ./Data/p243/51.wav|13
1232
+ ./Data/p231/10.wav|4
1233
+ ./Data/p231/104.wav|4
1234
+ ./Data/p270/132.wav|18
1235
+ ./Data/p228/108.wav|1
1236
+ ./Data/p233/39.wav|5
1237
+ ./Data/p259/130.wav|17
1238
+ ./Data/p239/85.wav|7
1239
+ ./Data/p240/37.wav|8
1240
+ ./Data/p270/58.wav|18
1241
+ ./Data/p243/78.wav|13
1242
+ ./Data/p273/61.wav|19
1243
+ ./Data/p230/144.wav|3
1244
+ ./Data/p233/21.wav|5
1245
+ ./Data/p225/35.wav|0
1246
+ ./Data/p228/158.wav|1
1247
+ ./Data/p259/26.wav|17
1248
+ ./Data/p230/33.wav|3
1249
+ ./Data/p258/128.wav|16
1250
+ ./Data/p233/61.wav|5
1251
+ ./Data/p225/97.wav|0
1252
+ ./Data/p259/143.wav|17
1253
+ ./Data/p226/50.wav|10
1254
+ ./Data/p243/71.wav|13
1255
+ ./Data/p230/22.wav|3
1256
+ ./Data/p226/58.wav|10
1257
+ ./Data/p239/110.wav|7
1258
+ ./Data/p258/136.wav|16
1259
+ ./Data/p226/102.wav|10
1260
+ ./Data/p258/88.wav|16
1261
+ ./Data/p233/94.wav|5
1262
+ ./Data/p258/77.wav|16
1263
+ ./Data/p231/2.wav|4
1264
+ ./Data/p273/40.wav|19
1265
+ ./Data/p239/133.wav|7
1266
+ ./Data/p270/33.wav|18
1267
+ ./Data/p254/132.wav|14
1268
+ ./Data/p270/99.wav|18
1269
+ ./Data/p227/84.wav|11
1270
+ ./Data/p226/132.wav|10
1271
+ ./Data/p239/165.wav|7
1272
+ ./Data/p270/23.wav|18
1273
+ ./Data/p270/41.wav|18
1274
+ ./Data/p236/28.wav|6
1275
+ ./Data/p231/76.wav|4
1276
+ ./Data/p231/28.wav|4
1277
+ ./Data/p236/56.wav|6
1278
+ ./Data/p236/146.wav|6
1279
+ ./Data/p244/125.wav|9
1280
+ ./Data/p256/55.wav|15
1281
+ ./Data/p232/40.wav|12
1282
+ ./Data/p239/64.wav|7
1283
+ ./Data/p240/130.wav|8
1284
+ ./Data/p239/41.wav|7
1285
+ ./Data/p240/138.wav|8
1286
+ ./Data/p226/118.wav|10
1287
+ ./Data/p228/62.wav|1
1288
+ ./Data/p244/16.wav|9
1289
+ ./Data/p244/20.wav|9
1290
+ ./Data/p226/125.wav|10
1291
+ ./Data/p270/74.wav|18
1292
+ ./Data/p229/129.wav|2
1293
+ ./Data/p227/142.wav|11
1294
+ ./Data/p228/38.wav|1
1295
+ ./Data/p258/97.wav|16
1296
+ ./Data/p233/77.wav|5
1297
+ ./Data/p232/84.wav|12
1298
+ ./Data/p229/17.wav|2
1299
+ ./Data/p227/18.wav|11
1300
+ ./Data/p239/94.wav|7
1301
+ ./Data/p239/1.wav|7
1302
+ ./Data/p225/52.wav|0
1303
+ ./Data/p270/82.wav|18
1304
+ ./Data/p232/53.wav|12
1305
+ ./Data/p258/51.wav|16
1306
+ ./Data/p258/132.wav|16
1307
+ ./Data/p229/66.wav|2
1308
+ ./Data/p270/19.wav|18
1309
+ ./Data/p227/88.wav|11
1310
+ ./Data/p231/96.wav|4
1311
+ ./Data/p239/72.wav|7
1312
+ ./Data/p225/73.wav|0
1313
+ ./Data/p240/146.wav|8
1314
+ ./Data/p236/97.wav|6
1315
+ ./Data/p227/43.wav|11
1316
+ ./Data/p232/119.wav|12
1317
+ ./Data/p231/53.wav|4
1318
+ ./Data/p239/42.wav|7
1319
+ ./Data/p259/115.wav|17
1320
+ ./Data/p244/105.wav|9
1321
+ ./Data/p256/33.wav|15
1322
+ ./Data/p231/100.wav|4
1323
+ ./Data/p240/8.wav|8
1324
+ ./Data/p256/57.wav|15
1325
+ ./Data/p227/130.wav|11
1326
+ ./Data/p226/30.wav|10
1327
+ ./Data/p233/80.wav|5
1328
+ ./Data/p232/17.wav|12
1329
+ ./Data/p259/167.wav|17
1330
+ ./Data/p227/122.wav|11
1331
+ ./Data/p239/128.wav|7
1332
+ ./Data/p231/133.wav|4
1333
+ ./Data/p273/129.wav|19
1334
+ ./Data/p243/15.wav|13
1335
+ ./Data/p243/44.wav|13
1336
+ ./Data/p259/161.wav|17
1337
+ ./Data/p243/94.wav|13
1338
+ ./Data/p244/62.wav|9
1339
+ ./Data/p270/180.wav|18
1340
+ ./Data/p258/126.wav|16
1341
+ ./Data/p229/137.wav|2
1342
+ ./Data/p233/105.wav|5
1343
+ ./Data/p244/79.wav|9
1344
+ ./Data/p254/46.wav|14
1345
+ ./Data/p240/95.wav|8
1346
+ ./Data/p259/135.wav|17
1347
+ ./Data/p259/52.wav|17
1348
+ ./Data/p229/68.wav|2
1349
+ ./Data/p254/33.wav|14
1350
+ ./Data/p230/83.wav|3
1351
+ ./Data/p256/89.wav|15
1352
+ ./Data/p254/90.wav|14
1353
+ ./Data/p270/182.wav|18
1354
+ ./Data/p226/18.wav|10
1355
+ ./Data/p270/145.wav|18
1356
+ ./Data/p231/128.wav|4
1357
+ ./Data/p239/140.wav|7
1358
+ ./Data/p228/100.wav|1
1359
+ ./Data/p227/49.wav|11
1360
+ ./Data/p240/53.wav|8
1361
+ ./Data/p258/108.wav|16
1362
+ ./Data/p226/83.wav|10
1363
+ ./Data/p270/106.wav|18
1364
+ ./Data/p243/11.wav|13
1365
+ ./Data/p229/12.wav|2
1366
+ ./Data/p228/7.wav|1
1367
+ ./Data/p243/8.wav|13
1368
+ ./Data/p227/128.wav|11
1369
+ ./Data/p230/118.wav|3
1370
+ ./Data/p227/78.wav|11
1371
+ ./Data/p244/30.wav|9
1372
+ ./Data/p231/98.wav|4
1373
+ ./Data/p230/38.wav|3
1374
+ ./Data/p244/47.wav|9
1375
+ ./Data/p270/138.wav|18
1376
+ ./Data/p259/109.wav|17
1377
+ ./Data/p270/112.wav|18
1378
+ ./Data/p227/82.wav|11
1379
+ ./Data/p228/161.wav|1
1380
+ ./Data/p273/127.wav|19
1381
+ ./Data/p232/72.wav|12
1382
+ ./Data/p227/95.wav|11
1383
+ ./Data/p236/105.wav|6
1384
+ ./Data/p239/52.wav|7
1385
+ ./Data/p273/135.wav|19
1386
+ ./Data/p236/136.wav|6
1387
+ ./Data/p228/113.wav|1
1388
+ ./Data/p229/56.wav|2
1389
+ ./Data/p240/34.wav|8
1390
+ ./Data/p230/79.wav|3
1391
+ ./Data/p232/48.wav|12
1392
+ ./Data/p240/101.wav|8
1393
+ ./Data/p229/112.wav|2
1394
+ ./Data/p273/46.wav|19
1395
+ ./Data/p273/27.wav|19
1396
+ ./Data/p239/103.wav|7
1397
+ ./Data/p259/117.wav|17
1398
+ ./Data/p230/37.wav|3
1399
+ ./Data/p233/138.wav|5
1400
+ ./Data/p228/125.wav|1
1401
+ ./Data/p230/115.wav|3
1402
+ ./Data/p240/42.wav|8
1403
+ ./Data/p231/99.wav|4
1404
+ ./Data/p236/54.wav|6
1405
+ ./Data/p233/104.wav|5
1406
+ ./Data/p270/4.wav|18
1407
+ ./Data/p226/122.wav|10
1408
+ ./Data/p230/56.wav|3
1409
+ ./Data/p244/58.wav|9
1410
+ ./Data/p229/133.wav|2
1411
+ ./Data/p270/64.wav|18
1412
+ ./Data/p225/88.wav|0
1413
+ ./Data/p240/104.wav|8
1414
+ ./Data/p244/78.wav|9
1415
+ ./Data/p254/113.wav|14
1416
+ ./Data/p259/144.wav|17
1417
+ ./Data/p236/100.wav|6
1418
+ ./Data/p230/81.wav|3
1419
+ ./Data/p259/53.wav|17
1420
+ ./Data/p239/155.wav|7
1421
+ ./Data/p236/148.wav|6
1422
+ ./Data/p270/8.wav|18
1423
+ ./Data/p225/90.wav|0
1424
+ ./Data/p236/64.wav|6
1425
+ ./Data/p236/159.wav|6
1426
+ ./Data/p232/63.wav|12
1427
+ ./Data/p244/2.wav|9
1428
+ ./Data/p258/28.wav|16
1429
+ ./Data/p259/5.wav|17
1430
+ ./Data/p225/42.wav|0
1431
+ ./Data/p256/49.wav|15
1432
+ ./Data/p233/24.wav|5
1433
+ ./Data/p270/146.wav|18
1434
+ ./Data/p243/131.wav|13
1435
+ ./Data/p229/91.wav|2
1436
+ ./Data/p229/76.wav|2
1437
+ ./Data/p227/22.wav|11
1438
+ ./Data/p244/59.wav|9
1439
+ ./Data/p236/17.wav|6
1440
+ ./Data/p240/32.wav|8
1441
+ ./Data/p232/23.wav|12
1442
+ ./Data/p230/20.wav|3
1443
+ ./Data/p232/111.wav|12
1444
+ ./Data/p230/159.wav|3
1445
+ ./Data/p244/15.wav|9
1446
+ ./Data/p229/86.wav|2
1447
+ ./Data/p240/54.wav|8
1448
+ ./Data/p229/132.wav|2
1449
+ ./Data/p239/126.wav|7
1450
+ ./Data/p240/91.wav|8
1451
+ ./Data/p244/51.wav|9
1452
+ ./Data/p254/19.wav|14
1453
+ ./Data/p244/32.wav|9
1454
+ ./Data/p258/114.wav|16
1455
+ ./Data/p254/106.wav|14
1456
+ ./Data/p243/111.wav|13
1457
+ ./Data/p226/106.wav|10
1458
+ ./Data/p244/26.wav|9
1459
+ ./Data/p225/57.wav|0
1460
+ ./Data/p243/24.wav|13
1461
+ ./Data/p259/127.wav|17
1462
+ ./Data/p256/50.wav|15
1463
+ ./Data/p239/100.wav|7
1464
+ ./Data/p273/10.wav|19
1465
+ ./Data/p229/2.wav|2
1466
+ ./Data/p270/70.wav|18
1467
+ ./Data/p254/95.wav|14
1468
+ ./Data/p256/120.wav|15
1469
+ ./Data/p233/107.wav|5
1470
+ ./Data/p226/90.wav|10
1471
+ ./Data/p258/55.wav|16
1472
+ ./Data/p233/99.wav|5
1473
+ ./Data/p230/6.wav|3
1474
+ ./Data/p273/131.wav|19
1475
+ ./Data/p273/52.wav|19
1476
+ ./Data/p236/158.wav|6
1477
+ ./Data/p232/62.wav|12
1478
+ ./Data/p233/20.wav|5
1479
+ ./Data/p270/90.wav|18
1480
+ ./Data/p240/11.wav|8
1481
+ ./Data/p258/66.wav|16
1482
+ ./Data/p258/65.wav|16
1483
+ ./Data/p270/94.wav|18
1484
+ ./Data/p270/9.wav|18
1485
+ ./Data/p228/82.wav|1
1486
+ ./Data/p236/96.wav|6
1487
+ ./Data/p229/33.wav|2
1488
+ ./Data/p229/19.wav|2
1489
+ ./Data/p239/54.wav|7
1490
+ ./Data/p232/106.wav|12
1491
+ ./Data/p231/138.wav|4
1492
+ ./Data/p230/57.wav|3
1493
+ ./Data/p270/89.wav|18
1494
+ ./Data/p273/95.wav|19
1495
+ ./Data/p231/131.wav|4
1496
+ ./Data/p236/107.wav|6
1497
+ ./Data/p228/122.wav|1
1498
+ ./Data/p226/109.wav|10
1499
+ ./Data/p270/117.wav|18
1500
+ ./Data/p230/110.wav|3
1501
+ ./Data/p270/37.wav|18
1502
+ ./Data/p225/29.wav|0
1503
+ ./Data/p233/8.wav|5
1504
+ ./Data/p227/4.wav|11
1505
+ ./Data/p232/97.wav|12
1506
+ ./Data/p243/14.wav|13
1507
+ ./Data/p254/91.wav|14
1508
+ ./Data/p256/62.wav|15
1509
+ ./Data/p229/110.wav|2
1510
+ ./Data/p233/34.wav|5
1511
+ ./Data/p226/81.wav|10
1512
+ ./Data/p230/29.wav|3
1513
+ ./Data/p240/84.wav|8
1514
+ ./Data/p270/201.wav|18
1515
+ ./Data/p239/157.wav|7
1516
+ ./Data/p270/158.wav|18
1517
+ ./Data/p236/80.wav|6
1518
+ ./Data/p232/54.wav|12
1519
+ ./Data/p239/29.wav|7
1520
+ ./Data/p225/33.wav|0
1521
+ ./Data/p273/7.wav|19
1522
+ ./Data/p273/98.wav|19
1523
+ ./Data/p227/63.wav|11
1524
+ ./Data/p230/174.wav|3
1525
+ ./Data/p270/28.wav|18
1526
+ ./Data/p233/13.wav|5
1527
+ ./Data/p273/99.wav|19
1528
+ ./Data/p229/81.wav|2
1529
+ ./Data/p273/124.wav|19
1530
+ ./Data/p230/129.wav|3
1531
+ ./Data/p259/133.wav|17
1532
+ ./Data/p270/24.wav|18
1533
+ ./Data/p226/35.wav|10
1534
+ ./Data/p236/118.wav|6
1535
+ ./Data/p254/121.wav|14
1536
+ ./Data/p270/120.wav|18
1537
+ ./Data/p231/30.wav|4
1538
+ ./Data/p240/102.wav|8
1539
+ ./Data/p243/53.wav|13
1540
+ ./Data/p230/47.wav|3
1541
+ ./Data/p233/55.wav|5
1542
+ ./Data/p226/11.wav|10
1543
+ ./Data/p239/120.wav|7
1544
+ ./Data/p226/49.wav|10
1545
+ ./Data/p239/44.wav|7
1546
+ ./Data/p244/140.wav|9
1547
+ ./Data/p258/63.wav|16
1548
+ ./Data/p232/52.wav|12
1549
+ ./Data/p273/109.wav|19
1550
+ ./Data/p259/72.wav|17
1551
+ ./Data/p259/164.wav|17
1552
+ ./Data/p256/78.wav|15
1553
+ ./Data/p243/107.wav|13
1554
+ ./Data/p258/62.wav|16
1555
+ ./Data/p239/31.wav|7
1556
+ ./Data/p256/41.wav|15
1557
+ ./Data/p273/63.wav|19
1558
+ ./Data/p258/112.wav|16
1559
+ ./Data/p243/116.wav|13
1560
+ ./Data/p254/29.wav|14
1561
+ ./Data/p229/45.wav|2
1562
+ ./Data/p244/101.wav|9
1563
+ ./Data/p232/34.wav|12
1564
+ ./Data/p243/154.wav|13
1565
+ ./Data/p231/33.wav|4
1566
+ ./Data/p243/35.wav|13
1567
+ ./Data/p236/38.wav|6
1568
+ ./Data/p270/16.wav|18
1569
+ ./Data/p270/187.wav|18
1570
+ ./Data/p239/114.wav|7
1571
+ ./Data/p244/24.wav|9
1572
+ ./Data/p228/75.wav|1
1573
+ ./Data/p226/26.wav|10
1574
+ ./Data/p259/136.wav|17
1575
+ ./Data/p236/147.wav|6
1576
+ ./Data/p239/135.wav|7
1577
+ ./Data/p270/43.wav|18
1578
+ ./Data/p244/132.wav|9
1579
+ ./Data/p243/129.wav|13
1580
+ ./Data/p236/9.wav|6
1581
+ ./Data/p232/109.wav|12
1582
+ ./Data/p225/84.wav|0
1583
+ ./Data/p227/27.wav|11
1584
+ ./Data/p259/8.wav|17
1585
+ ./Data/p259/67.wav|17
1586
+ ./Data/p239/57.wav|7
1587
+ ./Data/p243/69.wav|13
1588
+ ./Data/p231/62.wav|4
1589
+ ./Data/p259/140.wav|17
1590
+ ./Data/p227/66.wav|11
1591
+ ./Data/p230/44.wav|3
1592
+ ./Data/p229/63.wav|2
1593
+ ./Data/p256/4.wav|15
1594
+ ./Data/p258/24.wav|16
1595
+ ./Data/p240/80.wav|8
1596
+ ./Data/p270/72.wav|18
1597
+ ./Data/p240/47.wav|8
1598
+ ./Data/p229/98.wav|2
1599
+ ./Data/p244/111.wav|9
1600
+ ./Data/p231/111.wav|4
1601
+ ./Data/p243/91.wav|13
1602
+ ./Data/p239/36.wav|7
1603
+ ./Data/p259/103.wav|17
1604
+ ./Data/p232/2.wav|12
1605
+ ./Data/p236/3.wav|6
1606
+ ./Data/p236/57.wav|6
1607
+ ./Data/p233/109.wav|5
1608
+ ./Data/p236/122.wav|6
1609
+ ./Data/p270/76.wav|18
1610
+ ./Data/p243/167.wav|13
1611
+ ./Data/p228/20.wav|1
1612
+ ./Data/p243/72.wav|13
1613
+ ./Data/p239/2.wav|7
1614
+ ./Data/p226/21.wav|10
1615
+ ./Data/p256/43.wav|15
1616
+ ./Data/p259/129.wav|17
1617
+ ./Data/p231/15.wav|4
1618
+ ./Data/p231/85.wav|4
1619
+ ./Data/p226/29.wav|10
1620
+ ./Data/p230/131.wav|3
1621
+ ./Data/p259/97.wav|17
1622
+ ./Data/p240/68.wav|8
1623
+ ./Data/p233/84.wav|5
1624
+ ./Data/p236/10.wav|6
1625
+ ./Data/p244/120.wav|9
1626
+ ./Data/p270/18.wav|18
1627
+ ./Data/p231/24.wav|4
1628
+ ./Data/p256/37.wav|15
1629
+ ./Data/p233/11.wav|5
1630
+ ./Data/p230/93.wav|3
1631
+ ./Data/p230/73.wav|3
1632
+ ./Data/p239/66.wav|7
1633
+ ./Data/p230/40.wav|3
1634
+ ./Data/p228/13.wav|1
1635
+ ./Data/p231/49.wav|4
1636
+ ./Data/p270/62.wav|18
1637
+ ./Data/p236/78.wav|6
1638
+ ./Data/p258/73.wav|16
1639
+ ./Data/p236/35.wav|6
1640
+ ./Data/p254/120.wav|14
1641
+ ./Data/p258/53.wav|16
1642
+ ./Data/p227/16.wav|11
1643
+ ./Data/p232/33.wav|12
1644
+ ./Data/p256/42.wav|15
1645
+ ./Data/p233/68.wav|5
1646
+ ./Data/p225/74.wav|0
1647
+ ./Data/p244/127.wav|9
1648
+ ./Data/p243/118.wav|13
1649
+ ./Data/p273/128.wav|19
1650
+ ./Data/p239/7.wav|7
1651
+ ./Data/p243/50.wav|13
1652
+ ./Data/p226/23.wav|10
1653
+ ./Data/p270/199.wav|18
1654
+ ./Data/p254/45.wav|14
1655
+ ./Data/p254/11.wav|14
1656
+ ./Data/p244/66.wav|9
1657
+ ./Data/p270/152.wav|18
1658
+ ./Data/p227/131.wav|11
1659
+ ./Data/p270/38.wav|18
1660
+ ./Data/p229/57.wav|2
1661
+ ./Data/p227/35.wav|11
1662
+ ./Data/p244/7.wav|9
1663
+ ./Data/p226/32.wav|10
1664
+ ./Data/p230/152.wav|3
1665
+ ./Data/p239/161.wav|7
1666
+ ./Data/p256/123.wav|15
1667
+ ./Data/p231/14.wav|4
1668
+ ./Data/p243/38.wav|13
1669
+ ./Data/p229/102.wav|2
1670
+ ./Data/p229/38.wav|2
1671
+ ./Data/p233/116.wav|5
1672
+ ./Data/p254/35.wav|14
1673
+ ./Data/p254/118.wav|14
1674
+ ./Data/p225/15.wav|0
1675
+ ./Data/p230/132.wav|3
1676
+ ./Data/p273/84.wav|19
1677
+ ./Data/p254/122.wav|14
1678
+ ./Data/p273/3.wav|19
1679
+ ./Data/p270/68.wav|18
1680
+ ./Data/p232/42.wav|12
1681
+ ./Data/p225/93.wav|0
1682
+ ./Data/p227/34.wav|11
1683
+ ./Data/p270/22.wav|18
1684
+ ./Data/p231/4.wav|4
1685
+ ./Data/p227/125.wav|11
1686
+ ./Data/p244/95.wav|9
1687
+ ./Data/p236/18.wav|6
1688
+ ./Data/p273/25.wav|19
1689
+ ./Data/p259/169.wav|17
1690
+ ./Data/p233/56.wav|5
1691
+ ./Data/p270/203.wav|18
1692
+ ./Data/p259/41.wav|17
1693
+ ./Data/p233/38.wav|5
1694
+ ./Data/p229/22.wav|2
1695
+ ./Data/p256/17.wav|15
1696
+ ./Data/p270/3.wav|18
1697
+ ./Data/p231/5.wav|4
1698
+ ./Data/p240/60.wav|8
1699
+ ./Data/p227/21.wav|11
1700
+ ./Data/p259/1.wav|17
1701
+ ./Data/p259/4.wav|17
1702
+ ./Data/p232/11.wav|12
1703
+ ./Data/p259/114.wav|17
1704
+ ./Data/p226/45.wav|10
1705
+ ./Data/p236/27.wav|6
1706
+ ./Data/p239/47.wav|7
1707
+ ./Data/p244/85.wav|9
1708
+ ./Data/p243/87.wav|13
1709
+ ./Data/p258/89.wav|16
1710
+ ./Data/p233/57.wav|5
1711
+ ./Data/p228/78.wav|1
1712
+ ./Data/p256/60.wav|15
1713
+ ./Data/p232/83.wav|12
1714
+ ./Data/p232/88.wav|12
1715
+ ./Data/p231/120.wav|4
1716
+ ./Data/p226/101.wav|10
1717
+ ./Data/p236/102.wav|6
1718
+ ./Data/p226/123.wav|10
1719
+ ./Data/p259/85.wav|17
1720
+ ./Data/p227/124.wav|11
1721
+ ./Data/p259/80.wav|17
1722
+ ./Data/p227/10.wav|11
1723
+ ./Data/p233/26.wav|5
1724
+ ./Data/p273/75.wav|19
1725
+ ./Data/p243/73.wav|13
1726
+ ./Data/p244/22.wav|9
1727
+ ./Data/p243/126.wav|13
1728
+ ./Data/p244/108.wav|9
1729
+ ./Data/p243/134.wav|13
1730
+ ./Data/p226/100.wav|10
1731
+ ./Data/p231/123.wav|4
1732
+ ./Data/p228/47.wav|1
1733
+ ./Data/p243/42.wav|13
1734
+ ./Data/p233/131.wav|5
1735
+ ./Data/p273/2.wav|19
1736
+ ./Data/p254/24.wav|14
1737
+ ./Data/p236/123.wav|6
1738
+ ./Data/p240/24.wav|8
1739
+ ./Data/p244/63.wav|9
1740
+ ./Data/p236/149.wav|6
1741
+ ./Data/p236/83.wav|6
1742
+ ./Data/p258/131.wav|16
1743
+ ./Data/p243/120.wav|13
1744
+ ./Data/p259/159.wav|17
1745
+ ./Data/p258/8.wav|16
1746
+ ./Data/p258/34.wav|16
1747
+ ./Data/p243/33.wav|13
1748
+ ./Data/p256/18.wav|15
1749
+ ./Data/p232/73.wav|12
1750
+ ./Data/p244/49.wav|9
1751
+ ./Data/p258/12.wav|16
1752
+ ./Data/p225/18.wav|0
1753
+ ./Data/p258/68.wav|16
1754
+ ./Data/p270/134.wav|18
1755
+ ./Data/p228/54.wav|1
1756
+ ./Data/p236/139.wav|6
1757
+ ./Data/p225/6.wav|0
1758
+ ./Data/p259/57.wav|17
1759
+ ./Data/p243/70.wav|13
1760
+ ./Data/p240/122.wav|8
1761
+ ./Data/p259/69.wav|17
1762
+ ./Data/p258/124.wav|16
1763
+ ./Data/p226/138.wav|10
1764
+ ./Data/p231/51.wav|4
1765
+ ./Data/p259/126.wav|17
1766
+ ./Data/p227/119.wav|11
1767
+ ./Data/p254/136.wav|14
1768
+ ./Data/p240/107.wav|8
1769
+ ./Data/p254/4.wav|14
1770
+ ./Data/p228/117.wav|1
1771
+ ./Data/p244/92.wav|9
1772
+ ./Data/p239/151.wav|7
1773
+ ./Data/p259/131.wav|17
1774
+ ./Data/p273/96.wav|19
1775
+ ./Data/p254/69.wav|14
1776
+ ./Data/p259/16.wav|17
1777
+ ./Data/p244/86.wav|9
1778
+ ./Data/p236/30.wav|6
1779
+ ./Data/p230/34.wav|3
1780
+ ./Data/p230/142.wav|3
1781
+ ./Data/p244/37.wav|9
1782
+ ./Data/p239/40.wav|7
1783
+ ./Data/p232/87.wav|12
1784
+ ./Data/p270/115.wav|18
1785
+ ./Data/p232/124.wav|12
1786
+ ./Data/p233/127.wav|5
1787
+ ./Data/p228/70.wav|1
1788
+ ./Data/p254/66.wav|14
1789
+ ./Data/p232/16.wav|12
1790
+ ./Data/p256/109.wav|15
1791
+ ./Data/p243/169.wav|13
1792
+ ./Data/p228/112.wav|1
1793
+ ./Data/p254/82.wav|14
1794
+ ./Data/p231/119.wav|4
1795
+ ./Data/p236/59.wav|6
1796
+ ./Data/p239/69.wav|7
1797
+ ./Data/p225/12.wav|0
1798
+ ./Data/p232/18.wav|12
1799
+ ./Data/p229/32.wav|2
1800
+ ./Data/p228/126.wav|1
1801
+ ./Data/p270/171.wav|18
1802
+ ./Data/p236/13.wav|6
1803
+ ./Data/p228/12.wav|1
1804
+ ./Data/p228/96.wav|1
1805
+ ./Data/p256/11.wav|15
1806
+ ./Data/p233/83.wav|5
1807
+ ./Data/p256/99.wav|15
1808
+ ./Data/p225/69.wav|0
1809
+ ./Data/p254/7.wav|14
1810
+ ./Data/p227/59.wav|11
1811
+ ./Data/p273/136.wav|19
1812
+ ./Data/p239/3.wav|7
1813
+ ./Data/p256/119.wav|15
1814
+ ./Data/p226/99.wav|10
1815
+ ./Data/p256/56.wav|15
1816
+ ./Data/p243/82.wav|13
1817
+ ./Data/p227/69.wav|11
1818
+ ./Data/p273/29.wav|19
1819
+ ./Data/p233/100.wav|5
1820
+ ./Data/p230/173.wav|3
1821
+ ./Data/p240/132.wav|8
1822
+ ./Data/p239/143.wav|7
1823
+ ./Data/p231/40.wav|4
1824
+ ./Data/p256/10.wav|15
1825
+ ./Data/p229/75.wav|2
1826
+ ./Data/p240/15.wav|8
1827
+ ./Data/p228/102.wav|1
1828
+ ./Data/p270/52.wav|18
1829
+ ./Data/p270/7.wav|18
1830
+ ./Data/p270/164.wav|18
1831
+ ./Data/p233/91.wav|5
1832
+ ./Data/p244/27.wav|9
1833
+ ./Data/p244/48.wav|9
1834
+ ./Data/p239/24.wav|7
1835
+ ./Data/p226/113.wav|10
1836
+ ./Data/p227/72.wav|11
1837
+ ./Data/p270/67.wav|18
1838
+ ./Data/p231/25.wav|4
1839
+ ./Data/p229/120.wav|2
1840
+ ./Data/p273/67.wav|19
1841
+ ./Data/p230/67.wav|3
1842
+ ./Data/p227/120.wav|11
1843
+ ./Data/p239/121.wav|7
1844
+ ./Data/p228/88.wav|1
1845
+ ./Data/p254/15.wav|14
1846
+ ./Data/p270/114.wav|18
1847
+ ./Data/p254/14.wav|14
1848
+ ./Data/p259/75.wav|17
1849
+ ./Data/p236/126.wav|6
1850
+ ./Data/p228/92.wav|1
1851
+ ./Data/p230/127.wav|3
1852
+ ./Data/p229/93.wav|2
1853
+ ./Data/p233/82.wav|5
1854
+ ./Data/p239/122.wav|7
1855
+ ./Data/p229/72.wav|2
1856
+ ./Data/p232/131.wav|12
1857
+ ./Data/p239/67.wav|7
1858
+ ./Data/p225/36.wav|0
1859
+ ./Data/p254/3.wav|14
1860
+ ./Data/p244/109.wav|9
1861
+ ./Data/p230/112.wav|3
1862
+ ./Data/p230/5.wav|3
1863
+ ./Data/p256/87.wav|15
1864
+ ./Data/p232/15.wav|12
1865
+ ./Data/p244/67.wav|9
1866
+ ./Data/p236/48.wav|6
1867
+ ./Data/p232/110.wav|12
1868
+ ./Data/p243/156.wav|13
1869
+ ./Data/p231/140.wav|4
1870
+ ./Data/p239/89.wav|7
1871
+ ./Data/p229/53.wav|2
1872
+ ./Data/p256/97.wav|15
1873
+ ./Data/p256/79.wav|15
1874
+ ./Data/p236/6.wav|6
1875
+ ./Data/p236/106.wav|6
1876
+ ./Data/p227/15.wav|11
1877
+ ./Data/p273/20.wav|19
1878
+ ./Data/p239/49.wav|7
1879
+ ./Data/p254/134.wav|14
1880
+ ./Data/p228/4.wav|1
1881
+ ./Data/p227/117.wav|11
1882
+ ./Data/p259/7.wav|17
1883
+ ./Data/p258/91.wav|16
1884
+ ./Data/p259/128.wav|17
1885
+ ./Data/p236/61.wav|6
1886
+ ./Data/p230/165.wav|3
1887
+ ./Data/p225/20.wav|0
1888
+ ./Data/p232/122.wav|12
1889
+ ./Data/p230/130.wav|3
1890
+ ./Data/p228/58.wav|1
1891
+ ./Data/p227/38.wav|11
1892
+ ./Data/p239/34.wav|7
1893
+ ./Data/p240/137.wav|8
1894
+ ./Data/p258/90.wav|16
1895
+ ./Data/p258/138.wav|16
1896
+ ./Data/p244/124.wav|9
1897
+ ./Data/p239/167.wav|7
1898
+ ./Data/p233/90.wav|5
1899
+ ./Data/p239/172.wav|7
1900
+ ./Data/p254/97.wav|14
1901
+ ./Data/p259/29.wav|17
1902
+ ./Data/p229/92.wav|2
1903
+ ./Data/p227/11.wav|11
1904
+ ./Data/p258/118.wav|16
1905
+ ./Data/p244/69.wav|9
1906
+ ./Data/p232/3.wav|12
1907
+ ./Data/p256/28.wav|15
1908
+ ./Data/p229/49.wav|2
1909
+ ./Data/p236/82.wav|6
1910
+ ./Data/p239/171.wav|7
1911
+ ./Data/p254/127.wav|14
1912
+ ./Data/p259/43.wav|17
1913
+ ./Data/p228/21.wav|1
1914
+ ./Data/p256/74.wav|15
1915
+ ./Data/p226/76.wav|10
1916
+ ./Data/p243/170.wav|13
1917
+ ./Data/p239/39.wav|7
1918
+ ./Data/p233/124.wav|5
1919
+ ./Data/p229/13.wav|2
1920
+ ./Data/p231/71.wav|4
1921
+ ./Data/p229/118.wav|2
1922
+ ./Data/p231/88.wav|4
1923
+ ./Data/p231/55.wav|4
1924
+ ./Data/p270/104.wav|18
1925
+ ./Data/p270/110.wav|18
1926
+ ./Data/p228/41.wav|1
1927
+ ./Data/p258/2.wav|16
1928
+ ./Data/p230/78.wav|3
1929
+ ./Data/p231/80.wav|4
1930
+ ./Data/p243/9.wav|13
1931
+ ./Data/p239/16.wav|7
1932
+ ./Data/p239/76.wav|7
1933
+ ./Data/p226/126.wav|10
1934
+ ./Data/p226/63.wav|10
1935
+ ./Data/p233/46.wav|5
1936
+ ./Data/p270/202.wav|18
1937
+ ./Data/p239/164.wav|7
1938
+ ./Data/p231/22.wav|4
1939
+ ./Data/p259/24.wav|17
1940
+ ./Data/p256/73.wav|15
1941
+ ./Data/p259/10.wav|17
1942
+ ./Data/p232/94.wav|12
1943
+ ./Data/p273/30.wav|19
1944
+ ./Data/p244/29.wav|9
1945
+ ./Data/p226/129.wav|10
1946
+ ./Data/p243/81.wav|13
1947
+ ./Data/p236/121.wav|6
1948
+ ./Data/p228/89.wav|1
1949
+ ./Data/p231/81.wav|4
1950
+ ./Data/p243/57.wav|13
1951
+ ./Data/p236/40.wav|6
1952
+ ./Data/p226/89.wav|10
1953
+ ./Data/p244/44.wav|9
1954
+ ./Data/p254/88.wav|14
1955
+ ./Data/p227/108.wav|11
1956
+ ./Data/p258/123.wav|16
1957
+ ./Data/p233/95.wav|5
1958
+ ./Data/p259/142.wav|17
1959
+ ./Data/p231/73.wav|4
1960
+ ./Data/p258/52.wav|16
1961
+ ./Data/p236/89.wav|6
1962
+ ./Data/p229/67.wav|2
1963
+ ./Data/p258/46.wav|16
1964
+ ./Data/p231/132.wav|4
1965
+ ./Data/p227/41.wav|11
1966
+ ./Data/p256/114.wav|15
1967
+ ./Data/p232/10.wav|12
1968
+ ./Data/p225/46.wav|0
1969
+ ./Data/p231/61.wav|4
1970
+ ./Data/p229/30.wav|2
1971
+ ./Data/p236/101.wav|6
1972
+ ./Data/p256/20.wav|15
1973
+ ./Data/p226/60.wav|10
1974
+ ./Data/p259/18.wav|17
1975
+ ./Data/p236/151.wav|6
1976
+ ./Data/p233/130.wav|5
1977
+ ./Data/p273/91.wav|19
1978
+ ./Data/p225/59.wav|0
1979
+ ./Data/p227/83.wav|11
1980
+ ./Data/p226/127.wav|10
1981
+ ./Data/p270/137.wav|18
1982
+ ./Data/p258/95.wav|16
1983
+ ./Data/p227/42.wav|11
1984
+ ./Data/p230/108.wav|3
1985
+ ./Data/p243/137.wav|13
1986
+ ./Data/p228/157.wav|1
1987
+ ./Data/p243/105.wav|13
1988
+ ./Data/p228/133.wav|1
1989
+ ./Data/p270/93.wav|18
1990
+ ./Data/p256/86.wav|15
1991
+ ./Data/p254/17.wav|14
1992
+ ./Data/p227/135.wav|11
1993
+ ./Data/p228/118.wav|1
1994
+ ./Data/p239/142.wav|7
1995
+ ./Data/p273/137.wav|19
1996
+ ./Data/p259/79.wav|17
1997
+ ./Data/p259/108.wav|17
1998
+ ./Data/p226/15.wav|10
1999
+ ./Data/p231/43.wav|4
2000
+ ./Data/p256/16.wav|15
2001
+ ./Data/p232/20.wav|12
2002
+ ./Data/p258/35.wav|16
2003
+ ./Data/p243/141.wav|13
2004
+ ./Data/p232/104.wav|12
2005
+ ./Data/p259/58.wav|17
2006
+ ./Data/p258/82.wav|16
2007
+ ./Data/p233/76.wav|5
2008
+ ./Data/p270/126.wav|18
2009
+ ./Data/p236/70.wav|6
2010
+ ./Data/p240/49.wav|8
2011
+ ./Data/p256/106.wav|15
2012
+ ./Data/p254/55.wav|14
2013
+ ./Data/p270/2.wav|18
2014
+ ./Data/p270/143.wav|18
2015
+ ./Data/p229/48.wav|2
2016
+ ./Data/p244/6.wav|9
2017
+ ./Data/p233/65.wav|5
2018
+ ./Data/p233/18.wav|5
2019
+ ./Data/p244/87.wav|9
2020
+ ./Data/p236/133.wav|6
2021
+ ./Data/p227/2.wav|11
2022
+ ./Data/p227/17.wav|11
2023
+ ./Data/p273/111.wav|19
2024
+ ./Data/p230/98.wav|3
2025
+ ./Data/p226/120.wav|10
2026
+ ./Data/p226/112.wav|10
2027
+ ./Data/p230/161.wav|3
2028
+ ./Data/p254/79.wav|14
2029
+ ./Data/p230/101.wav|3
2030
+ ./Data/p239/96.wav|7
2031
+ ./Data/p228/159.wav|1
2032
+ ./Data/p230/24.wav|3
2033
+ ./Data/p240/28.wav|8
2034
+ ./Data/p254/125.wav|14
2035
+ ./Data/p259/168.wav|17
2036
+ ./Data/p228/18.wav|1
2037
+ ./Data/p270/88.wav|18
2038
+ ./Data/p270/25.wav|18
2039
+ ./Data/p231/89.wav|4
2040
+ ./Data/p230/14.wav|3
2041
+ ./Data/p254/63.wav|14
2042
+ ./Data/p233/53.wav|5
2043
+ ./Data/p225/54.wav|0
2044
+ ./Data/p243/19.wav|13
2045
+ ./Data/p259/139.wav|17
2046
+ ./Data/p229/87.wav|2
2047
+ ./Data/p232/56.wav|12
2048
+ ./Data/p270/97.wav|18
2049
+ ./Data/p232/95.wav|12
2050
+ ./Data/p232/86.wav|12
2051
+ ./Data/p259/137.wav|17
2052
+ ./Data/p228/147.wav|1
2053
+ ./Data/p273/112.wav|19
2054
+ ./Data/p243/80.wav|13
2055
+ ./Data/p233/72.wav|5
2056
+ ./Data/p233/114.wav|5
2057
+ ./Data/p240/23.wav|8
2058
+ ./Data/p236/164.wav|6
2059
+ ./Data/p236/144.wav|6
2060
+ ./Data/p254/116.wav|14
2061
+ ./Data/p273/105.wav|19
2062
+ ./Data/p239/48.wav|7
2063
+ ./Data/p236/68.wav|6
2064
+ ./Data/p233/87.wav|5
2065
+ ./Data/p239/50.wav|7
2066
+ ./Data/p256/66.wav|15
2067
+ ./Data/p270/159.wav|18
2068
+ ./Data/p273/53.wav|19
2069
+ ./Data/p254/28.wav|14
2070
+ ./Data/p259/28.wav|17
2071
+ ./Data/p227/89.wav|11
2072
+ ./Data/p243/1.wav|13
2073
+ ./Data/p239/61.wav|7
2074
+ ./Data/p226/28.wav|10
2075
+ ./Data/p232/113.wav|12
2076
+ ./Data/p225/38.wav|0
2077
+ ./Data/p236/128.wav|6
2078
+ ./Data/p225/3.wav|0
2079
+ ./Data/p258/83.wav|16
2080
+ ./Data/p270/195.wav|18
2081
+ ./Data/p231/69.wav|4
2082
+ ./Data/p254/49.wav|14
2083
+ ./Data/p226/135.wav|10
2084
+ ./Data/p230/3.wav|3
2085
+ ./Data/p228/124.wav|1
2086
+ ./Data/p233/119.wav|5
2087
+ ./Data/p229/31.wav|2
2088
+ ./Data/p256/54.wav|15
2089
+ ./Data/p258/121.wav|16
2090
+ ./Data/p231/57.wav|4
2091
+ ./Data/p244/84.wav|9
2092
+ ./Data/p244/113.wav|9
2093
+ ./Data/p228/71.wav|1
2094
+ ./Data/p270/86.wav|18
2095
+ ./Data/p254/98.wav|14
2096
+ ./Data/p225/19.wav|0
2097
+ ./Data/p258/21.wav|16
2098
+ ./Data/p259/60.wav|17
2099
+ ./Data/p227/105.wav|11
2100
+ ./Data/p258/142.wav|16
2101
+ ./Data/p230/52.wav|3
2102
+ ./Data/p227/6.wav|11
2103
+ ./Data/p244/139.wav|9
2104
+ ./Data/p226/128.wav|10
2105
+ ./Data/p239/70.wav|7
2106
+ ./Data/p273/28.wav|19
2107
+ ./Data/p230/171.wav|3
2108
+ ./Data/p270/113.wav|18
2109
+ ./Data/p259/19.wav|17
2110
+ ./Data/p225/68.wav|0
2111
+ ./Data/p239/73.wav|7
2112
+ ./Data/p254/44.wav|14
2113
+ ./Data/p240/113.wav|8
2114
+ ./Data/p244/77.wav|9
2115
+ ./Data/p259/49.wav|17
2116
+ ./Data/p225/86.wav|0
2117
+ ./Data/p258/94.wav|16
2118
+ ./Data/p244/17.wav|9
2119
+ ./Data/p227/12.wav|11
2120
+ ./Data/p239/150.wav|7
2121
+ ./Data/p225/10.wav|0
2122
+ ./Data/p230/114.wav|3
2123
+ ./Data/p258/69.wav|16
2124
+ ./Data/p231/117.wav|4
2125
+ ./Data/p244/23.wav|9
2126
+ ./Data/p273/60.wav|19
2127
+ ./Data/p259/156.wav|17
2128
+ ./Data/p239/158.wav|7
2129
+ ./Data/p244/102.wav|9
2130
+ ./Data/p236/85.wav|6
2131
+ ./Data/p259/2.wav|17
2132
+ ./Data/p259/83.wav|17
2133
+ ./Data/p226/40.wav|10
2134
+ ./Data/p270/34.wav|18
2135
+ ./Data/p240/99.wav|8
2136
+ ./Data/p259/95.wav|17
2137
+ ./Data/p240/79.wav|8
2138
+ ./Data/p239/102.wav|7
2139
+ ./Data/p273/57.wav|19
2140
+ ./Data/p243/85.wav|13
2141
+ ./Data/p239/149.wav|7
2142
+ ./Data/p232/28.wav|12
2143
+ ./Data/p254/25.wav|14
2144
+ ./Data/p233/42.wav|5
2145
+ ./Data/p227/39.wav|11
2146
+ ./Data/p270/77.wav|18
2147
+ ./Data/p233/51.wav|5
2148
+ ./Data/p256/100.wav|15
2149
+ ./Data/p258/140.wav|16
2150
+ ./Data/p229/131.wav|2
2151
+ ./Data/p243/52.wav|13
2152
+ ./Data/p258/84.wav|16
2153
+ ./Data/p229/138.wav|2
2154
+ ./Data/p240/61.wav|8
2155
+ ./Data/p254/27.wav|14
2156
+ ./Data/p232/21.wav|12
2157
+ ./Data/p226/38.wav|10
2158
+ ./Data/p230/158.wav|3
2159
+ ./Data/p256/52.wav|15
2160
+ ./Data/p243/95.wav|13
2161
+ ./Data/p243/89.wav|13
2162
+ ./Data/p226/61.wav|10
2163
+ ./Data/p230/117.wav|3
2164
+ ./Data/p230/92.wav|3
2165
+ ./Data/p236/55.wav|6
2166
+ ./Data/p254/18.wav|14
2167
+ ./Data/p254/129.wav|14
2168
+ ./Data/p259/113.wav|17
2169
+ ./Data/p225/25.wav|0
2170
+ ./Data/p240/134.wav|8
2171
+ ./Data/p230/86.wav|3
2172
+ ./Data/p256/84.wav|15
2173
+ ./Data/p228/99.wav|1
2174
+ ./Data/p239/90.wav|7
2175
+ ./Data/p230/155.wav|3
2176
+ ./Data/p228/40.wav|1
2177
+ ./Data/p254/72.wav|14
2178
+ ./Data/p231/38.wav|4
2179
+ ./Data/p225/32.wav|0
2180
+ ./Data/p228/22.wav|1
2181
+ ./Data/p231/7.wav|4
2182
+ ./Data/p254/39.wav|14
2183
+ ./Data/p240/112.wav|8
2184
+ ./Data/p270/183.wav|18
2185
+ ./Data/p270/60.wav|18
2186
+ ./Data/p236/120.wav|6
2187
+ ./Data/p239/145.wav|7
2188
+ ./Data/p240/31.wav|8
2189
+ ./Data/p229/115.wav|2
2190
+ ./Data/p233/121.wav|5
2191
+ ./Data/p228/33.wav|1
2192
+ ./Data/p228/83.wav|1
2193
+ ./Data/p258/58.wav|16
2194
+ ./Data/p239/106.wav|7
2195
+ ./Data/p273/123.wav|19
2196
+ ./Data/p244/50.wav|9
2197
+ ./Data/p229/50.wav|2
2198
+ ./Data/p270/131.wav|18
2199
+ ./Data/p236/8.wav|6
2200
+ ./Data/p244/114.wav|9
2201
+ ./Data/p230/153.wav|3
2202
+ ./Data/p226/53.wav|10
2203
+ ./Data/p240/93.wav|8
2204
+ ./Data/p229/122.wav|2
2205
+ ./Data/p256/90.wav|15
2206
+ ./Data/p231/112.wav|4
2207
+ ./Data/p270/48.wav|18
2208
+ ./Data/p230/36.wav|3
2209
+ ./Data/p230/135.wav|3
2210
+ ./Data/p259/172.wav|17
2211
+ ./Data/p229/55.wav|2
2212
+ ./Data/p244/60.wav|9
2213
+ ./Data/p232/75.wav|12
2214
+ ./Data/p259/68.wav|17
2215
+ ./Data/p233/7.wav|5
2216
+ ./Data/p233/3.wav|5
2217
+ ./Data/p226/141.wav|10
2218
+ ./Data/p254/32.wav|14
2219
+ ./Data/p239/26.wav|7
2220
+ ./Data/p226/119.wav|10
2221
+ ./Data/p239/173.wav|7
2222
+ ./Data/p230/157.wav|3
2223
+ ./Data/p236/157.wav|6
2224
+ ./Data/p226/13.wav|10
2225
+ ./Data/p254/68.wav|14
2226
+ ./Data/p225/87.wav|0
2227
+ ./Data/p231/118.wav|4
2228
+ ./Data/p240/98.wav|8
2229
+ ./Data/p233/5.wav|5
2230
+ ./Data/p227/56.wav|11
2231
+ ./Data/p239/93.wav|7
2232
+ ./Data/p240/25.wav|8
2233
+ ./Data/p243/142.wav|13
2234
+ ./Data/p254/110.wav|14
2235
+ ./Data/p230/138.wav|3
2236
+ ./Data/p226/16.wav|10
2237
+ ./Data/p270/189.wav|18
2238
+ ./Data/p229/95.wav|2
2239
+ ./Data/p231/37.wav|4
2240
+ ./Data/p240/44.wav|8
2241
+ ./Data/p228/46.wav|1
2242
+ ./Data/p236/62.wav|6
2243
+ ./Data/p226/20.wav|10
2244
+ ./Data/p228/105.wav|1
2245
+ ./Data/p258/44.wav|16
2246
+ ./Data/p258/23.wav|16
2247
+ ./Data/p270/108.wav|18
2248
+ ./Data/p243/151.wav|13
2249
+ ./Data/p239/170.wav|7
2250
+ ./Data/p244/100.wav|9
2251
+ ./Data/p258/81.wav|16
2252
+ ./Data/p236/153.wav|6
2253
+ ./Data/p229/5.wav|2
2254
+ ./Data/p256/112.wav|15
2255
+ ./Data/p258/70.wav|16
2256
+ ./Data/p240/57.wav|8
2257
+ ./Data/p244/36.wav|9
2258
+ ./Data/p273/19.wav|19
2259
+ ./Data/p233/75.wav|5
2260
+ ./Data/p259/111.wav|17
2261
+ ./Data/p243/100.wav|13
2262
+ ./Data/p226/86.wav|10
2263
+ ./Data/p256/26.wav|15
2264
+ ./Data/p236/22.wav|6
2265
+ ./Data/p229/124.wav|2
2266
+ ./Data/p229/62.wav|2
2267
+ ./Data/p258/87.wav|16
2268
+ ./Data/p232/22.wav|12
2269
+ ./Data/p259/158.wav|17
2270
+ ./Data/p229/135.wav|2
2271
+ ./Data/p233/118.wav|5
2272
+ ./Data/p236/134.wav|6
2273
+ ./Data/p226/34.wav|10
2274
+ ./Data/p236/93.wav|6
2275
+ ./Data/p243/108.wav|13
2276
+ ./Data/p270/177.wav|18
2277
+ ./Data/p239/30.wav|7
2278
+ ./Data/p273/17.wav|19
2279
+ ./Data/p231/110.wav|4
2280
+ ./Data/p229/119.wav|2
2281
+ ./Data/p243/130.wav|13
2282
+ ./Data/p256/127.wav|15
2283
+ ./Data/p226/105.wav|10
2284
+ ./Data/p229/52.wav|2
2285
+ ./Data/p226/54.wav|10
2286
+ ./Data/p273/87.wav|19
2287
+ ./Data/p270/57.wav|18
2288
+ ./Data/p240/131.wav|8
2289
+ ./Data/p273/117.wav|19
2290
+ ./Data/p240/77.wav|8
2291
+ ./Data/p233/32.wav|5
2292
+ ./Data/p236/25.wav|6
2293
+ ./Data/p227/79.wav|11
2294
+ ./Data/p258/64.wav|16
2295
+ ./Data/p240/92.wav|8
2296
+ ./Data/p244/74.wav|9
2297
+ ./Data/p228/120.wav|1
2298
+ ./Data/p230/45.wav|3
2299
+ ./Data/p225/89.wav|0
2300
+ ./Data/p226/95.wav|10
2301
+ ./Data/p270/80.wav|18
2302
+ ./Data/p226/111.wav|10
2303
+ ./Data/p243/2.wav|13
2304
+ ./Data/p259/6.wav|17
2305
+ ./Data/p227/85.wav|11
2306
+ ./Data/p233/106.wav|5
2307
+ ./Data/p227/14.wav|11
2308
+ ./Data/p231/50.wav|4
2309
+ ./Data/p230/139.wav|3
2310
+ ./Data/p229/70.wav|2
2311
+ ./Data/p258/14.wav|16
2312
+ ./Data/p240/116.wav|8
2313
+ ./Data/p225/64.wav|0
2314
+ ./Data/p225/8.wav|0
2315
+ ./Data/p243/113.wav|13
2316
+ ./Data/p254/102.wav|14
2317
+ ./Data/p270/148.wav|18
2318
+ ./Data/p232/12.wav|12
2319
+ ./Data/p259/22.wav|17
2320
+ ./Data/p273/4.wav|19
2321
+ ./Data/p244/133.wav|9
2322
+ ./Data/p228/101.wav|1
2323
+ ./Data/p273/31.wav|19
2324
+ ./Data/p258/76.wav|16
2325
+ ./Data/p227/146.wav|11
2326
+ ./Data/p231/54.wav|4
2327
+ ./Data/p236/37.wav|6
2328
+ ./Data/p244/82.wav|9
2329
+ ./Data/p225/17.wav|0
2330
+ ./Data/p243/76.wav|13
2331
+ ./Data/p273/140.wav|19
2332
+ ./Data/p239/15.wav|7
2333
+ ./Data/p230/19.wav|3
2334
+ ./Data/p240/117.wav|8
2335
+ ./Data/p244/94.wav|9
2336
+ ./Data/p236/26.wav|6
2337
+ ./Data/p259/99.wav|17
2338
+ ./Data/p225/77.wav|0
2339
+ ./Data/p244/31.wav|9
2340
+ ./Data/p244/98.wav|9
2341
+ ./Data/p243/59.wav|13
2342
+ ./Data/p228/163.wav|1
2343
+ ./Data/p270/141.wav|18
2344
+ ./Data/p230/94.wav|3
2345
+ ./Data/p228/110.wav|1
2346
+ ./Data/p243/160.wav|13
2347
+ ./Data/p239/162.wav|7
2348
+ ./Data/p232/112.wav|12
2349
+ ./Data/p273/54.wav|19
2350
+ ./Data/p259/110.wav|17
2351
+ ./Data/p244/64.wav|9
2352
+ ./Data/p259/170.wav|17
2353
+ ./Data/p230/53.wav|3
2354
+ ./Data/p228/8.wav|1
2355
+ ./Data/p232/80.wav|12
2356
+ ./Data/p273/56.wav|19
2357
+ ./Data/p256/93.wav|15
2358
+ ./Data/p258/50.wav|16
2359
+ ./Data/p231/41.wav|4
2360
+ ./Data/p236/76.wav|6
2361
+ ./Data/p229/65.wav|2
2362
+ ./Data/p243/46.wav|13
2363
+ ./Data/p228/31.wav|1
2364
+ ./Data/p240/89.wav|8
2365
+ ./Data/p240/119.wav|8
2366
+ ./Data/p243/31.wav|13
2367
+ ./Data/p273/122.wav|19
2368
+ ./Data/p236/113.wav|6
2369
+ ./Data/p232/67.wav|12
2370
+ ./Data/p270/188.wav|18
2371
+ ./Data/p256/46.wav|15
2372
+ ./Data/p230/12.wav|3
2373
+ ./Data/p236/156.wav|6
2374
+ ./Data/p243/157.wav|13
2375
+ ./Data/p239/22.wav|7
2376
+ ./Data/p232/107.wav|12
2377
+ ./Data/p229/28.wav|2
2378
+ ./Data/p236/31.wav|6
2379
+ ./Data/p254/50.wav|14
2380
+ ./Data/p232/43.wav|12
2381
+ ./Data/p244/142.wav|9
2382
+ ./Data/p270/186.wav|18
2383
+ ./Data/p258/139.wav|16
2384
+ ./Data/p228/156.wav|1
2385
+ ./Data/p256/12.wav|15
2386
+ ./Data/p256/63.wav|15
2387
+ ./Data/p230/116.wav|3
2388
+ ./Data/p254/131.wav|14
2389
+ ./Data/p243/139.wav|13
2390
+ ./Data/p226/93.wav|10
2391
+ ./Data/p239/98.wav|7
2392
+ ./Data/p256/94.wav|15
2393
+ ./Data/p243/102.wav|13
2394
+ ./Data/p240/3.wav|8
2395
+ ./Data/p258/86.wav|16
2396
+ ./Data/p227/28.wav|11
2397
+ ./Data/p228/49.wav|1
2398
+ ./Data/p270/47.wav|18
2399
+ ./Data/p226/88.wav|10
2400
+ ./Data/p232/36.wav|12
2401
+ ./Data/p259/90.wav|17
2402
+ ./Data/p244/5.wav|9
2403
+ ./Data/p243/122.wav|13
2404
+ ./Data/p254/10.wav|14
2405
+ ./Data/p254/64.wav|14
2406
+ ./Data/p273/47.wav|19
2407
+ ./Data/p243/171.wav|13
2408
+ ./Data/p243/4.wav|13
2409
+ ./Data/p230/128.wav|3
2410
+ ./Data/p229/84.wav|2
2411
+ ./Data/p259/39.wav|17
2412
+ ./Data/p236/5.wav|6
2413
+ ./Data/p225/47.wav|0
2414
+ ./Data/p258/134.wav|16
2415
+ ./Data/p259/12.wav|17
2416
+ ./Data/p244/9.wav|9
2417
+ ./Data/p227/98.wav|11
2418
+ ./Data/p227/65.wav|11
2419
+ ./Data/p226/114.wav|10
2420
+ ./Data/p229/113.wav|2
2421
+ ./Data/p240/16.wav|8
2422
+ ./Data/p227/118.wav|11
2423
+ ./Data/p258/56.wav|16
2424
+ ./Data/p270/150.wav|18
2425
+ ./Data/p256/85.wav|15
2426
+ ./Data/p259/92.wav|17
2427
+ ./Data/p239/84.wav|7
2428
+ ./Data/p240/86.wav|8
2429
+ ./Data/p225/11.wav|0
2430
+ ./Data/p226/25.wav|10
2431
+ ./Data/p270/65.wav|18
2432
+ ./Data/p239/79.wav|7
2433
+ ./Data/p240/76.wav|8
2434
+ ./Data/p270/190.wav|18
2435
+ ./Data/p236/163.wav|6
2436
+ ./Data/p236/36.wav|6
2437
+ ./Data/p240/41.wav|8
2438
+ ./Data/p226/2.wav|10
2439
+ ./Data/p230/104.wav|3
2440
+ ./Data/p243/106.wav|13
2441
+ ./Data/p243/90.wav|13
2442
+ ./Data/p240/27.wav|8
2443
+ ./Data/p240/30.wav|8
2444
+ ./Data/p231/121.wav|4
2445
+ ./Data/p239/8.wav|7
2446
+ ./Data/p230/10.wav|3
2447
+ ./Data/p239/104.wav|7
2448
+ ./Data/p233/96.wav|5
2449
+ ./Data/p236/33.wav|6
2450
+ ./Data/p254/85.wav|14
2451
+ ./Data/p227/26.wav|11
2452
+ ./Data/p233/134.wav|5
2453
+ ./Data/p230/48.wav|3
2454
+ ./Data/p232/59.wav|12
2455
+ ./Data/p239/156.wav|7
2456
+ ./Data/p236/84.wav|6
2457
+ ./Data/p228/63.wav|1
2458
+ ./Data/p229/24.wav|2
2459
+ ./Data/p236/155.wav|6
2460
+ ./Data/p228/138.wav|1
2461
+ ./Data/p270/185.wav|18
2462
+ ./Data/p228/76.wav|1
2463
+ ./Data/p254/115.wav|14
2464
+ ./Data/p231/52.wav|4
2465
+ ./Data/p273/8.wav|19
2466
+ ./Data/p228/132.wav|1
2467
+ ./Data/p273/59.wav|19
2468
+ ./Data/p229/73.wav|2
2469
+ ./Data/p259/152.wav|17
2470
+ ./Data/p230/31.wav|3
2471
+ ./Data/p230/35.wav|3
2472
+ ./Data/p258/80.wav|16
2473
+ ./Data/p225/61.wav|0
2474
+ ./Data/p236/21.wav|6
2475
+ ./Data/p232/127.wav|12
2476
+ ./Data/p256/72.wav|15
2477
+ ./Data/p244/123.wav|9
2478
+ ./Data/p244/141.wav|9
2479
+ ./Data/p270/69.wav|18
2480
+ ./Data/p227/51.wav|11
2481
+ ./Data/p273/11.wav|19
2482
+ ./Data/p243/112.wav|13
2483
+ ./Data/p254/16.wav|14
2484
+ ./Data/p226/3.wav|10
2485
+ ./Data/p231/36.wav|4
2486
+ ./Data/p243/159.wav|13
2487
+ ./Data/p228/55.wav|1
2488
+ ./Data/p229/18.wav|2
2489
+ ./Data/p273/22.wav|19
2490
+ ./Data/p270/101.wav|18
2491
+ ./Data/p227/62.wav|11
2492
+ ./Data/p270/111.wav|18
2493
+ ./Data/p254/73.wav|14
2494
+ ./Data/p256/81.wav|15
2495
+ ./Data/p226/116.wav|10
2496
+ ./Data/p236/154.wav|6
2497
+ ./Data/p233/98.wav|5
2498
+ ./Data/p239/68.wav|7
2499
+ ./Data/p273/69.wav|19
2500
+ ./Data/p236/92.wav|6
2501
+ ./Data/p273/81.wav|19
2502
+ ./Data/p225/43.wav|0
2503
+ ./Data/p230/27.wav|3
2504
+ ./Data/p227/54.wav|11
2505
+ ./Data/p233/113.wav|5
2506
+ ./Data/p236/23.wav|6
2507
+ ./Data/p236/51.wav|6
2508
+ ./Data/p233/50.wav|5
2509
+ ./Data/p225/76.wav|0
2510
+ ./Data/p244/21.wav|9
2511
+ ./Data/p228/53.wav|1
2512
+ ./Data/p240/148.wav|8
2513
+ ./Data/p243/173.wav|13
2514
+ ./Data/p270/105.wav|18
2515
+ ./Data/p227/13.wav|11
2516
+ ./Data/p228/121.wav|1
2517
+ ./Data/p233/128.wav|5
2518
+ ./Data/p256/82.wav|15
2519
+ ./Data/p244/76.wav|9
2520
+ ./Data/p232/9.wav|12
2521
+ ./Data/p239/4.wav|7
2522
+ ./Data/p240/106.wav|8
2523
+ ./Data/p270/81.wav|18
2524
+ ./Data/p225/48.wav|0
2525
+ ./Data/p254/67.wav|14
2526
+ ./Data/p240/66.wav|8
2527
+ ./Data/p259/47.wav|17
2528
+ ./Data/p230/63.wav|3
2529
+ ./Data/p230/141.wav|3
2530
+ ./Data/p231/137.wav|4
2531
+ ./Data/p227/133.wav|11
2532
+ ./Data/p259/100.wav|17
2533
+ ./Data/p259/171.wav|17
2534
+ ./Data/p240/56.wav|8
2535
+ ./Data/p273/126.wav|19
2536
+ ./Data/p256/32.wav|15
2537
+ ./Data/p270/79.wav|18
2538
+ ./Data/p227/46.wav|11
2539
+ ./Data/p228/51.wav|1
2540
+ ./Data/p243/54.wav|13
2541
+ ./Data/p258/141.wav|16
2542
+ ./Data/p226/31.wav|10
2543
+ ./Data/p236/137.wav|6
2544
+ ./Data/p230/30.wav|3
2545
+ ./Data/p236/34.wav|6
2546
+ ./Data/p228/35.wav|1
2547
+ ./Data/p244/56.wav|9
2548
+ ./Data/p230/107.wav|3
2549
+ ./Data/p240/36.wav|8
2550
+ ./Data/p233/62.wav|5
2551
+ ./Data/p239/112.wav|7
2552
+ ./Data/p231/42.wav|4
2553
+ ./Data/p256/9.wav|15
2554
+ ./Data/p227/23.wav|11
2555
+ ./Data/p236/32.wav|6
2556
+ ./Data/p228/67.wav|1
2557
+ ./Data/p225/72.wav|0
2558
+ ./Data/p232/82.wav|12
2559
+ ./Data/p244/68.wav|9
2560
+ ./Data/p230/145.wav|3
2561
+ ./Data/p239/5.wav|7
2562
+ ./Data/p230/154.wav|3
2563
+ ./Data/p232/98.wav|12
2564
+ ./Data/p243/136.wav|13
2565
+ ./Data/p228/115.wav|1
2566
+ ./Data/p226/5.wav|10
2567
+ ./Data/p240/52.wav|8
2568
+ ./Data/p270/170.wav|18
2569
+ ./Data/p243/93.wav|13
2570
+ ./Data/p243/26.wav|13
2571
+ ./Data/p230/136.wav|3
2572
+ ./Data/p226/97.wav|10
2573
+ ./Data/p229/136.wav|2
2574
+ ./Data/p227/136.wav|11
2575
+ ./Data/p236/119.wav|6
2576
+ ./Data/p232/14.wav|12
2577
+ ./Data/p254/138.wav|14
2578
+ ./Data/p240/143.wav|8
2579
+ ./Data/p259/122.wav|17
2580
+ ./Data/p270/205.wav|18
2581
+ ./Data/p254/100.wav|14
2582
+ ./Data/p270/149.wav|18
2583
+ ./Data/p259/9.wav|17
2584
+ ./Data/p226/96.wav|10
2585
+ ./Data/p230/23.wav|3
2586
+ ./Data/p244/72.wav|9
2587
+ ./Data/p259/73.wav|17
2588
+ ./Data/p227/68.wav|11
2589
+ ./Data/p226/75.wav|10
2590
+ ./Data/p236/109.wav|6
2591
+ ./Data/p258/102.wav|16
2592
+ ./Data/p232/44.wav|12
2593
+ ./Data/p243/27.wav|13
2594
+ ./Data/p232/126.wav|12
2595
+ ./Data/p240/14.wav|8
2596
+ ./Data/p226/71.wav|10
2597
+ ./Data/p230/88.wav|3
2598
+ ./Data/p233/45.wav|5
2599
+ ./Data/p244/103.wav|9
2600
+ ./Data/p232/26.wav|12
2601
+ ./Data/p229/101.wav|2
2602
+ ./Data/p229/44.wav|2
2603
+ ./Data/p232/123.wav|12
2604
+ ./Data/p228/129.wav|1
2605
+ ./Data/p273/32.wav|19
2606
+ ./Data/p232/125.wav|12
2607
+ ./Data/p240/103.wav|8
2608
+ ./Data/p254/128.wav|14
2609
+ ./Data/p254/34.wav|14
2610
+ ./Data/p240/19.wav|8
2611
+ ./Data/p232/89.wav|12
2612
+ ./Data/p273/73.wav|19
2613
+ ./Data/p231/109.wav|4
2614
+ ./Data/p270/124.wav|18
2615
+ ./Data/p244/112.wav|9
2616
+ ./Data/p256/117.wav|15
2617
+ ./Data/p244/88.wav|9
2618
+ ./Data/p228/17.wav|1
2619
+ ./Data/p233/86.wav|5
2620
+ ./Data/p254/23.wav|14
2621
+ ./Data/p233/59.wav|5
2622
+ ./Data/p232/25.wav|12
2623
+ ./Data/p231/108.wav|4
2624
+ ./Data/p258/103.wav|16
2625
+ ./Data/p232/69.wav|12
2626
+ ./Data/p230/65.wav|3
2627
+ ./Data/p240/73.wav|8
2628
+ ./Data/p243/125.wav|13
2629
+ ./Data/p256/92.wav|15
2630
+ ./Data/p270/31.wav|18
2631
+ ./Data/p256/44.wav|15
2632
+ ./Data/p236/98.wav|6
2633
+ ./Data/p228/90.wav|1
2634
+ ./Data/p231/125.wav|4
2635
+ ./Data/p232/64.wav|12
2636
+ ./Data/p273/80.wav|19
2637
+ ./Data/p227/32.wav|11
2638
+ ./Data/p226/17.wav|10
2639
+ ./Data/p226/69.wav|10
2640
+ ./Data/p231/142.wav|4
2641
+ ./Data/p225/65.wav|0
2642
+ ./Data/p229/64.wav|2
2643
+ ./Data/p240/70.wav|8
2644
+ ./Data/p225/85.wav|0
2645
+ ./Data/p259/166.wav|17
2646
+ ./Data/p230/119.wav|3
2647
+ ./Data/p258/135.wav|16
2648
+ ./Data/p225/60.wav|0
2649
+ ./Data/p239/74.wav|7
2650
+ ./Data/p233/117.wav|5
2651
+ ./Data/p226/44.wav|10
2652
+ ./Data/p227/103.wav|11
2653
+ ./Data/p228/45.wav|1
2654
+ ./Data/p244/52.wav|9
2655
+ ./Data/p230/168.wav|3
2656
+ ./Data/p259/71.wav|17
2657
+ ./Data/p270/109.wav|18
2658
+ ./Data/p243/164.wav|13
2659
+ ./Data/p243/36.wav|13
2660
+ ./Data/p270/12.wav|18
2661
+ ./Data/p229/125.wav|2
2662
+ ./Data/p259/51.wav|17
2663
+ ./Data/p225/81.wav|0
2664
+ ./Data/p240/133.wav|8
2665
+ ./Data/p270/130.wav|18
2666
+ ./Data/p228/37.wav|1
2667
+ ./Data/p228/39.wav|1
2668
+ ./Data/p240/35.wav|8
2669
+ ./Data/p231/124.wav|4
2670
+ ./Data/p244/121.wav|9
2671
+ ./Data/p270/133.wav|18
2672
+ ./Data/p227/110.wav|11
2673
+ ./Data/p244/134.wav|9
2674
+ ./Data/p254/59.wav|14
2675
+ ./Data/p239/35.wav|7
2676
+ ./Data/p236/150.wav|6
2677
+ ./Data/p227/40.wav|11
2678
+ ./Data/p258/13.wav|16
2679
+ ./Data/p240/123.wav|8
2680
+ ./Data/p231/141.wav|4
2681
+ ./Data/p228/151.wav|1
2682
+ ./Data/p236/45.wav|6
2683
+ ./Data/p273/5.wav|19
2684
+ ./Data/p231/113.wav|4
2685
+ ./Data/p256/103.wav|15
2686
+ ./Data/p227/87.wav|11
2687
+ ./Data/p270/173.wav|18
2688
+ ./Data/p243/104.wav|13
2689
+ ./Data/p240/141.wav|8
2690
+ ./Data/p240/128.wav|8
2691
+ ./Data/p259/50.wav|17
2692
+ ./Data/p231/8.wav|4
2693
+ ./Data/p226/82.wav|10
2694
+ ./Data/p243/110.wav|13
2695
+ ./Data/p243/101.wav|13
2696
+ ./Data/p259/132.wav|17
2697
+ ./Data/p227/99.wav|11
2698
+ ./Data/p259/42.wav|17
2699
+ ./Data/p229/29.wav|2
2700
+ ./Data/p236/104.wav|6
2701
+ ./Data/p259/34.wav|17
2702
+ ./Data/p254/117.wav|14
2703
+ ./Data/p227/29.wav|11
2704
+ ./Data/p258/111.wav|16
2705
+ ./Data/p229/9.wav|2
2706
+ ./Data/p240/26.wav|8
2707
+ ./Data/p259/89.wav|17
2708
+ ./Data/p270/21.wav|18
2709
+ ./Data/p254/101.wav|14
2710
+ ./Data/p259/40.wav|17
2711
+ ./Data/p240/7.wav|8
2712
+ ./Data/p240/114.wav|8
2713
+ ./Data/p230/176.wav|3
2714
+ ./Data/p231/47.wav|4
2715
+ ./Data/p239/37.wav|7
2716
+ ./Data/p232/51.wav|12
2717
+ ./Data/p270/142.wav|18
2718
+ ./Data/p254/6.wav|14
2719
+ ./Data/p225/50.wav|0
2720
+ ./Data/p227/91.wav|11
2721
+ ./Data/p259/149.wav|17
2722
+ ./Data/p259/125.wav|17
2723
+ ./Data/p229/107.wav|2
2724
+ ./Data/p228/10.wav|1
2725
+ ./Data/p231/107.wav|4
Data/val_list.txt ADDED
@@ -0,0 +1,303 @@
1
+ ./Data/p270/13.wav|18
2
+ ./Data/p273/94.wav|19
3
+ ./Data/p229/97.wav|2
4
+ ./Data/p232/117.wav|12
5
+ ./Data/p226/55.wav|10
6
+ ./Data/p259/102.wav|17
7
+ ./Data/p226/7.wav|10
8
+ ./Data/p254/26.wav|14
9
+ ./Data/p239/115.wav|7
10
+ ./Data/p239/86.wav|7
11
+ ./Data/p229/106.wav|2
12
+ ./Data/p244/43.wav|9
13
+ ./Data/p270/179.wav|18
14
+ ./Data/p273/6.wav|19
15
+ ./Data/p258/101.wav|16
16
+ ./Data/p273/62.wav|19
17
+ ./Data/p228/11.wav|1
18
+ ./Data/p273/103.wav|19
19
+ ./Data/p230/49.wav|3
20
+ ./Data/p233/23.wav|5
21
+ ./Data/p230/122.wav|3
22
+ ./Data/p239/80.wav|7
23
+ ./Data/p226/4.wav|10
24
+ ./Data/p240/50.wav|8
25
+ ./Data/p243/28.wav|13
26
+ ./Data/p236/95.wav|6
27
+ ./Data/p244/126.wav|9
28
+ ./Data/p244/40.wav|9
29
+ ./Data/p239/108.wav|7
30
+ ./Data/p273/72.wav|19
31
+ ./Data/p254/92.wav|14
32
+ ./Data/p231/116.wav|4
33
+ ./Data/p231/32.wav|4
34
+ ./Data/p243/117.wav|13
35
+ ./Data/p256/121.wav|15
36
+ ./Data/p243/3.wav|13
37
+ ./Data/p226/91.wav|10
38
+ ./Data/p256/53.wav|15
39
+ ./Data/p254/75.wav|14
40
+ ./Data/p243/150.wav|13
41
+ ./Data/p231/95.wav|4
42
+ ./Data/p228/81.wav|1
43
+ ./Data/p226/33.wav|10
44
+ ./Data/p232/71.wav|12
45
+ ./Data/p236/4.wav|6
46
+ ./Data/p236/132.wav|6
47
+ ./Data/p254/119.wav|14
48
+ ./Data/p236/7.wav|6
49
+ ./Data/p227/104.wav|11
50
+ ./Data/p226/59.wav|10
51
+ ./Data/p233/35.wav|5
52
+ ./Data/p231/23.wav|4
53
+ ./Data/p273/71.wav|19
54
+ ./Data/p240/74.wav|8
55
+ ./Data/p259/33.wav|17
56
+ ./Data/p259/118.wav|17
57
+ ./Data/p273/15.wav|19
58
+ ./Data/p226/115.wav|10
59
+ ./Data/p236/19.wav|6
60
+ ./Data/p226/57.wav|10
61
+ ./Data/p229/14.wav|2
62
+ ./Data/p243/98.wav|13
63
+ ./Data/p243/79.wav|13
64
+ ./Data/p231/12.wav|4
65
+ ./Data/p230/170.wav|3
66
+ ./Data/p228/114.wav|1
67
+ ./Data/p254/103.wav|14
68
+ ./Data/p256/108.wav|15
69
+ ./Data/p256/58.wav|15
70
+ ./Data/p229/23.wav|2
71
+ ./Data/p270/151.wav|18
72
+ ./Data/p259/36.wav|17
73
+ ./Data/p230/64.wav|3
74
+ ./Data/p226/134.wav|10
75
+ ./Data/p230/84.wav|3
76
+ ./Data/p270/91.wav|18
77
+ ./Data/p230/160.wav|3
78
+ ./Data/p236/15.wav|6
79
+ ./Data/p225/45.wav|0
80
+ ./Data/p239/62.wav|7
81
+ ./Data/p256/107.wav|15
82
+ ./Data/p258/144.wav|16
83
+ ./Data/p229/37.wav|2
84
+ ./Data/p226/108.wav|10
85
+ ./Data/p225/92.wav|0
86
+ ./Data/p227/138.wav|11
87
+ ./Data/p230/151.wav|3
88
+ ./Data/p229/90.wav|2
89
+ ./Data/p244/131.wav|9
90
+ ./Data/p231/1.wav|4
91
+ ./Data/p243/40.wav|13
92
+ ./Data/p226/131.wav|10
93
+ ./Data/p226/121.wav|10
94
+ ./Data/p270/119.wav|18
95
+ ./Data/p225/4.wav|0
96
+ ./Data/p243/39.wav|13
97
+ ./Data/p233/1.wav|5
98
+ ./Data/p239/117.wav|7
99
+ ./Data/p259/101.wav|17
100
+ ./Data/p228/73.wav|1
101
+ ./Data/p273/78.wav|19
102
+ ./Data/p256/22.wav|15
103
+ ./Data/p244/65.wav|9
104
+ ./Data/p240/17.wav|8
105
+ ./Data/p258/47.wav|16
106
+ ./Data/p239/95.wav|7
107
+ ./Data/p243/119.wav|13
108
+ ./Data/p259/106.wav|17
109
+ ./Data/p233/22.wav|5
110
+ ./Data/p232/60.wav|12
111
+ ./Data/p270/55.wav|18
112
+ ./Data/p230/87.wav|3
113
+ ./Data/p270/139.wav|18
114
+ ./Data/p225/5.wav|0
115
+ ./Data/p243/128.wav|13
116
+ ./Data/p258/10.wav|16
117
+ ./Data/p230/100.wav|3
118
+ ./Data/p239/43.wav|7
119
+ ./Data/p232/57.wav|12
120
+ ./Data/p256/27.wav|15
121
+ ./Data/p232/130.wav|12
122
+ ./Data/p243/153.wav|13
123
+ ./Data/p258/92.wav|16
124
+ ./Data/p232/81.wav|12
125
+ ./Data/p256/65.wav|15
126
+ ./Data/p259/107.wav|17
127
+ ./Data/p239/10.wav|7
128
+ ./Data/p233/4.wav|5
129
+ ./Data/p259/165.wav|17
130
+ ./Data/p225/41.wav|0
131
+ ./Data/p229/61.wav|2
132
+ ./Data/p227/36.wav|11
133
+ ./Data/p243/62.wav|13
134
+ ./Data/p259/31.wav|17
135
+ ./Data/p231/75.wav|4
136
+ ./Data/p233/31.wav|5
137
+ ./Data/p273/66.wav|19
138
+ ./Data/p226/6.wav|10
139
+ ./Data/p243/162.wav|13
140
+ ./Data/p229/21.wav|2
141
+ ./Data/p230/11.wav|3
142
+ ./Data/p231/84.wav|4
143
+ ./Data/p273/118.wav|19
144
+ ./Data/p227/92.wav|11
145
+ ./Data/p256/110.wav|15
146
+ ./Data/p230/105.wav|3
147
+ ./Data/p239/75.wav|7
148
+ ./Data/p229/78.wav|2
149
+ ./Data/p254/111.wav|14
150
+ ./Data/p232/24.wav|12
151
+ ./Data/p233/19.wav|5
152
+ ./Data/p233/52.wav|5
153
+ ./Data/p258/143.wav|16
154
+ ./Data/p254/135.wav|14
155
+ ./Data/p232/37.wav|12
156
+ ./Data/p244/81.wav|9
157
+ ./Data/p270/161.wav|18
158
+ ./Data/p233/43.wav|5
159
+ ./Data/p240/40.wav|8
160
+ ./Data/p244/70.wav|9
161
+ ./Data/p254/1.wav|14
162
+ ./Data/p229/96.wav|2
163
+ ./Data/p243/99.wav|13
164
+ ./Data/p259/20.wav|17
165
+ ./Data/p233/66.wav|5
166
+ ./Data/p239/88.wav|7
167
+ ./Data/p225/71.wav|0
168
+ ./Data/p227/143.wav|11
169
+ ./Data/p228/142.wav|1
170
+ ./Data/p231/135.wav|4
171
+ ./Data/p254/107.wav|14
172
+ ./Data/p233/36.wav|5
173
+ ./Data/p232/19.wav|12
174
+ ./Data/p258/113.wav|16
175
+ ./Data/p243/96.wav|13
176
+ ./Data/p273/90.wav|19
177
+ ./Data/p225/13.wav|0
178
+ ./Data/p228/32.wav|1
179
+ ./Data/p229/60.wav|2
180
+ ./Data/p273/14.wav|19
181
+ ./Data/p239/25.wav|7
182
+ ./Data/p256/31.wav|15
183
+ ./Data/p225/40.wav|0
184
+ ./Data/p273/43.wav|19
185
+ ./Data/p270/206.wav|18
186
+ ./Data/p244/19.wav|9
187
+ ./Data/p244/83.wav|9
188
+ ./Data/p259/134.wav|17
189
+ ./Data/p244/91.wav|9
190
+ ./Data/p225/80.wav|0
191
+ ./Data/p227/60.wav|11
192
+ ./Data/p244/128.wav|9
193
+ ./Data/p256/80.wav|15
194
+ ./Data/p256/15.wav|15
195
+ ./Data/p244/34.wav|9
196
+ ./Data/p256/69.wav|15
197
+ ./Data/p228/15.wav|1
198
+ ./Data/p232/65.wav|12
199
+ ./Data/p273/65.wav|19
200
+ ./Data/p239/124.wav|7
201
+ ./Data/p259/15.wav|17
202
+ ./Data/p226/137.wav|10
203
+ ./Data/p243/75.wav|13
204
+ ./Data/p258/16.wav|16
205
+ ./Data/p232/6.wav|12
206
+ ./Data/p231/106.wav|4
207
+ ./Data/p228/6.wav|1
208
+ ./Data/p243/172.wav|13
209
+ ./Data/p236/77.wav|6
210
+ ./Data/p256/95.wav|15
211
+ ./Data/p256/76.wav|15
212
+ ./Data/p239/119.wav|7
213
+ ./Data/p236/108.wav|6
214
+ ./Data/p243/92.wav|13
215
+ ./Data/p232/129.wav|12
216
+ ./Data/p230/124.wav|3
217
+ ./Data/p228/9.wav|1
218
+ ./Data/p232/100.wav|12
219
+ ./Data/p254/5.wav|14
220
+ ./Data/p273/1.wav|19
221
+ ./Data/p236/47.wav|6
222
+ ./Data/p240/87.wav|8
223
+ ./Data/p229/127.wav|2
224
+ ./Data/p228/152.wav|1
225
+ ./Data/p225/24.wav|0
226
+ ./Data/p229/20.wav|2
227
+ ./Data/p233/12.wav|5
228
+ ./Data/p259/46.wav|17
229
+ ./Data/p231/72.wav|4
230
+ ./Data/p254/65.wav|14
231
+ ./Data/p231/18.wav|4
232
+ ./Data/p270/66.wav|18
233
+ ./Data/p233/44.wav|5
234
+ ./Data/p233/126.wav|5
235
+ ./Data/p233/58.wav|5
236
+ ./Data/p273/142.wav|19
237
+ ./Data/p228/26.wav|1
238
+ ./Data/p230/106.wav|3
239
+ ./Data/p228/109.wav|1
240
+ ./Data/p232/76.wav|12
241
+ ./Data/p226/37.wav|10
242
+ ./Data/p226/66.wav|10
243
+ ./Data/p270/75.wav|18
244
+ ./Data/p229/4.wav|2
245
+ ./Data/p239/166.wav|7
246
+ ./Data/p228/79.wav|1
247
+ ./Data/p230/43.wav|3
248
+ ./Data/p258/100.wav|16
249
+ ./Data/p244/93.wav|9
250
+ ./Data/p256/105.wav|15
251
+ ./Data/p236/12.wav|6
252
+ ./Data/p270/154.wav|18
253
+ ./Data/p244/75.wav|9
254
+ ./Data/p239/160.wav|7
255
+ ./Data/p239/174.wav|7
256
+ ./Data/p225/26.wav|0
257
+ ./Data/p232/49.wav|12
258
+ ./Data/p258/19.wav|16
259
+ ./Data/p273/13.wav|19
260
+ ./Data/p232/32.wav|12
261
+ ./Data/p270/42.wav|18
262
+ ./Data/p270/194.wav|18
263
+ ./Data/p259/174.wav|17
264
+ ./Data/p236/53.wav|6
265
+ ./Data/p232/77.wav|12
266
+ ./Data/p240/118.wav|8
267
+ ./Data/p239/175.wav|7
268
+ ./Data/p225/58.wav|0
269
+ ./Data/p232/1.wav|12
270
+ ./Data/p243/5.wav|13
271
+ ./Data/p229/41.wav|2
272
+ ./Data/p233/60.wav|5
273
+ ./Data/p236/138.wav|6
274
+ ./Data/p258/54.wav|16
275
+ ./Data/p254/22.wav|14
276
+ ./Data/p254/76.wav|14
277
+ ./Data/p228/25.wav|1
278
+ ./Data/p259/61.wav|17
279
+ ./Data/p270/135.wav|18
280
+ ./Data/p231/136.wav|4
281
+ ./Data/p232/105.wav|12
282
+ ./Data/p259/35.wav|17
283
+ ./Data/p244/57.wav|9
284
+ ./Data/p226/104.wav|10
285
+ ./Data/p258/48.wav|16
286
+ ./Data/p229/139.wav|2
287
+ ./Data/p239/65.wav|7
288
+ ./Data/p228/74.wav|1
289
+ ./Data/p233/25.wav|5
290
+ ./Data/p243/16.wav|13
291
+ ./Data/p243/165.wav|13
292
+ ./Data/p229/46.wav|2
293
+ ./Data/p226/41.wav|10
294
+ ./Data/p228/160.wav|1
295
+ ./Data/p230/90.wav|3
296
+ ./Data/p270/184.wav|18
297
+ ./Data/p259/55.wav|17
298
+ ./Data/p232/31.wav|12
299
+ ./Data/p231/78.wav|4
300
+ ./Data/p259/78.wav|17
301
+ ./Data/p273/33.wav|19
302
+ ./Data/p256/40.wav|15
303
+ ./Data/p258/116.wav|16
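Each entry in `Data/train_list.txt` and `Data/val_list.txt` above follows a `wav_path|speaker_index` format. As a minimal sketch (the helper name `parse_list` is ours, not part of this repo), the lists can be parsed like this:

```python
def parse_list(lines):
    """Parse 'wav_path|speaker_index' lines into (path, index) pairs.

    Hypothetical helper for illustration; the repo's own data loader
    may differ.
    """
    pairs = []
    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        # split on the last '|' so paths containing '|' would still work
        path, speaker = line.rsplit("|", 1)
        pairs.append((path, int(speaker)))
    return pairs

pairs = parse_list(["./Data/p270/13.wav|18", "./Data/p273/94.wav|19"])
# pairs == [("./Data/p270/13.wav", 18), ("./Data/p273/94.wav", 19)]
```

The integer after `|` is the domain (speaker) index, matching `num_domains: 20` in `Configs/config.yml`.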
Demo/inference.ipynb ADDED
@@ -0,0 +1,471 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "HwaQq4GRU_Nw"
7
+ },
8
+ "source": [
9
+ "# StarGANv2-VC Demo (VCTK 20 Speakers)"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "metadata": {
15
+ "id": "hCpoXuZeGKAn"
16
+ },
17
+ "source": [
18
+ "### Utils"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "%cd .."
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": null,
33
+ "metadata": {
34
+ "colab": {
35
+ "base_uri": "https://localhost:8080/"
36
+ },
37
+ "executionInfo": {
38
+ "elapsed": 24923,
39
+ "status": "ok",
40
+ "timestamp": 1613984920200,
41
+ "user": {
42
+ "displayName": "Yinghao Li",
43
+ "photoUrl": "",
44
+ "userId": "12798981472803960591"
45
+ },
46
+ "user_tz": 300
47
+ },
48
+ "id": "3on9IjGhVGTP",
49
+ "outputId": "63a799f8-564d-48c2-fb0f-e66c0cd9fdb8"
50
+ },
51
+ "outputs": [],
52
+ "source": [
53
+ "# load packages\n",
54
+ "import random\n",
55
+ "import yaml\n",
56
+ "from munch import Munch\n",
57
+ "import numpy as np\n",
58
+ "import paddle\n",
59
+ "from paddle import nn\n",
60
+ "import paddle.nn.functional as F\n",
61
+ "import paddleaudio\n",
62
+ "import librosa\n",
63
+ "\n",
64
+ "from starganv2vc_paddle.Utils.ASR.models import ASRCNN\n",
65
+ "from starganv2vc_paddle.Utils.JDC.model import JDCNet\n",
66
+ "from starganv2vc_paddle.models import Generator, MappingNetwork, StyleEncoder\n",
67
+ "\n",
68
+ "%matplotlib inline"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": null,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "# Source: http://speech.ee.ntu.edu.tw/~jjery2243542/resource/model/is18/en_speaker_used.txt\n",
78
+ "# Source: https://github.com/jjery2243542/voice_conversion\n",
79
+ "\n",
80
+ "speakers = [225,228,229,230,231,233,236,239,240,244,226,227,232,243,254,256,258,259,270,273]\n",
81
+ "\n",
82
+ "to_mel = paddleaudio.features.MelSpectrogram(\n",
83
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
84
+ "to_mel.fbank_matrix[:] = paddle.load('starganv2vc_paddle/fbank_matrix.pd')['fbank_matrix']\n",
85
+ "mean, std = -4, 4\n",
86
+ "\n",
87
+ "def preprocess(wave):\n",
88
+ " wave_tensor = paddle.to_tensor(wave).astype(paddle.float32)\n",
89
+ " mel_tensor = to_mel(wave_tensor)\n",
90
+ " mel_tensor = (paddle.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
91
+ " return mel_tensor\n",
92
+ "\n",
93
+ "def build_model(model_params={}):\n",
94
+ " args = Munch(model_params)\n",
95
+ " generator = Generator(args.dim_in, args.style_dim, args.max_conv_dim, w_hpf=args.w_hpf, F0_channel=args.F0_channel)\n",
96
+ " mapping_network = MappingNetwork(args.latent_dim, args.style_dim, args.num_domains, hidden_dim=args.max_conv_dim)\n",
97
+ " style_encoder = StyleEncoder(args.dim_in, args.style_dim, args.num_domains, args.max_conv_dim)\n",
98
+ " \n",
99
+ " nets_ema = Munch(generator=generator,\n",
100
+ " mapping_network=mapping_network,\n",
101
+ " style_encoder=style_encoder)\n",
102
+ "\n",
103
+ " return nets_ema\n",
104
+ "\n",
105
+ "def compute_style(speaker_dicts):\n",
106
+ " reference_embeddings = {}\n",
107
+ " for key, (path, speaker) in speaker_dicts.items():\n",
108
+ " if path == \"\":\n",
109
+ " label = paddle.to_tensor([speaker], dtype=paddle.int64)\n",
110
+ " latent_dim = starganv2.mapping_network.shared[0].weight.shape[0]\n",
111
+ " ref = starganv2.mapping_network(paddle.randn([1, latent_dim]), label)\n",
112
+ " else:\n",
113
+ " wave, sr = librosa.load(path, sr=24000)\n",
114
+ " audio, index = librosa.effects.trim(wave, top_db=30)\n",
115
+ " if sr != 24000:\n",
116
+ " wave = librosa.resample(wave, sr, 24000)\n",
117
+ " mel_tensor = preprocess(wave)\n",
118
+ "\n",
119
+ " with paddle.no_grad():\n",
120
+ " label = paddle.to_tensor([speaker], dtype=paddle.int64)\n",
121
+ " ref = starganv2.style_encoder(mel_tensor.unsqueeze(1), label)\n",
122
+ " reference_embeddings[key] = (ref, label)\n",
123
+ " \n",
124
+ " return reference_embeddings"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "markdown",
129
+ "metadata": {},
130
+ "source": [
131
+ "### Load models"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "# load F0 model\n",
141
+ "\n",
142
+ "F0_model = JDCNet(num_class=1, seq_len=192)\n",
143
+ "params = paddle.load(\"Models/bst.pd\")['net']\n",
144
+ "F0_model.set_state_dict(params)\n",
145
+ "_ = F0_model.eval()"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": null,
151
+ "metadata": {
152
+ "executionInfo": {
153
+ "elapsed": 43003,
154
+ "status": "ok",
155
+ "timestamp": 1613984938321,
156
+ "user": {
157
+ "displayName": "Yinghao Li",
158
+ "photoUrl": "",
159
+ "userId": "12798981472803960591"
160
+ },
161
+ "user_tz": 300
162
+ },
163
+ "id": "NZA3ot-oF5t-"
164
+ },
165
+ "outputs": [],
166
+ "source": [
167
+ "# load vocoder\n",
168
+ "\n",
169
+ "import yaml\n",
170
+ "import paddle\n",
171
+ "\n",
172
+ "from yacs.config import CfgNode\n",
173
+ "from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator\n",
174
+ "\n",
175
+ "with open('Vocoder/config.yml') as f:\n",
176
+ " voc_config = CfgNode(yaml.safe_load(f))\n",
177
+ "voc_config[\"generator_params\"].pop(\"upsample_net\")\n",
178
+ "voc_config[\"generator_params\"][\"upsample_scales\"] = voc_config[\"generator_params\"].pop(\"upsample_params\")[\"upsample_scales\"]\n",
179
+ "vocoder = PWGGenerator(**voc_config[\"generator_params\"])\n",
180
+ "vocoder.remove_weight_norm()\n",
181
+ "vocoder.eval()\n",
182
+ "vocoder.set_state_dict(paddle.load('Vocoder/checkpoint-400000steps.pd'))"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "metadata": {
189
+ "colab": {
190
+ "base_uri": "https://localhost:8080/"
191
+ },
192
+ "executionInfo": {
193
+ "elapsed": 24462,
194
+ "status": "ok",
195
+ "timestamp": 1613985522414,
196
+ "user": {
197
+ "displayName": "Yinghao Li",
198
+ "photoUrl": "",
199
+ "userId": "12798981472803960591"
200
+ },
201
+ "user_tz": 300
202
+ },
203
+ "id": "Ou4367LCyefA",
204
+ "outputId": "19c61f6f-f39a-43b9-9275-09418c2aebb4"
205
+ },
206
+ "outputs": [],
207
+ "source": [
208
+ "# load starganv2\n",
209
+ "\n",
210
+ "model_path = 'Models/vc_ema.pd'\n",
211
+ "\n",
212
+ "with open('Models/config.yml') as f:\n",
213
+ " starganv2_config = yaml.safe_load(f)\n",
214
+ "starganv2 = build_model(model_params=starganv2_config[\"model_params\"])\n",
215
+ "params = paddle.load(model_path)\n",
216
+ "params = params['model_ema']\n",
217
+ "_ = [starganv2[key].set_state_dict(params[key]) for key in starganv2]\n",
218
+ "_ = [starganv2[key].eval() for key in starganv2]"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "markdown",
226
+ "metadata": {},
227
+ "source": [
228
+ "### Conversion"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "# load input wave\n",
238
+ "selected_speakers = [273, 259, 258, 243, 254, 244, 236, 233, 230, 228]\n",
239
+ "k = random.choice(selected_speakers)\n",
240
+ "wav_path = 'Demo/VCTK-corpus/p' + str(k) + '/p' + str(k) + '_023.wav'\n",
241
+ "audio, source_sr = librosa.load(wav_path, sr=24000)\n",
242
+ "audio = audio / np.max(np.abs(audio))\n",
243
+ "audio = audio.astype(np.float32)"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "markdown",
248
+ "metadata": {},
249
+ "source": [
250
+ "#### Convert by style encoder"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": [
259
+ "# with reference, using style encoder\n",
260
+ "speaker_dicts = {}\n",
261
+ "for s in selected_speakers:\n",
262
+ " k = s\n",
263
+ " speaker_dicts['p' + str(s)] = ('Demo/VCTK-corpus/p' + str(k) + '/p' + str(k) + '_023.wav', speakers.index(s))\n",
264
+ "\n",
265
+ "reference_embeddings = compute_style(speaker_dicts)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "metadata": {
272
+ "colab": {
273
+ "base_uri": "https://localhost:8080/",
274
+ "height": 333
275
+ },
276
+ "executionInfo": {
277
+ "elapsed": 1424,
278
+ "status": "ok",
279
+ "timestamp": 1613986299525,
280
+ "user": {
281
+ "displayName": "Yinghao Li",
282
+ "photoUrl": "",
283
+ "userId": "12798981472803960591"
284
+ },
285
+ "user_tz": 300
286
+ },
287
+ "id": "T5tahObUyN-d",
288
+ "outputId": "f4f38742-2235-4f59-cb2a-5008912cd870",
289
+ "scrolled": true
290
+ },
291
+ "outputs": [],
292
+ "source": [
293
+ "# conversion \n",
294
+ "import time\n",
295
+ "start = time.time()\n",
296
+ " \n",
297
+ "source = preprocess(audio)\n",
298
+ "keys = []\n",
299
+ "converted_samples = {}\n",
300
+ "reconstructed_samples = {}\n",
301
+ "converted_mels = {}\n",
302
+ "\n",
303
+ "for key, (ref, _) in reference_embeddings.items():\n",
304
+ " with paddle.no_grad():\n",
305
+ " f0_feat = F0_model.get_feature_GAN(source.unsqueeze(1))\n",
306
+ " out = starganv2.generator(source.unsqueeze(1), ref, F0=f0_feat)\n",
307
+ " \n",
308
+ " c = out.transpose([0,1,3,2]).squeeze()\n",
309
+ " y_out = vocoder.inference(c)\n",
310
+ " y_out = y_out.reshape([-1])\n",
311
+ "\n",
312
+ " if key not in speaker_dicts or speaker_dicts[key][0] == \"\":\n",
313
+ " recon = None\n",
314
+ " else:\n",
315
+ " wave, sr = librosa.load(speaker_dicts[key][0], sr=24000)\n",
316
+ " mel = preprocess(wave)\n",
317
+ " c = mel.transpose([0,2,1]).squeeze()\n",
318
+ " recon = vocoder.inference(c)\n",
319
+ " recon = recon.reshape([-1]).numpy()\n",
320
+ "\n",
321
+ " converted_samples[key] = y_out.numpy()\n",
322
+ " reconstructed_samples[key] = recon\n",
323
+ "\n",
324
+ " converted_mels[key] = out\n",
325
+ " \n",
326
+ " keys.append(key)\n",
327
+ "end = time.time()\n",
328
+ "print('total processing time: %.3f sec' % (end - start) )\n",
329
+ "\n",
330
+ "import IPython.display as ipd\n",
331
+ "for key, wave in converted_samples.items():\n",
332
+ " print('Converted: %s' % key)\n",
333
+ " display(ipd.Audio(wave, rate=24000))\n",
334
+ " print('Reference (vocoder): %s' % key)\n",
335
+ " if reconstructed_samples[key] is not None:\n",
336
+ " display(ipd.Audio(reconstructed_samples[key], rate=24000))\n",
337
+ "\n",
338
+ "print('Original (vocoder):')\n",
339
+ "wave, sr = librosa.load(wav_path, sr=24000)\n",
340
+ "mel = preprocess(wave)\n",
341
+ "c = mel.transpose([0,2,1]).squeeze()\n",
342
+ "with paddle.no_grad():\n",
343
+ " recon = vocoder.inference(c)\n",
344
+ " recon = recon.reshape([-1]).numpy()\n",
345
+ "display(ipd.Audio(recon, rate=24000))\n",
346
+ "print('Original:')\n",
347
+ "display(ipd.Audio(wav_path, rate=24000))"
348
+ ]
349
+ },
350
+ {
351
+ "cell_type": "markdown",
352
+ "metadata": {
353
+ "id": "SWh3o9hvGvJt"
354
+ },
355
+ "source": [
356
+ "#### Convert by mapping network"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": [
365
+ "# no reference, using mapping network\n",
366
+ "speaker_dicts = {}\n",
367
+ "selected_speakers = [273, 259, 258, 243, 254, 244, 236, 233, 230, 228]\n",
368
+ "for s in selected_speakers:\n",
369
+ " k = s\n",
370
+ " speaker_dicts['p' + str(s)] = ('', speakers.index(s))\n",
371
+ "\n",
372
+ "reference_embeddings = compute_style(speaker_dicts)"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": null,
378
+ "metadata": {
379
+ "scrolled": true
380
+ },
381
+ "outputs": [],
382
+ "source": [
383
+ "# conversion \n",
384
+ "import time\n",
385
+ "start = time.time()\n",
386
+ " \n",
387
+ "source = preprocess(audio)\n",
388
+ "keys = []\n",
389
+ "converted_samples = {}\n",
390
+ "reconstructed_samples = {}\n",
391
+ "converted_mels = {}\n",
392
+ "\n",
393
+ "for key, (ref, _) in reference_embeddings.items():\n",
394
+ " with paddle.no_grad():\n",
395
+ " f0_feat = F0_model.get_feature_GAN(source.unsqueeze(1))\n",
396
+ " out = starganv2.generator(source.unsqueeze(1), ref, F0=f0_feat)\n",
397
+ " \n",
398
+ " c = out.transpose([0,1,3,2]).squeeze()\n",
399
+ " y_out = vocoder.inference(c)\n",
400
+ " y_out = y_out.reshape([-1])\n",
401
+ "\n",
402
+ " if key not in speaker_dicts or speaker_dicts[key][0] == \"\":\n",
403
+ " recon = None\n",
404
+ " else:\n",
405
+ " wave, sr = librosa.load(speaker_dicts[key][0], sr=24000)\n",
406
+ " mel = preprocess(wave)\n",
407
+ " c = mel.transpose([0,2,1]).squeeze()\n",
408
+ " recon = vocoder.inference(c)\n",
409
+ " recon = recon.reshape([-1]).numpy()\n",
410
+ "\n",
411
+ " converted_samples[key] = y_out.numpy()\n",
412
+ " reconstructed_samples[key] = recon\n",
413
+ "\n",
414
+ " converted_mels[key] = out\n",
415
+ " \n",
416
+ " keys.append(key)\n",
417
+ "end = time.time()\n",
418
+ "print('total processing time: %.3f sec' % (end - start) )\n",
419
+ "\n",
420
+ "import IPython.display as ipd\n",
421
+ "for key, wave in converted_samples.items():\n",
422
+ " print('Converted: %s' % key)\n",
423
+ " display(ipd.Audio(wave, rate=24000))\n",
424
+ " print('Reference (vocoder): %s' % key)\n",
425
+ " if reconstructed_samples[key] is not None:\n",
426
+ " display(ipd.Audio(reconstructed_samples[key], rate=24000))\n",
427
+ "\n",
428
+ "print('Original (vocoder):')\n",
429
+ "wave, sr = librosa.load(wav_path, sr=24000)\n",
430
+ "mel = preprocess(wave)\n",
431
+ "c = mel.transpose([0,2,1]).squeeze()\n",
432
+ "with paddle.no_grad():\n",
433
+ " recon = vocoder.inference(c)\n",
434
+ " recon = recon.reshape([-1]).numpy()\n",
435
+ "display(ipd.Audio(recon, rate=24000))\n",
436
+ "print('Original:')\n",
437
+ "display(ipd.Audio(wav_path, rate=24000))"
438
+ ]
439
+ }
440
+ ],
441
+ "metadata": {
442
+ "accelerator": "GPU",
443
+ "colab": {
444
+ "collapsed_sections": [
445
+ "hCpoXuZeGKAn"
446
+ ],
447
+ "name": "Starganv2_vc.ipynb",
448
+ "provenance": [],
449
+ "toc_visible": true
450
+ },
451
+ "kernelspec": {
452
+ "display_name": "Python 3",
453
+ "language": "python",
454
+ "name": "python3"
455
+ },
456
+ "language_info": {
457
+ "codemirror_mode": {
458
+ "name": "ipython",
459
+ "version": 3
460
+ },
461
+ "file_extension": ".py",
462
+ "mimetype": "text/x-python",
463
+ "name": "python",
464
+ "nbconvert_exporter": "python",
465
+ "pygments_lexer": "ipython3",
466
+ "version": "3.7.10"
467
+ }
468
+ },
469
+ "nbformat": 4,
470
+ "nbformat_minor": 1
471
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Wu Hecong
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Starganv2vc Paddle
3
- emoji: 🐨
4
- colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 2.9.4
8
  app_file: app.py
@@ -10,4 +10,75 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
1
  ---
2
+ title: StarGANv2 Voice Conversion on PaddlePaddle
3
+ emoji: 🗣️
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 2.9.4
8
  app_file: app.py
 
10
  license: mit
11
  ---
12
 
13
+ # StarGANv2-VC-Paddle
14
+ [![Baidu AI Studio](https://img.shields.io/static/v1?label=Baidu&message=AI%20Studio%20Free%20A100&color=blue)](https://aistudio.baidu.com/aistudio/projectdetail/3955253)
15
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/HighCWu/starganv2vc-paddle)
16
+
17
+ A PaddlePaddle implementation of [StarGANv2-VC](https://github.com/yl4579/StarGANv2-VC).
18
+
19
+ Download pretrained models [here](https://aistudio.baidu.com/aistudio/datasetdetail/145012).
20
+
21
+ Get started with a free V100/A100 in [AI Studio](https://aistudio.baidu.com/aistudio/projectdetail/3955253), or try it quickly in [Hugging Face Spaces](https://huggingface.co/spaces/HighCWu/starganv2vc-paddle).
22
+
23
+ ---
24
+
25
+ Original PyTorch Repo [README](https://github.com/yl4579/StarGANv2-VC) 👇
26
+
27
+ ---
28
+
29
+
30
+ # StarGANv2-VC: A Diverse, Unsupervised, Non-parallel Framework for Natural-Sounding Voice Conversion
31
+
32
+ ### Yinghao Aaron Li, Ali Zare, Nima Mesgarani
33
+
34
+ > We present an unsupervised non-parallel many-to-many voice conversion (VC) method using a generative adversarial network (GAN) called StarGAN v2. Using a combination of adversarial source classifier loss and perceptual loss, our model significantly outperforms previous VC models. Although our model is trained only with 20 English speakers, it generalizes to a variety of voice conversion tasks, such as any-to-many, cross-lingual, and singing conversion. Using a style encoder, our framework can also convert plain reading speech into stylistic speech, such as emotional and falsetto speech. Subjective and objective evaluation experiments on a non-parallel many-to-many voice conversion task revealed that our model produces natural sounding voices, close to the sound quality of state-of-the-art text-to-speech (TTS) based voice conversion methods without the need for text labels. Moreover, our model is completely convolutional and with a faster-than-real-time vocoder such as Parallel WaveGAN can perform real-time voice conversion.
35
+
36
+ Paper: https://arxiv.org/abs/2107.10394
37
+
38
+ Audio samples: https://starganv2-vc.github.io/
39
+
40
+ ## Pre-requisites
41
+ 1. Python >= 3.7
42
+ 2. Clone this repository:
43
+ ```bash
44
+ git clone https://github.com/yl4579/StarGANv2-VC.git
45
+ cd StarGANv2-VC
46
+ ```
47
+ 3. Install python requirements:
48
+ ```bash
49
+ pip install SoundFile torchaudio munch parallel_wavegan torch pydub
50
+ ```
51
+ 4. Download and extract the [VCTK dataset](https://datashare.ed.ac.uk/handle/10283/3443)
52
+ and use [VCTK.ipynb](https://github.com/yl4579/StarGANv2-VC/blob/main/Data/VCTK.ipynb) to prepare the data (downsample to 24 kHz etc.). You can also [download the dataset](https://drive.google.com/file/d/1t7QQbu4YC_P1mv9puA_KgSomSFDsSzD6/view?usp=sharing) we have prepared, unzip it to the `Data` folder, and use the provided `config.yml` to reproduce our models.
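The 24 kHz downsampling step above can be sketched in plain Python. This is a minimal linear-interpolation illustration only, not the repo's actual preprocessing (VCTK.ipynb / librosa handles the real resampling):

```python
def resample_linear(wave, orig_sr, target_sr):
    """Naive linear-interpolation resampler (illustrative only --
    use librosa or the provided VCTK.ipynb for real data prep)."""
    n_out = int(len(wave) * target_sr / orig_sr)
    out = []
    for i in range(n_out):
        pos = i * orig_sr / target_sr      # fractional index into the source
        lo = int(pos)
        hi = min(lo + 1, len(wave) - 1)
        frac = pos - lo
        out.append(wave[lo] * (1.0 - frac) + wave[hi] * frac)
    return out

# half a second at 48 kHz becomes 12000 samples at 24 kHz
print(len(resample_linear([0.0] * 24000, 48000, 24000)))  # 12000
```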
53
+
54
+ ## Training
55
+ ```bash
56
+ python train.py --config_path ./Configs/config.yml
57
+ ```
58
+ Please specify the training and validation data in `config.yml` file. Change `num_domains` to the number of speakers in the dataset. The data list format needs to be `filename.wav|speaker_number`, see [train_list.txt](https://github.com/yl4579/StarGANv2-VC/blob/main/Data/train_list.txt) as an example.
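The `filename.wav|speaker_number` list format described above can be read with a few lines of Python (the file paths in this example are hypothetical):

```python
def parse_data_list(lines):
    """Parse `filename.wav|speaker_number` entries into (path, speaker) pairs.

    The speaker number is the 0-based domain index; paths below are made up.
    """
    pairs = []
    for line in lines:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        path, speaker = line.split('|')
        pairs.append((path, int(speaker)))
    return pairs

sample = ["Data/p225/p225_003.wav|0", "Data/p228/p228_003.wav|1"]
print(parse_data_list(sample))
# [('Data/p225/p225_003.wav', 0), ('Data/p228/p228_003.wav', 1)]
```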
59
+
60
+ Checkpoints and Tensorboard logs will be saved at `log_dir`. To speed up training, you may want to make `batch_size` as large as your GPU RAM allows. Note, however, that `batch_size = 5` already takes around 10 GB of GPU RAM.
61
+
62
+ ## Inference
63
+
64
+ Please refer to [inference.ipynb](https://github.com/yl4579/StarGANv2-VC/blob/main/Demo/inference.ipynb) for details.
65
+
66
+ The pretrained StarGANv2 and ParallelWaveGAN on VCTK corpus can be downloaded at [StarGANv2 Link](https://drive.google.com/file/d/1nzTyyl-9A1Hmqya2Q_f2bpZkUoRjbZsY/view?usp=sharing) and [ParallelWaveGAN Link](https://drive.google.com/file/d/1q8oSAzwkqi99oOGXDZyLypCiz0Qzn3Ab/view?usp=sharing). Please unzip them to `Models` and `Vocoder` respectively and run each cell in the notebook.
67
+
68
+ ## ASR & F0 Models
69
+
70
+ The pretrained F0 and ASR models are provided under the `Utils` folder. Both the F0 and ASR models are trained with melspectrograms preprocessed using [meldataset.py](https://github.com/yl4579/StarGANv2-VC/blob/main/meldataset.py), and both models are trained on speech data only.
71
+
72
+ The ASR model is trained on English corpus, but it appears to work when training StarGANv2 models in other languages such as Japanese. The F0 model also appears to work with singing data. For the best performance, however, training your own ASR and F0 models is encouraged for non-English and non-speech data.
73
+
74
+ You can edit [meldataset.py](https://github.com/yl4579/StarGANv2-VC/blob/main/meldataset.py) to use your own melspectrogram preprocessing, but the provided pretrained models will then no longer work; you will need to train your own ASR and F0 models with the new preprocessing. For example, you may refer to [Diamondfan/CTC_pytorch](https://github.com/Diamondfan/CTC_pytorch) and [keums/melodyExtraction_JDC](https://github.com/keums/melodyExtraction_JDC) to train the ASR and F0 models respectively.
75
+
76
+ ## References
77
+ - [clovaai/stargan-v2](https://github.com/clovaai/stargan-v2)
78
+ - [kan-bayashi/ParallelWaveGAN](https://github.com/kan-bayashi/ParallelWaveGAN)
79
+ - [tosaka-m/japanese_realtime_tts](https://github.com/tosaka-m/japanese_realtime_tts)
80
+ - [keums/melodyExtraction_JDC](https://github.com/keums/melodyExtraction_JDC)
81
+ - [Diamondfan/CTC_pytorch](https://github.com/Diamondfan/CTC_pytorch)
82
+
83
+ ## Acknowledgement
84
+ The author would like to thank [@tosaka-m](https://github.com/tosaka-m) for his great repository and valuable discussions.
Utils/ASR/config.yml ADDED
@@ -0,0 +1,28 @@
1
+ log_dir: "logs"
2
+ save_freq: 20
3
+ epochs: 180
4
+ batch_size: 48
5
+ pretrained_model: ""
6
+ train_data: "asr_train_list.txt"
7
+ val_data: "asr_val_list.txt"
8
+
9
+ dataset_params:
10
+ data_augmentation: true
11
+
12
+ preprocess_parasm:
13
+ sr: 24000
14
+ spect_params:
15
+ n_fft: 2048
16
+ win_length: 1200
17
+ hop_length: 300
18
+ mel_params:
19
+ n_mels: 80
20
+
21
+ model_params:
22
+ input_dim: 80
23
+ hidden_dim: 256
24
+ n_token: 80
25
+ token_embedding_dim: 256
26
+
27
+ optimizer_params:
28
+ lr: 0.0005
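A training script can load this config with `yaml.safe_load`; a minimal sketch with a few of the fields above inlined as a string (PyYAML is already a dependency of the training code):

```python
import yaml  # PyYAML

# Inline copy of a few fields from the config above, showing how a
# training script can read them into plain nested dicts.
config_text = """
batch_size: 48
model_params:
  input_dim: 80
  hidden_dim: 256
  n_token: 80
"""
config = yaml.safe_load(config_text)
print(config["model_params"]["input_dim"])  # 80
```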
app.py ADDED
@@ -0,0 +1,151 @@
1
+ import os
2
+ os.system("pip install gradio==2.9b24")
3
+
4
+ import gradio as gr
5
+
6
+
7
+ vocoder_url = 'https://bj.bcebos.com/v1/ai-studio-online/e46d52315a504f1fa520528582a8422b6fa7006463844b84b8a2c3d21cc314db?/Vocoder.zip'
8
+ models_url = 'https://bj.bcebos.com/v1/ai-studio-online/6c081f29caad483ebd4cded087ee6ddbfc8dca8fb89d4ab69d44253ce5525e32?/Models.zip'
9
+
10
+ from io import BytesIO
11
+ from zipfile import ZipFile
12
+ from urllib.request import urlopen
13
+
14
+
15
+ if not (os.path.isdir('Vocoder') and os.path.isdir('Models')):
16
+ for url in [vocoder_url, models_url]:
17
+ resp = urlopen(url)
18
+ zipfile = ZipFile(BytesIO(resp.read()))
19
+ zipfile.extractall()
20
+
21
+
22
+ import random
23
+ import yaml
24
+ from munch import Munch
25
+ import numpy as np
26
+ import paddle
27
+ from paddle import nn
28
+ import paddle.nn.functional as F
29
+ import paddleaudio
30
+ import librosa
31
+
32
+ from starganv2vc_paddle.Utils.JDC.model import JDCNet
33
+ from starganv2vc_paddle.models import Generator, MappingNetwork, StyleEncoder
34
+
35
+
36
+ speakers = [225,228,229,230,231,233,236,239,240,244,226,227,232,243,254,256,258,259,270,273]
37
+
38
+ to_mel = paddleaudio.features.MelSpectrogram(
39
+ n_mels=80, n_fft=2048, win_length=1200, hop_length=300)
40
+ to_mel.fbank_matrix[:] = paddle.load('starganv2vc_paddle/fbank_matrix.pd')['fbank_matrix']
41
+ mean, std = -4, 4
42
+
43
+ def preprocess(wave):
44
+ wave_tensor = paddle.to_tensor(wave).astype(paddle.float32)
45
+ mel_tensor = to_mel(wave_tensor)
46
+ mel_tensor = (paddle.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std
47
+ return mel_tensor
48
+
49
+ def build_model(model_params={}):
50
+ args = Munch(model_params)
51
+ generator = Generator(args.dim_in, args.style_dim, args.max_conv_dim, w_hpf=args.w_hpf, F0_channel=args.F0_channel)
52
+ mapping_network = MappingNetwork(args.latent_dim, args.style_dim, args.num_domains, hidden_dim=args.max_conv_dim)
53
+ style_encoder = StyleEncoder(args.dim_in, args.style_dim, args.num_domains, args.max_conv_dim)
54
+
55
+ nets_ema = Munch(generator=generator,
56
+ mapping_network=mapping_network,
57
+ style_encoder=style_encoder)
58
+
59
+ return nets_ema
60
+
61
+ def compute_style(speaker_dicts):
62
+ reference_embeddings = {}
63
+ for key, (path, speaker) in speaker_dicts.items():
64
+ if path == "":
65
+ label = paddle.to_tensor([speaker], dtype=paddle.int64)
66
+ latent_dim = starganv2.mapping_network.shared[0].weight.shape[0]
67
+ ref = starganv2.mapping_network(paddle.randn([1, latent_dim]), label)
68
+ else:
69
+ wave, sr = librosa.load(path, sr=24000)
70
+ audio, index = librosa.effects.trim(wave, top_db=30)
71
+ if sr != 24000:
72
+ wave = librosa.resample(wave, sr, 24000)
73
+ mel_tensor = preprocess(wave)
74
+
75
+ with paddle.no_grad():
76
+ label = paddle.to_tensor([speaker], dtype=paddle.int64)
77
+ ref = starganv2.style_encoder(mel_tensor.unsqueeze(1), label)
78
+ reference_embeddings[key] = (ref, label)
79
+
80
+ return reference_embeddings
81
+
82
+ F0_model = JDCNet(num_class=1, seq_len=192)
83
+ params = paddle.load("Models/bst.pd")['net']
84
+ F0_model.set_state_dict(params)
85
+ _ = F0_model.eval()
86
+
87
+ import yaml
88
+ import paddle
89
+
90
+ from yacs.config import CfgNode
91
+ from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
92
+
93
+ with open('Vocoder/config.yml') as f:
94
+ voc_config = CfgNode(yaml.safe_load(f))
95
+ voc_config["generator_params"].pop("upsample_net")
96
+ voc_config["generator_params"]["upsample_scales"] = voc_config["generator_params"].pop("upsample_params")["upsample_scales"]
97
+ vocoder = PWGGenerator(**voc_config["generator_params"])
98
+ vocoder.remove_weight_norm()
99
+ vocoder.eval()
100
+ vocoder.set_state_dict(paddle.load('Vocoder/checkpoint-400000steps.pd'))
101
+
102
+ model_path = 'Models/vc_ema.pd'
103
+
104
+ with open('Models/config.yml') as f:
105
+ starganv2_config = yaml.safe_load(f)
106
+ starganv2 = build_model(model_params=starganv2_config["model_params"])
107
+ params = paddle.load(model_path)
108
+ params = params['model_ema']
109
+ _ = [starganv2[key].set_state_dict(params[key]) for key in starganv2]
110
+ _ = [starganv2[key].eval() for key in starganv2]
114
+
115
+ # Compute speakers' styles under the Demo directory
116
+ speaker_dicts = {}
117
+ selected_speakers = [273, 259, 258, 243, 254, 244, 236, 233, 230, 228]
118
+ for s in selected_speakers:
119
+ k = s
120
+ speaker_dicts['p' + str(s)] = ('Demo/VCTK-corpus/p' + str(k) + '/p' + str(k) + '_023.wav', speakers.index(s))
121
+
122
+ reference_embeddings = compute_style(speaker_dicts)
123
+
124
+ examples = [['Demo/VCTK-corpus/p243/p243_023.wav', 'p236'], ['Demo/VCTK-corpus/p236/p236_023.wav', 'p243']]
125
+
126
+
127
+ def app(wav_path, speaker_id):
128
+ audio, _ = librosa.load(wav_path, sr=24000)
129
+ audio = audio / np.max(np.abs(audio))
130
+ audio = audio.astype(np.float32)
131
+ source = preprocess(audio)
132
+ ref = reference_embeddings[speaker_id][0]
133
+
134
+ with paddle.no_grad():
135
+ f0_feat = F0_model.get_feature_GAN(source.unsqueeze(1))
136
+ out = starganv2.generator(source.unsqueeze(1), ref, F0=f0_feat)
137
+
138
+ c = out.transpose([0,1,3,2]).squeeze()
139
+ y_out = vocoder.inference(c)
140
+ y_out = y_out.reshape([-1])
141
+
142
+ return (24000, y_out.numpy())
143
+
144
+ title="StarGANv2 Voice Conversion"
145
+ description="Gradio demo for StarGANv2 voice conversion on PaddlePaddle."
146
+
147
+ iface = gr.Interface(app, [gr.inputs.Audio(source="microphone", type="filepath"),
148
+ gr.inputs.Radio(list(speaker_dicts.keys()), type="value", default='p228', label='speaker id')],
149
+ "audio", title=title, description=description, examples=examples)
150
+
151
+ iface.launch()
convert_parallel_wavegan_weights_to_paddle.ipynb ADDED
@@ -0,0 +1,177 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "ParallelWaveGAN to paddle.ipynb",
7
+ "provenance": [],
8
+ "collapsed_sections": [],
9
+ "private_outputs": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "metadata": {
25
+ "id": "gZNDsJweNp1L"
26
+ },
27
+ "outputs": [],
28
+ "source": [
29
+ "!pip install parallel_wavegan paddlepaddle-gpu==2.2.2 \"paddlespeech<1\" pytest-runner"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "source": [
35
+ "!gdown https://drive.google.com/uc?id=1q8oSAzwkqi99oOGXDZyLypCiz0Qzn3Ab\n",
36
+ "!unzip -qq Vocoder.zip"
37
+ ],
38
+ "metadata": {
39
+ "id": "HqA0VNKEOGfv"
40
+ },
41
+ "execution_count": null,
42
+ "outputs": []
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "source": [
47
+ "# load torch vocoder\n",
48
+ "import torch\n",
49
+ "from parallel_wavegan.utils import load_model\n",
50
+ "\n",
51
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
52
+ "\n",
53
+ "vocoder_torch = load_model(\"Vocoder/checkpoint-400000steps.pkl\").to(device).eval()\n",
54
+ "vocoder_torch.remove_weight_norm()\n",
55
+ "_ = vocoder_torch.eval()"
56
+ ],
57
+ "metadata": {
58
+ "id": "9F0yA_dyPOVe"
59
+ },
60
+ "execution_count": null,
61
+ "outputs": []
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "source": [
66
+ "import yaml\n",
67
+ "import paddle\n",
68
+ "\n",
69
+ "from yacs.config import CfgNode\n",
70
+ "from paddlespeech.s2t.utils.dynamic_import import dynamic_import\n",
71
+ "from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator\n",
72
+ "\n",
73
+ "with open('Vocoder/config.yml') as f:\n",
74
+ " voc_config = CfgNode(yaml.safe_load(f))\n",
75
+ "voc_config[\"generator_params\"].pop(\"upsample_net\")\n",
76
+ "voc_config[\"generator_params\"][\"upsample_scales\"] = voc_config[\"generator_params\"].pop(\"upsample_params\")[\"upsample_scales\"]\n",
77
+ "vocoder_paddle = PWGGenerator(**voc_config[\"generator_params\"])\n",
78
+ "vocoder_paddle.remove_weight_norm()\n",
79
+ "vocoder_paddle.eval()\n",
80
+ "\n",
81
+ "\n",
82
+ "@paddle.no_grad()\n",
83
+ "def convert_weights(torch_model, paddle_model):\n",
84
+ " _ = torch_model.eval()\n",
85
+ " _ = paddle_model.eval()\n",
86
+ " dense_layers = []\n",
87
+ " for name, layer in torch_model.named_modules():\n",
88
+ " if isinstance(layer, torch.nn.Linear):\n",
89
+ " dense_layers.append(name)\n",
90
+ " torch_state_dict = torch_model.state_dict()\n",
91
+ " for name, param in paddle_model.named_parameters():\n",
92
+ " name = name.replace('._mean', '.running_mean')\n",
93
+ " name = name.replace('._variance', '.running_var')\n",
94
+ " name = name.replace('.scale', '.weight')\n",
95
+ " target_param = torch_state_dict[name].detach().cpu().numpy()\n",
96
+ " if '.'.join(name.split('.')[:-1]) in dense_layers:\n",
97
+ " if len(param.shape) == 2:\n",
98
+ " target_param = target_param.transpose((1,0))\n",
99
+ " param.set_value(paddle.to_tensor(target_param))\n",
100
+ "\n",
101
+ "convert_weights(vocoder_torch, vocoder_paddle)"
102
+ ],
103
+ "metadata": {
104
+ "id": "ch2uVW8OdKN0"
105
+ },
106
+ "execution_count": null,
107
+ "outputs": []
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "source": [
112
+ "import os\n",
113
+ "import librosa\n",
114
+ "import torchaudio\n",
115
+ "import paddleaudio\n",
116
+ "import numpy as np\n",
117
+ "import IPython.display as ipd\n",
118
+ "\n",
119
+ "\n",
120
+ "to_mel = torchaudio.transforms.MelSpectrogram(\n",
121
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
122
+ "fb = to_mel.mel_scale.fb.detach().cpu().numpy().transpose([1,0])\n",
123
+ "to_mel = paddleaudio.features.MelSpectrogram(\n",
124
+ " n_mels=80, n_fft=2048, win_length=1200, hop_length=300)\n",
125
+ "to_mel.fbank_matrix[:] = fb\n",
126
+ "mean, std = -4, 4\n",
127
+ "\n",
128
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
129
+ "\n",
130
+ "def preprocess(wave):\n",
131
+ " wave_tensor = paddle.to_tensor(wave).astype(paddle.float32)\n",
132
+ " mel_tensor = 2*to_mel(wave_tensor)\n",
133
+ " mel_tensor = (paddle.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std\n",
134
+ " return mel_tensor\n",
135
+ "\n",
136
+ "if not os.path.exists('p228_023.wav'):\n",
137
+ " !wget https://github.com/yl4579/StarGANv2-VC/raw/main/Demo/VCTK-corpus/p228/p228_023.wav\n",
138
+ "audio, source_sr = librosa.load('p228_023.wav', sr=24000)\n",
139
+ "audio = audio / np.max(np.abs(audio))\n",
140
+ "audio.dtype = np.float32\n",
141
+ "mel = preprocess(audio)\n",
142
+ "\n",
143
+ "import numpy as np\n",
144
+ "with torch.no_grad():\n",
145
+ " with paddle.no_grad():\n",
146
+ " c = mel.transpose([0, 2, 1]).squeeze()\n",
147
+ " recon_paddle = vocoder_paddle.inference(c)\n",
148
+ " recon_paddle = recon_paddle.reshape([-1]).numpy()\n",
149
+ " recon_torch = vocoder_torch.inference(torch.from_numpy(c.numpy()).to(device))\n",
150
+ " recon_torch = recon_torch.view(-1).cpu().numpy()\n",
151
+ " print(np.mean((recon_paddle - recon_torch)**2))\n",
152
+ "\n",
153
+ "print('Paddle recon:')\n",
154
+ "display(ipd.Audio(recon_paddle, rate=24000))\n",
155
+ "print('Torch recon:')\n",
156
+ "display(ipd.Audio(recon_torch, rate=24000))"
157
+ ],
158
+ "metadata": {
159
+ "id": "Q9dK5j1CleJM"
160
+ },
161
+ "execution_count": null,
162
+ "outputs": []
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "source": [
167
+ "paddle.save(vocoder_paddle.state_dict(), 'checkpoint-400000steps.pd')\n",
168
+ "paddle.save({ 'fbank_matrix': to_mel.fbank_matrix }, 'fbank_matrix.pd')"
169
+ ],
170
+ "metadata": {
171
+ "id": "HwaLd_Eq3JrH"
172
+ },
173
+ "execution_count": null,
174
+ "outputs": []
175
+ }
176
+ ]
177
+ }
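The `convert_weights` cell above hinges on a naming convention: Paddle's BatchNorm buffers are called `._mean`/`._variance` where PyTorch uses `.running_mean`/`.running_var`, and Paddle norm layers call the affine weight `.scale`. A framework-free sketch of just that remapping (the module paths below are hypothetical, for illustration only):

```python
def paddle_to_torch_name(name: str) -> str:
    """Map a Paddle parameter name to its PyTorch counterpart,
    mirroring the replace() calls in convert_weights above."""
    name = name.replace('._mean', '.running_mean')
    name = name.replace('._variance', '.running_var')
    name = name.replace('.scale', '.weight')
    return name

# Hypothetical BatchNorm parameter names:
print(paddle_to_torch_name('blocks.0.bn._mean'))      # blocks.0.bn.running_mean
print(paddle_to_torch_name('blocks.0.bn._variance'))  # blocks.0.bn.running_var
print(paddle_to_torch_name('blocks.0.bn.scale'))      # blocks.0.bn.weight
```

Anything not caught by these three rules is assumed to share the same name in both frameworks, which is why the lookup into `torch_state_dict` can use the remapped name directly.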
convert_starganv2_vc_weights_to_paddle.ipynb ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "starganv2_vc_weights_converter.ipynb",
7
+ "private_outputs": true,
8
+ "provenance": [],
9
+ "collapsed_sections": []
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ },
15
+ "language_info": {
16
+ "name": "python"
17
+ },
18
+ "accelerator": "GPU"
19
+ },
20
+ "cells": [
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "metadata": {
25
+ "id": "CA5i7YAlagUA"
26
+ },
27
+ "outputs": [],
28
+ "source": [
29
+ "!git clone https://github.com/yl4579/StarGANv2-VC\n",
30
+ "!pip install SoundFile torchaudio munch\n",
31
+ "!git clone https://github.com/HighCWu/starganv2vc-paddle\n",
32
+ "!cd starganv2vc-paddle && pip install paddlepaddle-gpu==2.2.2 paddleaudio munch pydub\n",
33
+ "!cp -r starganv2vc-paddle/starganv2vc_paddle StarGANv2-VC/"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "source": [
39
+ "!gdown https://drive.google.com/uc?id=1nzTyyl-9A1Hmqya2Q_f2bpZkUoRjbZsY"
40
+ ],
41
+ "metadata": {
42
+ "id": "ac4g4L1Bbx1t"
43
+ },
44
+ "execution_count": null,
45
+ "outputs": []
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "source": [
50
+ "!unzip -qq Models.zip\n",
51
+ "!rm -rf Models.zip\n",
52
+ "!mv Models StarGANv2-VC/Models"
53
+ ],
54
+ "metadata": {
55
+ "id": "EJ3vG_RvcOD8"
56
+ },
57
+ "execution_count": null,
58
+ "outputs": []
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "source": [
63
+ "%cd StarGANv2-VC"
64
+ ],
65
+ "metadata": {
66
+ "id": "rKovh1Egi4mJ"
67
+ },
68
+ "execution_count": null,
69
+ "outputs": []
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "source": [
74
+ "import os\n",
75
+ "import yaml\n",
76
+ "import numpy as np\n",
77
+ "import torch\n",
78
+ "import warnings\n",
79
+ "warnings.simplefilter('ignore')\n",
80
+ "\n",
81
+ "from munch import Munch\n",
82
+ "\n",
83
+ "from models import build_model\n",
84
+ "\n",
85
+ "from Utils.ASR.models import ASRCNN\n",
86
+ "from Utils.JDC.model import JDCNet\n",
87
+ "\n",
88
+ "torch.backends.cudnn.benchmark = True #\n",
89
+ "\n",
90
+ "def main(config_path):\n",
91
+ " config = yaml.safe_load(open(config_path))\n",
92
+ " \n",
93
+ " device = config.get('device', 'cpu')\n",
94
+ "\n",
95
+ " # load pretrained ASR model\n",
96
+ " ASR_config = config.get('ASR_config', False)\n",
97
+ " ASR_path = config.get('ASR_path', False)\n",
98
+ " with open(ASR_config) as f:\n",
99
+ " ASR_config = yaml.safe_load(f)\n",
100
+ " ASR_model_config = ASR_config['model_params']\n",
101
+ " ASR_model = ASRCNN(**ASR_model_config)\n",
102
+ " params = torch.load(ASR_path, map_location='cpu')['model']\n",
103
+ " ASR_model.load_state_dict(params)\n",
104
+ " ASR_model.to(device)\n",
105
+ " _ = ASR_model.eval()\n",
106
+ " \n",
107
+ " # load pretrained F0 model\n",
108
+ " F0_path = config.get('F0_path', False)\n",
109
+ " F0_model = JDCNet(num_class=1, seq_len=192)\n",
110
+ " params = torch.load(F0_path, map_location='cpu')['net']\n",
111
+ " F0_model.load_state_dict(params)\n",
112
+ " F0_model.to(device)\n",
113
+ " \n",
114
+ " # build model\n",
115
+ " _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)\n",
116
+ " pretrained_path = 'Models/epoch_00150.pth'# config.get('pretrained_model', False)\n",
117
+ " params = torch.load(pretrained_path, map_location='cpu')['model_ema']\n",
118
+ " [model_ema[key].load_state_dict(state_dict) for key, state_dict in params.items()]\n",
119
+ " _ = [model_ema[key].to(device) for key in model_ema]\n",
120
+ "\n",
121
+ " return ASR_model, F0_model, model_ema\n",
122
+ "\n",
123
+ "ASR_model_torch, F0_model_torch, model_ema_torch = main('./Models/config.yml')\n"
124
+ ],
125
+ "metadata": {
126
+ "id": "UpMuk5kni67B"
127
+ },
128
+ "execution_count": null,
129
+ "outputs": []
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "source": [
134
+ "import os\n",
135
+ "import yaml\n",
136
+ "import numpy as np\n",
137
+ "import paddle\n",
138
+ "import warnings\n",
139
+ "warnings.simplefilter('ignore')\n",
140
+ "\n",
141
+ "from munch import Munch\n",
142
+ "\n",
143
+ "from starganv2vc_paddle.models import build_model\n",
144
+ "\n",
145
+ "from starganv2vc_paddle.Utils.ASR.models import ASRCNN\n",
146
+ "from starganv2vc_paddle.Utils.JDC.model import JDCNet\n",
147
+ "\n",
148
+ "@paddle.no_grad()\n",
149
+ "def convert_weights(torch_model, paddle_model):\n",
150
+ " _ = torch_model.eval()\n",
151
+ " _ = paddle_model.eval()\n",
152
+ " dense_layers = []\n",
153
+ " for name, layer in torch_model.named_modules():\n",
154
+ " if isinstance(layer, torch.nn.Linear):\n",
155
+ " dense_layers.append(name)\n",
156
+ " torch_state_dict = torch_model.state_dict()\n",
157
+ " for name, param in paddle_model.named_parameters():\n",
158
+ " name = name.replace('._mean', '.running_mean')\n",
159
+ " name = name.replace('._variance', '.running_var')\n",
160
+ " name = name.replace('.scale', '.weight')\n",
161
+ " target_param = torch_state_dict[name].detach().cpu().numpy()\n",
162
+ " if '.'.join(name.split('.')[:-1]) in dense_layers:\n",
163
+ " if len(param.shape) == 2:\n",
164
+ " target_param = target_param.transpose((1,0))\n",
165
+ " param.set_value(paddle.to_tensor(target_param))\n",
166
+ "\n",
167
+ "@torch.no_grad()\n",
168
+ "@paddle.no_grad()\n",
169
+ "def main(config_path):\n",
170
+ " config = yaml.safe_load(open(config_path))\n",
171
+ " \n",
172
+ " ASR_config = config.get('ASR_config', False)\n",
173
+ " with open(ASR_config) as f:\n",
174
+ " ASR_config = yaml.safe_load(f)\n",
175
+ " ASR_model_config = ASR_config['model_params']\n",
176
+ " ASR_model = ASRCNN(**ASR_model_config)\n",
177
+ " _ = ASR_model.eval()\n",
178
+ " convert_weights(ASR_model_torch, ASR_model)\n",
179
+ "\n",
180
+ " F0_model = JDCNet(num_class=1, seq_len=192)\n",
181
+ " _ = F0_model.eval()\n",
182
+ " convert_weights(F0_model_torch, F0_model)\n",
183
+ " \n",
184
+ " # build model\n",
185
+ " model, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)\n",
186
+ "\n",
187
+ " asr_input = paddle.randn([2, 80, 192])\n",
188
+ " asr_output = ASR_model(asr_input)\n",
189
+ " asr_output_torch = ASR_model_torch(torch.from_numpy(asr_input.numpy()).cuda())\n",
190
+ " print('ASR model input:', asr_input.shape, 'output:', asr_output.shape)\n",
191
+ " print('Error:', (asr_output_torch.cpu().numpy() - asr_output.numpy()).mean())\n",
192
+ " mel_input = paddle.randn([2, 1, 192, 512])\n",
193
+ " f0_output = F0_model(mel_input)\n",
194
+ " f0_output_torch = F0_model_torch(torch.from_numpy(mel_input.numpy()).cuda())\n",
195
+ " print('F0 model input:', mel_input.shape, 'output:', [t.shape for t in f0_output])\n",
196
+ " # print('Error:', (t_dict2['output'].cpu().numpy() - t_dict1['output'].numpy()).mean())\n",
197
+ " print('Error:', [(t1.cpu().numpy() - t2.numpy()).mean() for t1, t2 in zip(f0_output_torch, f0_output)])\n",
198
+ "\n",
199
+ " _ = [convert_weights(model_ema_torch[k], model_ema[k]) for k in model_ema.keys()]\n",
200
+ " label = paddle.to_tensor([0,0], dtype=paddle.int64)\n",
201
+ " latent_dim = model_ema.mapping_network.shared[0].weight.shape[0]\n",
202
+ " latent_style = paddle.randn([2, latent_dim])\n",
203
+ " ref = model_ema.mapping_network(latent_style, label)\n",
204
+ " ref_torch = model_ema_torch.mapping_network(torch.from_numpy(latent_style.numpy()).cuda(), torch.from_numpy(label.numpy()).cuda())\n",
205
+ " print('Error of mapping network:', (ref_torch.cpu().numpy() - ref.numpy()).mean())\n",
206
+ " mel_input2 = paddle.randn([2, 1, 192, 512])\n",
207
+ " style_ref = model_ema.style_encoder(mel_input2, label)\n",
208
+ " style_ref_torch = model_ema_torch.style_encoder(torch.from_numpy(mel_input2.numpy()).cuda(), torch.from_numpy(label.numpy()).cuda())\n",
209
+ " print('StyleGANv2-VC encoder inputs:', mel_input2.shape, 'output:', style_ref.shape, 'should has the same shape as the ref:', ref.shape)\n",
210
+ " print('Error of style encoder:', (style_ref_torch.cpu().numpy() - style_ref.numpy()).mean())\n",
211
+ " f0_feat = F0_model.get_feature_GAN(mel_input)\n",
212
+ " f0_feat_torch = F0_model_torch.get_feature_GAN(torch.from_numpy(mel_input.numpy()).cuda())\n",
213
+ " print('Error of f0 feat:', (f0_feat_torch.cpu().numpy() - f0_feat.numpy()).mean())\n",
214
+ " out = model_ema.generator(mel_input, style_ref, F0=f0_feat)\n",
215
+ " out_torch = model_ema_torch.generator(torch.from_numpy(mel_input.numpy()).cuda(), torch.from_numpy(style_ref.numpy()).cuda(), F0=torch.from_numpy(f0_feat.numpy()).cuda())\n",
216
+ " print('StyleGANv2-VC inputs:', label.shape, latent_style.shape, mel_input.shape, 'output:', out.shape)\n",
217
+ " print('Error:', (out_torch.cpu().numpy() - out.numpy()).mean())\n",
218
+ "\n",
219
+ " paddle.save({'model': ASR_model.state_dict()}, 'ASR.pd')\n",
220
+ " paddle.save({ 'net': F0_model.state_dict()}, 'F0.pd')\n",
221
+ " model_ema_dict = {key: model.state_dict() for key, model in model_ema.items()}\n",
222
+ " \n",
223
+ " paddle.save({ 'model_ema': model_ema_dict }, 'VC.pd')\n",
224
+ "\n",
225
+ " return 0\n",
226
+ "\n",
227
+ "main('./Models/config.yml')\n"
228
+ ],
229
+ "metadata": {
230
+ "id": "PnuApVuyIIyd"
231
+ },
232
+ "execution_count": null,
233
+ "outputs": []
234
+ }
235
+ ]
236
+ }
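Besides renaming, `convert_weights` in the notebook above transposes `nn.Linear` weights: PyTorch stores them as `[out_features, in_features]` while Paddle uses `[in_features, out_features]`. The transpose rule on its own, sketched with plain nested lists so no framework is needed:

```python
def transpose2d(mat):
    """Transpose a 2-D weight stored as a list of rows,
    i.e. the target_param.transpose((1, 0)) step in convert_weights."""
    return [list(row) for row in zip(*mat)]

# A torch-style [out=2, in=3] weight becomes a paddle-style [in=3, out=2] one.
w_torch = [[1, 2, 3],
           [4, 5, 6]]
w_paddle = transpose2d(w_torch)
print(w_paddle)  # [[1, 4], [2, 5], [3, 6]]
```

This is also why the notebook first records which modules are `torch.nn.Linear`: convolution weights share the same layout in both frameworks and must not be transposed.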
prepare_data.ipynb ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "347ace04",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "\n",
12
+ "# VCTK Corpus Path\n",
13
+ "__CORPUSPATH__ = os.path.expanduser(\"~/data/VCTK-Corpus\") \n",
14
+ "\n",
15
+ "# output path\n",
16
+ "__OUTPATH__ = \"./Data\""
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "4ce9eb2e",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import os\n",
27
+ "from scipy.io import wavfile\n",
28
+ "from pydub import AudioSegment\n",
29
+ "\n",
30
+ "from pydub import AudioSegment\n",
31
+ "from pydub.silence import split_on_silence\n",
32
+ "import os\n",
33
+ "\n",
34
+ "def split(sound):\n",
35
+ " dBFS = sound.dBFS\n",
36
+ " chunks = split_on_silence(sound,\n",
37
+ " min_silence_len = 100,\n",
38
+ " silence_thresh = dBFS-16,\n",
39
+ " keep_silence = 100\n",
40
+ " )\n",
41
+ " return chunks\n",
42
+ "\n",
43
+ "def combine(_src):\n",
44
+ " audio = AudioSegment.empty()\n",
45
+ " for i,filename in enumerate(os.listdir(_src)):\n",
46
+ " if filename.endswith('.wav'):\n",
47
+ " filename = os.path.join(_src, filename)\n",
48
+ " audio += AudioSegment.from_wav(filename)\n",
49
+ " return audio\n",
50
+ "\n",
51
+ "def save_chunks(chunks, directory):\n",
52
+ " if not os.path.exists(directory):\n",
53
+ " os.makedirs(directory)\n",
54
+ " counter = 0\n",
55
+ "\n",
56
+ " target_length = 5 * 1000\n",
57
+ " output_chunks = [chunks[0]]\n",
58
+ " for chunk in chunks[1:]:\n",
59
+ " if len(output_chunks[-1]) < target_length:\n",
60
+ " output_chunks[-1] += chunk\n",
61
+ " else:\n",
62
+ " # if the last output chunk is longer than the target length,\n",
63
+ " # we can start a new one\n",
64
+ " output_chunks.append(chunk)\n",
65
+ "\n",
66
+ " for chunk in output_chunks:\n",
67
+ " chunk = chunk.set_frame_rate(24000)\n",
68
+ " chunk = chunk.set_channels(1)\n",
69
+ " counter = counter + 1\n",
70
+ " chunk.export(os.path.join(directory, str(counter) + '.wav'), format=\"wav\")"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "769a7f62",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "# Source: http://speech.ee.ntu.edu.tw/~jjery2243542/resource/model/is18/en_speaker_used.txt\n",
81
+ "# Source: https://github.com/jjery2243542/voice_conversion\n",
82
+ "\n",
83
+ "speakers = [225,228,229,230,231,233,236,239,240,244,226,227,232,243,254,256,258,259,270,273]"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "id": "9302fb6a",
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "# downsample to 24 kHz\n",
94
+ "\n",
95
+ "for p in speakers:\n",
96
+ " directory = __OUTPATH__ + '/p' + str(p)\n",
97
+ " if not os.path.exists(directory):\n",
98
+ " audio = combine(__CORPUSPATH__ + '/wav48/p' + str(p))\n",
99
+ " chunks = split(audio)\n",
100
+ " save_chunks(chunks, directory)"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": null,
106
+ "id": "4b0ca022",
107
+ "metadata": {},
108
+ "outputs": [],
109
+ "source": [
110
+ "# get all speakers\n",
111
+ "\n",
112
+ "data_list = []\n",
113
+ "for path, subdirs, files in os.walk(__OUTPATH__):\n",
114
+ " for name in files:\n",
115
+ " if name.endswith(\".wav\"):\n",
116
+ " speaker = int(path.split('/')[-1].replace('p', ''))\n",
117
+ " if speaker in speakers:\n",
118
+ " data_list.append({\"Path\": os.path.join(path, name), \"Speaker\": int(speakers.index(speaker)) + 1})\n",
119
+ " \n",
120
+ "import pandas as pd\n",
121
+ "\n",
122
+ "data_list = pd.DataFrame(data_list)\n",
123
+ "data_list = data_list.sample(frac=1)\n",
124
+ "\n",
125
+ "import random\n",
126
+ "\n",
127
+ "split_idx = round(len(data_list) * 0.1)\n",
128
+ "\n",
129
+ "test_data = data_list[:split_idx]\n",
130
+ "train_data = data_list[split_idx:]"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "id": "88df2a45",
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "# write to file \n",
141
+ "\n",
142
+ "file_str = \"\"\n",
143
+ "for index, k in train_data.iterrows():\n",
144
+ " file_str += k['Path'] + \"|\" +str(k['Speaker'] - 1)+ '\\n'\n",
145
+ "text_file = open(__OUTPATH__ + \"/train_list.txt\", \"w\")\n",
146
+ "text_file.write(file_str)\n",
147
+ "text_file.close()\n",
148
+ "\n",
149
+ "file_str = \"\"\n",
150
+ "for index, k in test_data.iterrows():\n",
151
+ " file_str += k['Path'] + \"|\" + str(k['Speaker'] - 1) + '\\n'\n",
152
+ "text_file = open(__OUTPATH__ + \"/val_list.txt\", \"w\")\n",
153
+ "text_file.write(file_str)\n",
154
+ "text_file.close()"
155
+ ]
156
+ }
157
+ ],
158
+ "metadata": {
159
+ "kernelspec": {
160
+ "display_name": "Python 3",
161
+ "language": "python",
162
+ "name": "python3"
163
+ },
164
+ "language_info": {
165
+ "codemirror_mode": {
166
+ "name": "ipython",
167
+ "version": 3
168
+ },
169
+ "file_extension": ".py",
170
+ "mimetype": "text/x-python",
171
+ "name": "python",
172
+ "nbconvert_exporter": "python",
173
+ "pygments_lexer": "ipython3",
174
+ "version": "3.7.10"
175
+ }
176
+ },
177
+ "nbformat": 4,
178
+ "nbformat_minor": 5
179
+ }
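`save_chunks` in the notebook above merges silence-split segments greedily until each output clip reaches the 5-second target. The same policy applied to bare durations (in milliseconds), as a minimal sketch:

```python
def merge_chunks(durations_ms, target_ms=5000):
    """Greedily merge consecutive chunk durations: keep appending to the
    current output chunk until it is at least target_ms long, then start
    a new one (the same policy save_chunks applies to AudioSegments)."""
    merged = [durations_ms[0]]
    for d in durations_ms[1:]:
        if merged[-1] < target_ms:
            merged[-1] += d
        else:
            merged.append(d)
    return merged

print(merge_chunks([3000, 1500, 2000, 4000, 800]))  # [6500, 4800]
```

Note the asymmetry of the policy: a chunk may overshoot the target (6500 ms above), since merging stops only after the threshold is crossed.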
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ paddlepaddle-gpu>=2.2.2
2
+ paddlespeech==0.2.0
3
+ visualdl
4
+ munch
5
+ pydub
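requirements.txt pins only `paddlepaddle-gpu>=2.2.2`, while `starganv2vc_paddle/Utils/ASR/layers.py` below gates its Xavier-gain initialization on Paddle >= 2.3 by parsing `paddle.__version__`. The version-parsing trick in isolation:

```python
def major_minor(version: str) -> float:
    """Collapse 'X.Y.Z' to the float X.Y, as layers.py does with
    paddle.__version__ to gate initializer APIs added in 2.3."""
    return float('.'.join(version.split('.')[:2]))

print(major_minor('2.2.2'))          # 2.2
print(major_minor('2.3.0') >= 2.3)   # True
```

With the `>=2.2.2` pin, either branch of that gate can be taken at runtime, so the code has to work with and without the 2.3 initializer path.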
starganv2vc_paddle/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Aaron (Yinghao) Li
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
starganv2vc_paddle/Utils/ASR/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
starganv2vc_paddle/Utils/ASR/layers.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import paddle
3
+ from paddle import nn
4
+ from typing import Optional, Any
5
+ from paddle import Tensor
6
+ import paddle.nn.functional as F
7
+ import paddleaudio
8
+ import paddleaudio.functional as audio_F
9
+
10
+ import random
11
+ random.seed(0)
12
+
13
+
14
+ def _get_activation_fn(activ):
15
+ if activ == 'relu':
16
+ return nn.ReLU()
17
+ elif activ == 'lrelu':
18
+ return nn.LeakyReLU(0.2)
19
+ elif activ == 'swish':
20
+ return nn.Swish()
21
+ else:
22
+ raise RuntimeError('Unexpected activ type %s, expected [relu, lrelu, swish]' % activ)
23
+
24
+ class LinearNorm(paddle.nn.Layer):
25
+ def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
26
+ super(LinearNorm, self).__init__()
27
+ self.linear_layer = paddle.nn.Linear(in_dim, out_dim, bias_attr=bias)
28
+
29
+ if float('.'.join(paddle.__version__.split('.')[:2])) >= 2.3:
30
+ gain = paddle.nn.initializer.calculate_gain(w_init_gain)
31
+ paddle.nn.initializer.XavierUniform()(self.linear_layer.weight)
32
+ self.linear_layer.weight.set_value(gain * self.linear_layer.weight)
33
+
34
+ def forward(self, x):
35
+ return self.linear_layer(x)
36
+
37
+
38
+ class ConvNorm(paddle.nn.Layer):
39
+ def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
40
+ padding=None, dilation=1, bias=True, w_init_gain='linear', param=None):
41
+ super(ConvNorm, self).__init__()
42
+ if padding is None:
43
+ assert(kernel_size % 2 == 1)
44
+ padding = int(dilation * (kernel_size - 1) / 2)
45
+
46
+ self.conv = paddle.nn.Conv1D(in_channels, out_channels,
47
+ kernel_size=kernel_size, stride=stride,
48
+ padding=padding, dilation=dilation,
49
+ bias_attr=bias)
50
+
51
+ if float('.'.join(paddle.__version__.split('.')[:2])) >= 2.3:
52
+ gain = paddle.nn.initializer.calculate_gain(w_init_gain, param=param)
53
+ paddle.nn.initializer.XavierUniform()(self.conv.weight)
54
+ self.conv.weight.set_value(gain * self.conv.weight)
55
+
56
+ def forward(self, signal):
57
+ conv_signal = self.conv(signal)
58
+ return conv_signal
59
+
60
+ class CausualConv(nn.Layer):
61
+ def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=1, dilation=1, bias=True, w_init_gain='linear', param=None):
62
+ super(CausualConv, self).__init__()
63
+ if padding is None:
64
+ assert(kernel_size % 2 == 1)
65
+ self.padding = int(dilation * (kernel_size - 1) / 2) * 2
66
+ else:
67
+ self.padding = padding * 2
68
+ self.conv = nn.Conv1D(in_channels, out_channels,
69
+ kernel_size=kernel_size, stride=stride,
70
+ padding=self.padding,
71
+ dilation=dilation,
72
+ bias_attr=bias)
73
+
74
+ if float('.'.join(paddle.__version__.split('.')[:2])) >= 2.3:
75
+ gain = paddle.nn.initializer.calculate_gain(w_init_gain, param=param)
76
+ paddle.nn.initializer.XavierUniform()(self.conv.weight)
77
+ self.conv.weight.set_value(gain * self.conv.weight)
78
+
79
+ def forward(self, x):
80
+ x = self.conv(x)
81
+ x = x[:, :, :-self.padding]
82
+ return x
83
+
84
+ class CausualBlock(nn.Layer):
85
+ def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ='lrelu'):
86
+ super(CausualBlock, self).__init__()
87
+ self.blocks = nn.LayerList([
88
+ self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
89
+ for i in range(n_conv)])
90
+
91
+ def forward(self, x):
92
+ for block in self.blocks:
93
+ res = x
94
+ x = block(x)
95
+ x += res
96
+ return x
97
+
98
+ def _get_conv(self, hidden_dim, dilation, activ='lrelu', dropout_p=0.2):
99
+ layers = [
100
+ CausualConv(hidden_dim, hidden_dim, kernel_size=3, padding=dilation, dilation=dilation),
101
+ _get_activation_fn(activ),
102
+ nn.BatchNorm1D(hidden_dim),
103
+ nn.Dropout(p=dropout_p),
104
+ CausualConv(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
105
+ _get_activation_fn(activ),
106
+ nn.Dropout(p=dropout_p)
107
+ ]
108
+ return nn.Sequential(*layers)
109
+
110
+ class ConvBlock(nn.Layer):
111
+ def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ='relu'):
112
+ super().__init__()
113
+ self._n_groups = 8
114
+ self.blocks = nn.LayerList([
115
+ self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
116
+ for i in range(n_conv)])
117
+
118
+
119
+ def forward(self, x):
120
+ for block in self.blocks:
121
+ res = x
122
+ x = block(x)
123
+ x += res
124
+ return x
125
+
126
+ def _get_conv(self, hidden_dim, dilation, activ='relu', dropout_p=0.2):
127
+ layers = [
128
+ ConvNorm(hidden_dim, hidden_dim, kernel_size=3, padding=dilation, dilation=dilation),
129
+ _get_activation_fn(activ),
130
+ nn.GroupNorm(num_groups=self._n_groups, num_channels=hidden_dim),
131
+ nn.Dropout(p=dropout_p),
132
+ ConvNorm(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
133
+ _get_activation_fn(activ),
134
+ nn.Dropout(p=dropout_p)
135
+ ]
136
+ return nn.Sequential(*layers)
137
+
138
+ class LocationLayer(nn.Layer):
139
+ def __init__(self, attention_n_filters, attention_kernel_size,
140
+ attention_dim):
141
+ super(LocationLayer, self).__init__()
142
+ padding = int((attention_kernel_size - 1) / 2)
143
+ self.location_conv = ConvNorm(2, attention_n_filters,
144
+ kernel_size=attention_kernel_size,
145
+ padding=padding, bias=False, stride=1,
146
+ dilation=1)
147
+ self.location_dense = LinearNorm(attention_n_filters, attention_dim,
148
+ bias=False, w_init_gain='tanh')
149
+
150
+ def forward(self, attention_weights_cat):
151
+ processed_attention = self.location_conv(attention_weights_cat)
152
+ processed_attention = processed_attention.transpose([0, 2, 1])
153
+ processed_attention = self.location_dense(processed_attention)
154
+ return processed_attention
155
+
156
+
157
+ class Attention(nn.Layer):
158
+ def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
159
+ attention_location_n_filters, attention_location_kernel_size):
160
+ super(Attention, self).__init__()
161
+ self.query_layer = LinearNorm(attention_rnn_dim, attention_dim,
162
+ bias=False, w_init_gain='tanh')
163
+ self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False,
164
+ w_init_gain='tanh')
165
+ self.v = LinearNorm(attention_dim, 1, bias=False)
166
+ self.location_layer = LocationLayer(attention_location_n_filters,
167
+ attention_location_kernel_size,
168
+ attention_dim)
169
+ self.score_mask_value = -float("inf")
170
+
171
+ def get_alignment_energies(self, query, processed_memory,
172
+ attention_weights_cat):
173
+ """
174
+ PARAMS
175
+ ------
176
+ query: decoder output (batch, n_mel_channels * n_frames_per_step)
177
+ processed_memory: processed encoder outputs (B, T_in, attention_dim)
178
+ attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)
179
+ RETURNS
180
+ -------
181
+ alignment (batch, max_time)
182
+ """
183
+
184
+ processed_query = self.query_layer(query.unsqueeze(1))
185
+ processed_attention_weights = self.location_layer(attention_weights_cat)
186
+ energies = self.v(paddle.tanh(
187
+ processed_query + processed_attention_weights + processed_memory))
188
+
189
+ energies = energies.squeeze(-1)
190
+ return energies
191
+
192
+ def forward(self, attention_hidden_state, memory, processed_memory,
193
+ attention_weights_cat, mask):
194
+ """
195
+ PARAMS
196
+ ------
197
+ attention_hidden_state: attention rnn last output
198
+ memory: encoder outputs
199
+ processed_memory: processed encoder outputs
200
+ attention_weights_cat: previous and cumulative attention weights
201
+ mask: binary mask for padded data
202
+ """
203
+ alignment = self.get_alignment_energies(
204
+ attention_hidden_state, processed_memory, attention_weights_cat)
205
+
206
+ if mask is not None:
207
+ alignment = paddle.where(mask, paddle.full(alignment.shape, self.score_mask_value, alignment.dtype), alignment)
208
+
209
+ attention_weights = F.softmax(alignment, axis=1)
210
+ attention_context = paddle.bmm(attention_weights.unsqueeze(1), memory)
211
+ attention_context = attention_context.squeeze(1)
212
+
213
+ return attention_context, attention_weights
214
+
215
+
216
+ class ForwardAttentionV2(nn.Layer):
217
+ def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
218
+ attention_location_n_filters, attention_location_kernel_size):
219
+ super(ForwardAttentionV2, self).__init__()
220
+ self.query_layer = LinearNorm(attention_rnn_dim, attention_dim,
221
+ bias=False, w_init_gain='tanh')
222
+ self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False,
223
+ w_init_gain='tanh')
224
+ self.v = LinearNorm(attention_dim, 1, bias=False)
225
+ self.location_layer = LocationLayer(attention_location_n_filters,
226
+ attention_location_kernel_size,
227
+ attention_dim)
228
+ self.score_mask_value = -float(1e20)
229
+
230
+ def get_alignment_energies(self, query, processed_memory,
231
+ attention_weights_cat):
232
+ """
233
+ PARAMS
234
+ ------
235
+ query: decoder output (batch, n_mel_channels * n_frames_per_step)
236
+ processed_memory: processed encoder outputs (B, T_in, attention_dim)
237
+ attention_weights_cat: prev. and cumulative att weights (B, 2, max_time)
238
+ RETURNS
239
+ -------
240
+ alignment (batch, max_time)
241
+ """
242
+
243
+ processed_query = self.query_layer(query.unsqueeze(1))
244
+ processed_attention_weights = self.location_layer(attention_weights_cat)
245
+ energies = self.v(paddle.tanh(
246
+ processed_query + processed_attention_weights + processed_memory))
247
+
248
+ energies = energies.squeeze(-1)
249
+ return energies
250
+
251
+ def forward(self, attention_hidden_state, memory, processed_memory,
252
+ attention_weights_cat, mask, log_alpha):
253
+ """
254
+ PARAMS
255
+ ------
256
+ attention_hidden_state: attention rnn last output
257
+ memory: encoder outputs
258
+ processed_memory: processed encoder outputs
259
+ attention_weights_cat: previous and cumulative attention weights
260
+ mask: binary mask for padded data
261
+ """
262
+ log_energy = self.get_alignment_energies(
263
+ attention_hidden_state, processed_memory, attention_weights_cat)
264
+
265
+ #log_energy =
266
+
267
+ if mask is not None:
268
+ log_energy[:] = paddle.where(mask, paddle.full(log_energy.shape, self.score_mask_value, log_energy.dtype), log_energy)
269
+
270
+ #attention_weights = F.softmax(alignment, dim=1)
271
+
272
+ #content_score = log_energy.unsqueeze(1) #[B, MAX_TIME] -> [B, 1, MAX_TIME]
273
+ #log_alpha = log_alpha.unsqueeze(2) #[B, MAX_TIME] -> [B, MAX_TIME, 1]
274
+
275
+ #log_total_score = log_alpha + content_score
276
+
277
+ #previous_attention_weights = attention_weights_cat[:,0,:]
278
+
279
+ log_alpha_shift_padded = []
280
+ max_time = log_energy.shape[1]
281
+ for sft in range(2):
282
+ shifted = log_alpha[:,:max_time-sft]
283
+ shift_padded = F.pad(shifted, (sft,0), 'constant', self.score_mask_value)
284
+ log_alpha_shift_padded.append(shift_padded.unsqueeze(2))
285
+
286
+ biased = paddle.logsumexp(paddle.concat(log_alpha_shift_padded, 2), 2)
287
+
288
+ log_alpha_new = biased + log_energy
289
+
290
+ attention_weights = F.softmax(log_alpha_new, axis=1)
291
+
292
+ attention_context = paddle.bmm(attention_weights.unsqueeze(1), memory)
293
+ attention_context = attention_context.squeeze(1)
294
+
295
+ return attention_context, attention_weights, log_alpha_new
296
+
297
+
298
+ class PhaseShuffle2D(nn.Layer):
299
+ def __init__(self, n=2):
300
+ super(PhaseShuffle2D, self).__init__()
301
+ self.n = n
302
+ self.random = random.Random(1)
303
+
304
+ def forward(self, x, move=None):
305
+ # x.size = (B, C, M, L)
306
+ if move is None:
307
+ move = self.random.randint(-self.n, self.n)
308
+
309
+ if move == 0:
310
+ return x
311
+ else:
312
+ left = x[:, :, :, :move]
313
+ right = x[:, :, :, move:]
314
+ shuffled = paddle.concat([right, left], axis=3)
315
+ return shuffled
316
+
317
+ class PhaseShuffle1D(nn.Layer):
318
+ def __init__(self, n=2):
319
+ super(PhaseShuffle1D, self).__init__()
320
+ self.n = n
321
+ self.random = random.Random(1)
322
+
323
+ def forward(self, x, move=None):
324
+ # x.size = (B, C, L)
325
+ if move is None:
326
+ move = self.random.randint(-self.n, self.n)
327
+
328
+ if move == 0:
329
+ return x
330
+ else:
331
+ left = x[:, :, :move]
332
+ right = x[:, :, move:]
333
+ shuffled = paddle.concat([right, left], axis=2)
334
+
335
+ return shuffled
336
+
337
+ class MFCC(nn.Layer):
338
+ def __init__(self, n_mfcc=40, n_mels=80):
339
+ super(MFCC, self).__init__()
340
+ self.n_mfcc = n_mfcc
341
+ self.n_mels = n_mels
342
+ self.norm = 'ortho'
343
+ dct_mat = audio_F.create_dct(self.n_mfcc, self.n_mels, self.norm)
344
+ self.register_buffer('dct_mat', dct_mat)
345
+
346
+ def forward(self, mel_specgram):
347
+ if len(mel_specgram.shape) == 2:
348
+ mel_specgram = mel_specgram.unsqueeze(0)
349
+ unsqueezed = True
350
+ else:
351
+ unsqueezed = False
352
+ # (channel, n_mels, time).tranpose(...) dot (n_mels, n_mfcc)
353
+ # -> (channel, time, n_mfcc).tranpose(...)
354
+ mfcc = paddle.matmul(mel_specgram.transpose([0, 2, 1]), self.dct_mat).transpose([0, 2, 1])
355
+
356
+ # unpack batch
357
+ if unsqueezed:
358
+ mfcc = mfcc.squeeze(0)
359
+ return mfcc
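The `MFCC` layer above is just a matrix multiply with a precomputed DCT basis. A minimal NumPy sketch of the same computation, assuming `paddleaudio.functional.create_dct` follows the usual orthonormal DCT-II convention (as in torchaudio; `create_dct` and `mfcc` below are illustrative stand-ins, not the library functions):

```python
import numpy as np

def create_dct(n_mfcc, n_mels):
    # Orthonormal DCT-II basis, shape (n_mels, n_mfcc),
    # matching the norm='ortho' convention used above.
    n = np.arange(n_mels)
    k = np.arange(n_mfcc)[:, None]
    dct = np.cos(np.pi / n_mels * (n + 0.5) * k)  # (n_mfcc, n_mels)
    dct[0] *= 1.0 / np.sqrt(2)
    dct *= np.sqrt(2.0 / n_mels)
    return dct.T  # (n_mels, n_mfcc)

def mfcc(mel_specgram, dct_mat):
    # (channel, n_mels, time) x (n_mels, n_mfcc) -> (channel, n_mfcc, time),
    # the same transpose-matmul-transpose as MFCC.forward
    return np.einsum('cmt,mk->ckt', mel_specgram, dct_mat)

mat = create_dct(40, 80)
out = mfcc(np.random.rand(1, 80, 50), mat)
print(out.shape)  # (1, 40, 50)
```

The truncated basis stays orthonormal (`mat.T @ mat` is the 40x40 identity), which is why the layer can cache it once in `register_buffer`.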
starganv2vc_paddle/Utils/ASR/models.py ADDED
@@ -0,0 +1,187 @@
+ import math
+ import paddle
+ from paddle import nn
+ from paddle.nn import TransformerEncoder
+ import paddle.nn.functional as F
+ from .layers import MFCC, Attention, LinearNorm, ConvNorm, ConvBlock
+
+ class ASRCNN(nn.Layer):
+    def __init__(self,
+                 input_dim=80,
+                 hidden_dim=256,
+                 n_token=35,
+                 n_layers=6,
+                 token_embedding_dim=256,
+    ):
+        super().__init__()
+        self.n_token = n_token
+        self.n_down = 1
+        self.to_mfcc = MFCC()
+        self.init_cnn = ConvNorm(input_dim//2, hidden_dim, kernel_size=7, padding=3, stride=2)
+        self.cnns = nn.Sequential(
+            *[nn.Sequential(
+                ConvBlock(hidden_dim),
+                nn.GroupNorm(num_groups=1, num_channels=hidden_dim)
+            ) for n in range(n_layers)])
+        self.projection = ConvNorm(hidden_dim, hidden_dim // 2)
+        self.ctc_linear = nn.Sequential(
+            LinearNorm(hidden_dim//2, hidden_dim),
+            nn.ReLU(),
+            LinearNorm(hidden_dim, n_token))
+        self.asr_s2s = ASRS2S(
+            embedding_dim=token_embedding_dim,
+            hidden_dim=hidden_dim//2,
+            n_token=n_token)
+
+    def forward(self, x, src_key_padding_mask=None, text_input=None):
+        x = self.to_mfcc(x)
+        x = self.init_cnn(x)
+        x = self.cnns(x)
+        x = self.projection(x)
+        x = x.transpose([0, 2, 1])
+        ctc_logit = self.ctc_linear(x)
+        if text_input is not None:
+            _, s2s_logit, s2s_attn = self.asr_s2s(x, src_key_padding_mask, text_input)
+            return ctc_logit, s2s_logit, s2s_attn
+        else:
+            return ctc_logit
+
+    def get_feature(self, x):
+        x = self.to_mfcc(x.squeeze(1))
+        x = self.init_cnn(x)
+        x = self.cnns(x)
+        x = self.projection(x)
+        return x
+
+    def length_to_mask(self, lengths):
+        mask = paddle.arange(lengths.max()).unsqueeze(0).expand((lengths.shape[0], -1)).astype(lengths.dtype)
+        mask = paddle.greater_than(mask+1, lengths.unsqueeze(1))
+        return mask
+
+    def get_future_mask(self, out_length, unmask_future_steps=0):
+        """
+        Args:
+            out_length (int): returned mask shape is (out_length, out_length).
+            unmask_future_steps (int): unmasking future step size.
+        Return:
+            mask (paddle.BoolTensor): future-timestep mask; mask[i, j] = True if i > j + unmask_future_steps else False
+        """
+        index_tensor = paddle.arange(out_length).unsqueeze(0).expand([out_length, -1])
+        mask = paddle.greater_than(index_tensor, index_tensor.T + unmask_future_steps)
+        return mask
+
+ class ASRS2S(nn.Layer):
+    def __init__(self,
+                 embedding_dim=256,
+                 hidden_dim=512,
+                 n_location_filters=32,
+                 location_kernel_size=63,
+                 n_token=40):
+        super(ASRS2S, self).__init__()
+        self.embedding = nn.Embedding(n_token, embedding_dim)
+        val_range = math.sqrt(6 / hidden_dim)
+        nn.initializer.Uniform(-val_range, val_range)(self.embedding.weight)
+
+        self.decoder_rnn_dim = hidden_dim
+        self.project_to_n_symbols = nn.Linear(self.decoder_rnn_dim, n_token)
+        self.attention_layer = Attention(
+            self.decoder_rnn_dim,
+            hidden_dim,
+            hidden_dim,
+            n_location_filters,
+            location_kernel_size
+        )
+        self.decoder_rnn = nn.LSTMCell(self.decoder_rnn_dim + embedding_dim, self.decoder_rnn_dim)
+        self.project_to_hidden = nn.Sequential(
+            LinearNorm(self.decoder_rnn_dim * 2, hidden_dim),
+            nn.Tanh())
+        self.sos = 1
+        self.eos = 2
+
+    def initialize_decoder_states(self, memory, mask):
+        """
+        memory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
+        """
+        B, L, H = memory.shape
+        self.decoder_hidden = paddle.zeros((B, self.decoder_rnn_dim)).astype(memory.dtype)
+        self.decoder_cell = paddle.zeros((B, self.decoder_rnn_dim)).astype(memory.dtype)
+        self.attention_weights = paddle.zeros((B, L)).astype(memory.dtype)
+        self.attention_weights_cum = paddle.zeros((B, L)).astype(memory.dtype)
+        self.attention_context = paddle.zeros((B, H)).astype(memory.dtype)
+        self.memory = memory
+        self.processed_memory = self.attention_layer.memory_layer(memory)
+        self.mask = mask
+        self.unk_index = 3
+        self.random_mask = 0.1
+
+    def forward(self, memory, memory_mask, text_input):
+        """
+        memory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
+        memory_mask.shape = (B, L, )
+        text_input.shape = (B, T)
+        """
+        self.initialize_decoder_states(memory, memory_mask)
+        # text random mask
+        random_mask = (paddle.rand(text_input.shape) < self.random_mask)
+        _text_input = text_input.clone()
+        _text_input[:] = paddle.where(random_mask, paddle.full(_text_input.shape, self.unk_index, _text_input.dtype), _text_input)
+        decoder_inputs = self.embedding(_text_input).transpose([1, 0, 2]) # -> [T, B, channel]
+        start_embedding = self.embedding(
+            paddle.to_tensor([self.sos]*decoder_inputs.shape[1], dtype=paddle.int64))
+        decoder_inputs = paddle.concat((start_embedding.unsqueeze(0), decoder_inputs), axis=0)
+
+        hidden_outputs, logit_outputs, alignments = [], [], []
+        while len(hidden_outputs) < decoder_inputs.shape[0]:
+
+            decoder_input = decoder_inputs[len(hidden_outputs)]
+            hidden, logit, attention_weights = self.decode(decoder_input)
+            hidden_outputs += [hidden]
+            logit_outputs += [logit]
+            alignments += [attention_weights]
+
+        hidden_outputs, logit_outputs, alignments = \
+            self.parse_decoder_outputs(
+                hidden_outputs, logit_outputs, alignments)
+
+        return hidden_outputs, logit_outputs, alignments
+
+    def decode(self, decoder_input):
+        cell_input = paddle.concat((decoder_input, self.attention_context), -1)
+        # paddle.nn.LSTMCell returns (outputs, (h, c))
+        _, (self.decoder_hidden, self.decoder_cell) = self.decoder_rnn(
+            cell_input,
+            (self.decoder_hidden, self.decoder_cell))
+
+        attention_weights_cat = paddle.concat(
+            (self.attention_weights.unsqueeze(1),
+             self.attention_weights_cum.unsqueeze(1)), axis=1)
+
+        self.attention_context, self.attention_weights = self.attention_layer(
+            self.decoder_hidden,
+            self.memory,
+            self.processed_memory,
+            attention_weights_cat,
+            self.mask)
+
+        self.attention_weights_cum += self.attention_weights
+
+        hidden_and_context = paddle.concat((self.decoder_hidden, self.attention_context), -1)
+        hidden = self.project_to_hidden(hidden_and_context)
+
+        # dropout to increase generalization
+        logit = self.project_to_n_symbols(F.dropout(hidden, 0.5, training=self.training))
+
+        return hidden, logit, self.attention_weights
+
+    def parse_decoder_outputs(self, hidden, logit, alignments):
+        # -> [B, T_out + 1, max_time]
+        alignments = paddle.stack(alignments).transpose([1, 0, 2])
+        # [T_out + 1, B, n_symbols] -> [B, T_out + 1, n_symbols]
+        logit = paddle.stack(logit).transpose([1, 0, 2])
+        hidden = paddle.stack(hidden).transpose([1, 0, 2])
+
+        return hidden, logit, alignments
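`ASRCNN.length_to_mask` builds the padding mask consumed by the attention layer: position `t` of row `i` becomes True once `t + 1` exceeds `lengths[i]`. The same semantics in a standalone NumPy sketch:

```python
import numpy as np

def length_to_mask(lengths):
    # mask[i, t] is True for padded positions, i.e. t + 1 > lengths[i],
    # mirroring ASRCNN.length_to_mask above
    idx = np.arange(lengths.max())[None, :]
    return (idx + 1) > lengths[:, None]

print(length_to_mask(np.array([2, 4])))
# [[False False  True  True]
#  [False False False False]]
```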
starganv2vc_paddle/Utils/JDC/__init__.py ADDED
@@ -0,0 +1 @@
+
starganv2vc_paddle/Utils/JDC/model.py ADDED
@@ -0,0 +1,174 @@
+ """
+ Implementation of model from:
+ Kum et al. - "Joint Detection and Classification of Singing Voice Melody Using
+ Convolutional Recurrent Neural Networks" (2019)
+ Link: https://www.semanticscholar.org/paper/Joint-Detection-and-Classification-of-Singing-Voice-Kum-Nam/60a2ad4c7db43bace75805054603747fcd062c0d
+ """
+ import paddle
+ from paddle import nn
+
+ class JDCNet(nn.Layer):
+    """
+    Joint Detection and Classification Network model for singing voice melody.
+    """
+    def __init__(self, num_class=722, seq_len=31, leaky_relu_slope=0.01):
+        super().__init__()
+        self.seq_len = seq_len  # 31
+        self.num_class = num_class
+
+        # input = (b, 1, 31, 513), b = batch size
+        self.conv_block = nn.Sequential(
+            nn.Conv2D(in_channels=1, out_channels=64, kernel_size=3, padding=1, bias_attr=False),  # out: (b, 64, 31, 513)
+            nn.BatchNorm2D(num_features=64),
+            nn.LeakyReLU(leaky_relu_slope),
+            nn.Conv2D(64, 64, 3, padding=1, bias_attr=False),  # (b, 64, 31, 513)
+        )
+
+        # res blocks
+        self.res_block1 = ResBlock(in_channels=64, out_channels=128)  # (b, 128, 31, 128)
+        self.res_block2 = ResBlock(in_channels=128, out_channels=192)  # (b, 192, 31, 32)
+        self.res_block3 = ResBlock(in_channels=192, out_channels=256)  # (b, 256, 31, 8)
+
+        # pool block
+        self.pool_block = nn.Sequential(
+            nn.BatchNorm2D(num_features=256),
+            nn.LeakyReLU(leaky_relu_slope),
+            nn.MaxPool2D(kernel_size=(1, 4)),  # (b, 256, 31, 2)
+            nn.Dropout(p=0.5),
+        )
+
+        # maxpool layers (for auxiliary network inputs)
+        # in = (b, 128, 31, 513) from conv_block, out = (b, 128, 31, 2)
+        self.maxpool1 = nn.MaxPool2D(kernel_size=(1, 40))
+        # in = (b, 128, 31, 128) from res_block1, out = (b, 128, 31, 2)
+        self.maxpool2 = nn.MaxPool2D(kernel_size=(1, 20))
+        # in = (b, 128, 31, 32) from res_block2, out = (b, 128, 31, 2)
+        self.maxpool3 = nn.MaxPool2D(kernel_size=(1, 10))
+
+        # in = (b, 640, 31, 2), out = (b, 256, 31, 2)
+        self.detector_conv = nn.Sequential(
+            nn.Conv2D(640, 256, 1, bias_attr=False),
+            nn.BatchNorm2D(256),
+            nn.LeakyReLU(leaky_relu_slope),
+            nn.Dropout(p=0.5),
+        )
+
+        # input: (b, 31, 512) - resized from (b, 256, 31, 2)
+        self.bilstm_classifier = nn.LSTM(
+            input_size=512, hidden_size=256,
+            time_major=False, direction='bidirectional')  # (b, 31, 512)
+
+        # input: (b, 31, 512) - resized from (b, 256, 31, 2)
+        self.bilstm_detector = nn.LSTM(
+            input_size=512, hidden_size=256,
+            time_major=False, direction='bidirectional')  # (b, 31, 512)
+
+        # input: (b * 31, 512)
+        self.classifier = nn.Linear(in_features=512, out_features=self.num_class)  # (b * 31, num_class)
+
+        # input: (b * 31, 512)
+        self.detector = nn.Linear(in_features=512, out_features=2)  # (b * 31, 2) - binary classifier
+
+        # initialize weights
+        self.apply(self.init_weights)
+
+    def get_feature_GAN(self, x):
+        seq_len = x.shape[-2]
+        x = x.astype(paddle.float32).transpose([0, 1, 3, 2] if len(x.shape) == 4 else [0, 2, 1])
+
+        convblock_out = self.conv_block(x)
+
+        resblock1_out = self.res_block1(convblock_out)
+        resblock2_out = self.res_block2(resblock1_out)
+        resblock3_out = self.res_block3(resblock2_out)
+        poolblock_out = self.pool_block[0](resblock3_out)
+        poolblock_out = self.pool_block[1](poolblock_out)
+
+        return poolblock_out.transpose([0, 1, 3, 2] if len(poolblock_out.shape) == 4 else [0, 2, 1])
+
+    def forward(self, x):
+        """
+        Returns:
+            classification_prediction, detection_prediction
+            sizes: (b, 31, 722), (b, 31, 2)
+        """
+        ###############################
+        # forward pass for classifier #
+        ###############################
+        x = x.astype(paddle.float32).transpose([0, 1, 3, 2] if len(x.shape) == 4 else [0, 2, 1])
+
+        convblock_out = self.conv_block(x)
+
+        resblock1_out = self.res_block1(convblock_out)
+        resblock2_out = self.res_block2(resblock1_out)
+        resblock3_out = self.res_block3(resblock2_out)
+
+        poolblock_out = self.pool_block[0](resblock3_out)
+        poolblock_out = self.pool_block[1](poolblock_out)
+        GAN_feature = poolblock_out.transpose([0, 1, 3, 2] if len(poolblock_out.shape) == 4 else [0, 2, 1])
+        poolblock_out = self.pool_block[2](poolblock_out)
+
+        # (b, 256, 31, 2) => (b, 31, 256, 2) => (b, 31, 512)
+        classifier_out = poolblock_out.transpose([0, 2, 1, 3]).reshape((-1, self.seq_len, 512))
+        classifier_out, _ = self.bilstm_classifier(classifier_out)  # ignore the hidden states
+
+        classifier_out = classifier_out.reshape((-1, 512))  # (b * 31, 512)
+        classifier_out = self.classifier(classifier_out)
+        classifier_out = classifier_out.reshape((-1, self.seq_len, self.num_class))  # (b, 31, num_class)
+
+        # sizes: (b, 31, 722), (b, 31, 2)
+        # classifier output consists of predicted pitch classes per frame
+        # detector output consists of: (isvoice, notvoice) estimates per frame
+        return paddle.abs(classifier_out.squeeze()), GAN_feature, poolblock_out
+
+    @staticmethod
+    def init_weights(m):
+        if isinstance(m, nn.Linear):
+            nn.initializer.KaimingUniform()(m.weight)
+            if m.bias is not None:
+                nn.initializer.Constant(0)(m.bias)
+        elif isinstance(m, nn.Conv2D):
+            nn.initializer.XavierNormal()(m.weight)
+        elif isinstance(m, nn.LSTM) or isinstance(m, nn.LSTMCell):
+            for p in m.parameters():
+                if len(p.shape) >= 2 and float('.'.join(paddle.__version__.split('.')[:2])) >= 2.3:
+                    nn.initializer.Orthogonal()(p)
+                else:
+                    nn.initializer.Normal()(p)
+
+
+ class ResBlock(nn.Layer):
+    def __init__(self, in_channels: int, out_channels: int, leaky_relu_slope=0.01):
+        super().__init__()
+        self.downsample = in_channels != out_channels
+
+        # BN / LReLU / MaxPool layer before the conv layer - see Figure 1b in the paper
+        self.pre_conv = nn.Sequential(
+            nn.BatchNorm2D(num_features=in_channels),
+            nn.LeakyReLU(leaky_relu_slope),
+            nn.MaxPool2D(kernel_size=(1, 2)),  # apply downsampling on the y axis only
+        )
+
+        # conv layers
+        self.conv = nn.Sequential(
+            nn.Conv2D(in_channels=in_channels, out_channels=out_channels,
+                      kernel_size=3, padding=1, bias_attr=False),
+            nn.BatchNorm2D(out_channels),
+            nn.LeakyReLU(leaky_relu_slope),
+            nn.Conv2D(out_channels, out_channels, 3, padding=1, bias_attr=False),
+        )
+
+        # 1 x 1 convolution layer to match the feature dimensions
+        self.conv1by1 = None
+        if self.downsample:
+            self.conv1by1 = nn.Conv2D(in_channels, out_channels, 1, bias_attr=False)
+
+    def forward(self, x):
+        x = self.pre_conv(x)
+        if self.downsample:
+            x = self.conv(x) + self.conv1by1(x)
+        else:
+            x = self.conv(x) + x
+        return x
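The `ResBlock` above is pre-activation style: `pre_conv` halves the frequency axis with `MaxPool2D((1, 2))`, then the output sums the 3x3 conv path with a shortcut that is 1x1-projected only when channel counts differ. The shape bookkeeping, traced with a hypothetical helper (shapes only, no tensors):

```python
def resblock_shapes(b, c_in, c_out, h, w):
    # Trace shapes through one ResBlock (illustrative helper, not repo code):
    # pre_conv's MaxPool2D(kernel_size=(1, 2)) halves the last axis only;
    # the 3x3 convs (padding=1) keep spatial size and map c_in -> c_out,
    # and the 1x1 shortcut matches channels when c_in != c_out.
    return (b, c_out, h, w // 2)

print(resblock_shapes(1, 64, 128, 31, 512))   # (1, 128, 31, 256)
print(resblock_shapes(2, 128, 192, 31, 256))  # (2, 192, 31, 128)
```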
starganv2vc_paddle/Utils/__init__.py ADDED
@@ -0,0 +1 @@
+
starganv2vc_paddle/fbank_matrix.pd ADDED
Binary file (328 kB). View file
 
starganv2vc_paddle/losses.py ADDED
@@ -0,0 +1,215 @@
+ #coding:utf-8
+
+ import os
+ import paddle
+
+ from paddle import nn
+ from munch import Munch
+ from starganv2vc_paddle.transforms import build_transforms
+
+ import paddle.nn.functional as F
+ import numpy as np
+
+ def compute_d_loss(nets, args, x_real, y_org, y_trg, z_trg=None, x_ref=None, use_r1_reg=True, use_adv_cls=False, use_con_reg=False):
+    args = Munch(args)
+
+    assert (z_trg is None) != (x_ref is None)
+    # with real audios
+    x_real.stop_gradient = False
+    out = nets.discriminator(x_real, y_org)
+    loss_real = adv_loss(out, 1)
+
+    # R1 regularization (https://arxiv.org/abs/1801.04406v4)
+    if use_r1_reg:
+        loss_reg = r1_reg(out, x_real)
+    else:
+        loss_reg = paddle.to_tensor([0.], dtype=paddle.float32)
+
+    # consistency regularization (bCR-GAN: https://arxiv.org/abs/2002.04724)
+    loss_con_reg = paddle.to_tensor([0.], dtype=paddle.float32)
+    if use_con_reg:
+        t = build_transforms()
+        out_aug = nets.discriminator(t(x_real).detach(), y_org)
+        loss_con_reg += F.smooth_l1_loss(out, out_aug)
+
+    # with fake audios
+    with paddle.no_grad():
+        if z_trg is not None:
+            s_trg = nets.mapping_network(z_trg, y_trg)
+        else:  # x_ref is not None
+            s_trg = nets.style_encoder(x_ref, y_trg)
+
+        F0 = nets.f0_model.get_feature_GAN(x_real)
+        x_fake = nets.generator(x_real, s_trg, masks=None, F0=F0)
+    out = nets.discriminator(x_fake, y_trg)
+    loss_fake = adv_loss(out, 0)
+    if use_con_reg:
+        out_aug = nets.discriminator(t(x_fake).detach(), y_trg)
+        loss_con_reg += F.smooth_l1_loss(out, out_aug)
+
+    # adversarial classifier loss
+    if use_adv_cls:
+        out_de = nets.discriminator.classifier(x_fake)
+        loss_real_adv_cls = F.cross_entropy(out_de[y_org != y_trg], y_org[y_org != y_trg])
+
+        if use_con_reg:
+            out_de_aug = nets.discriminator.classifier(t(x_fake).detach())
+            loss_con_reg += F.smooth_l1_loss(out_de, out_de_aug)
+    else:
+        loss_real_adv_cls = paddle.zeros([1]).mean()
+
+    loss = loss_real + loss_fake + args.lambda_reg * loss_reg + \
+           args.lambda_adv_cls * loss_real_adv_cls + \
+           args.lambda_con_reg * loss_con_reg
+
+    return loss, Munch(real=loss_real.item(),
+                       fake=loss_fake.item(),
+                       reg=loss_reg.item(),
+                       real_adv_cls=loss_real_adv_cls.item(),
+                       con_reg=loss_con_reg.item())
+
+ def compute_g_loss(nets, args, x_real, y_org, y_trg, z_trgs=None, x_refs=None, use_adv_cls=False):
+    args = Munch(args)
+
+    assert (z_trgs is None) != (x_refs is None)
+    if z_trgs is not None:
+        z_trg, z_trg2 = z_trgs
+    if x_refs is not None:
+        x_ref, x_ref2 = x_refs
+
+    # compute style vectors
+    if z_trgs is not None:
+        s_trg = nets.mapping_network(z_trg, y_trg)
+    else:
+        s_trg = nets.style_encoder(x_ref, y_trg)
+
+    # compute ASR/F0 features (real)
+    with paddle.no_grad():
+        F0_real, GAN_F0_real, cyc_F0_real = nets.f0_model(x_real)
+        ASR_real = nets.asr_model.get_feature(x_real)
+
+    # adversarial loss
+    x_fake = nets.generator(x_real, s_trg, masks=None, F0=GAN_F0_real)
+    out = nets.discriminator(x_fake, y_trg)
+    loss_adv = adv_loss(out, 1)
+
+    # compute ASR/F0 features (fake)
+    F0_fake, GAN_F0_fake, _ = nets.f0_model(x_fake)
+    ASR_fake = nets.asr_model.get_feature(x_fake)
+
+    # norm consistency loss
+    x_fake_norm = log_norm(x_fake)
+    x_real_norm = log_norm(x_real)
+    loss_norm = ((paddle.nn.ReLU()(paddle.abs(x_fake_norm - x_real_norm) - args.norm_bias))**2).mean()
+
+    # F0 loss
+    loss_f0 = f0_loss(F0_fake, F0_real)
+
+    # style F0 loss (style initialization)
+    if x_refs is not None and args.lambda_f0_sty > 0 and not use_adv_cls:
+        F0_sty, _, _ = nets.f0_model(x_ref)
+        loss_f0_sty = F.l1_loss(compute_mean_f0(F0_fake), compute_mean_f0(F0_sty))
+    else:
+        loss_f0_sty = paddle.zeros([1]).mean()
+
+    # ASR loss
+    loss_asr = F.smooth_l1_loss(ASR_fake, ASR_real)
+
+    # style reconstruction loss
+    s_pred = nets.style_encoder(x_fake, y_trg)
+    loss_sty = paddle.mean(paddle.abs(s_pred - s_trg))
+
+    # diversity sensitive loss
+    if z_trgs is not None:
+        s_trg2 = nets.mapping_network(z_trg2, y_trg)
+    else:
+        s_trg2 = nets.style_encoder(x_ref2, y_trg)
+    x_fake2 = nets.generator(x_real, s_trg2, masks=None, F0=GAN_F0_real)
+    x_fake2 = x_fake2.detach()
+    _, GAN_F0_fake2, _ = nets.f0_model(x_fake2)
+    loss_ds = paddle.mean(paddle.abs(x_fake - x_fake2))
+    loss_ds += F.smooth_l1_loss(GAN_F0_fake, GAN_F0_fake2.detach())
+
+    # cycle-consistency loss
+    s_org = nets.style_encoder(x_real, y_org)
+    x_rec = nets.generator(x_fake, s_org, masks=None, F0=GAN_F0_fake)
+    loss_cyc = paddle.mean(paddle.abs(x_rec - x_real))
+    # F0 loss in cycle-consistency loss
+    if args.lambda_f0 > 0:
+        _, _, cyc_F0_rec = nets.f0_model(x_rec)
+        loss_cyc += F.smooth_l1_loss(cyc_F0_rec, cyc_F0_real)
+    if args.lambda_asr > 0:
+        ASR_recon = nets.asr_model.get_feature(x_rec)
+        loss_cyc += F.smooth_l1_loss(ASR_recon, ASR_real)
+
+    # adversarial classifier loss
+    if use_adv_cls:
+        out_de = nets.discriminator.classifier(x_fake)
+        loss_adv_cls = F.cross_entropy(out_de[y_org != y_trg], y_trg[y_org != y_trg])
+    else:
+        loss_adv_cls = paddle.zeros([1]).mean()
+
+    loss = args.lambda_adv * loss_adv + args.lambda_sty * loss_sty \
+           - args.lambda_ds * loss_ds + args.lambda_cyc * loss_cyc \
+           + args.lambda_norm * loss_norm \
+           + args.lambda_asr * loss_asr \
+           + args.lambda_f0 * loss_f0 \
+           + args.lambda_f0_sty * loss_f0_sty \
+           + args.lambda_adv_cls * loss_adv_cls
+
+    return loss, Munch(adv=loss_adv.item(),
+                       sty=loss_sty.item(),
+                       ds=loss_ds.item(),
+                       cyc=loss_cyc.item(),
+                       norm=loss_norm.item(),
+                       asr=loss_asr.item(),
+                       f0=loss_f0.item(),
+                       adv_cls=loss_adv_cls.item())
+
+ # for norm consistency loss
+ def log_norm(x, mean=-4, std=4, axis=2):
+    """
+    normalized log mel -> mel -> norm -> log(norm)
+    """
+    x = paddle.log(paddle.exp(x * std + mean).norm(axis=axis))
+    return x
+
+ # for adversarial loss
+ def adv_loss(logits, target):
+    assert target in [1, 0]
+    if len(logits.shape) > 1:
+        logits = logits.reshape([-1])
+    targets = paddle.full_like(logits, fill_value=target)
+    logits = logits.clip(min=-10, max=10)  # prevent nan
+    loss = F.binary_cross_entropy_with_logits(logits, targets)
+    return loss
+
+ # for R1 regularization loss
+ def r1_reg(d_out, x_in):
+    # zero-centered gradient penalty for real images
+    batch_size = x_in.shape[0]
+    grad_dout = paddle.grad(
+        outputs=d_out.sum(), inputs=x_in,
+        create_graph=True, retain_graph=True, only_inputs=True
+    )[0]
+    grad_dout2 = grad_dout.pow(2)
+    assert (grad_dout2.shape == x_in.shape)
+    reg = 0.5 * grad_dout2.reshape((batch_size, -1)).sum(1).mean(0)
+    return reg
+
+ # for F0 consistency loss
+ def compute_mean_f0(f0):
+    f0_mean = f0.mean(-1)
+    f0_mean = f0_mean.expand((f0.shape[-1], f0_mean.shape[0])).transpose((1, 0))  # (B, M)
+    return f0_mean
+
+ def f0_loss(x_f0, y_f0):
+    """
+    x.shape = (B, 1, M, L): predict
+    y.shape = (B, 1, M, L): target
+    """
+    # compute the mean
+    x_mean = compute_mean_f0(x_f0)
+    y_mean = compute_mean_f0(y_f0)
+    loss = F.l1_loss(x_f0 / x_mean, y_f0 / y_mean)
+    return loss
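`adv_loss` above is plain binary cross-entropy with logits against a constant real (1) or fake (0) target, with logits clipped to [-10, 10] for stability. The arithmetic alone, as a NumPy sketch (not the training code):

```python
import numpy as np

def adv_loss(logits, target):
    # BCE-with-logits against an all-real (1) or all-fake (0) target,
    # with the same clipping as the paddle version above
    logits = np.clip(logits.reshape(-1), -10, 10)
    targets = np.full_like(logits, float(target))
    # numerically stable form: max(x, 0) - x*t + log(1 + exp(-|x|))
    loss = np.maximum(logits, 0) - logits * targets + np.log1p(np.exp(-np.abs(logits)))
    return loss.mean()

print(adv_loss(np.array([0.0, 0.0]), 1))  # log(2) ≈ 0.6931
```

A confident discriminator output with the matching target (e.g. logit 10, target 1) drives the loss toward zero, while the clip keeps extreme logits from producing overflow in the exponential.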
starganv2vc_paddle/meldataset.py ADDED
@@ -0,0 +1,155 @@
+ #coding: utf-8
+
+ import os
+ import time
+ import random
+ import paddle
+ import paddleaudio
+
+ import numpy as np
+ import soundfile as sf
+ import paddle.nn.functional as F
+
+ from paddle import nn
+ from paddle.io import DataLoader
+
+ import logging
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+
+ np.random.seed(1)
+ random.seed(1)
+
+ SPECT_PARAMS = {
+    "n_fft": 2048,
+    "win_length": 1200,
+    "hop_length": 300
+ }
+ MEL_PARAMS = {
+    "n_mels": 80,
+    "n_fft": 2048,
+    "win_length": 1200,
+    "hop_length": 300
+ }
+
+ class MelDataset(paddle.io.Dataset):
+    def __init__(self,
+                 data_list,
+                 sr=24000,
+                 validation=False,
+                 ):
+        _data_list = [l[:-1].split('|') for l in data_list]
+        self.data_list = [(path, int(label)) for path, label in _data_list]
+        self.data_list_per_class = {
+            target: [(path, label) for path, label in self.data_list if label == target] \
+            for target in list(set([label for _, label in self.data_list]))}
+
+        self.sr = sr
+        self.to_melspec = paddleaudio.features.MelSpectrogram(**MEL_PARAMS)
+        self.to_melspec.fbank_matrix[:] = paddle.load(os.path.dirname(__file__) + '/fbank_matrix.pd')['fbank_matrix']
+
+        self.mean, self.std = -4, 4
+        self.validation = validation
+        self.max_mel_length = 192
+
+    def __len__(self):
+        return len(self.data_list)
+
+    def __getitem__(self, idx):
+        with paddle.fluid.dygraph.guard(paddle.CPUPlace()):
+            data = self.data_list[idx]
+            mel_tensor, label = self._load_data(data)
+            ref_data = random.choice(self.data_list)
+            ref_mel_tensor, ref_label = self._load_data(ref_data)
+            ref2_data = random.choice(self.data_list_per_class[ref_label])
+            ref2_mel_tensor, _ = self._load_data(ref2_data)
+            return mel_tensor, label, ref_mel_tensor, ref2_mel_tensor, ref_label
+
+    def _load_data(self, data):
+        wave_tensor, label = self._load_tensor(data)
+
+        if not self.validation:  # random scale for robustness
+            random_scale = 0.5 + 0.5 * np.random.random()
+            wave_tensor = random_scale * wave_tensor
+
+        mel_tensor = self.to_melspec(wave_tensor)
+        mel_tensor = (paddle.log(1e-5 + mel_tensor) - self.mean) / self.std
+        mel_length = mel_tensor.shape[1]
+        if mel_length > self.max_mel_length:
+            random_start = np.random.randint(0, mel_length - self.max_mel_length)
+            mel_tensor = mel_tensor[:, random_start:random_start + self.max_mel_length]
+
+        return mel_tensor, label
+
+    def _preprocess(self, wave_tensor):
+        mel_tensor = self.to_melspec(wave_tensor)
+        mel_tensor = (paddle.log(1e-5 + mel_tensor) - self.mean) / self.std
+        return mel_tensor
+
+    def _load_tensor(self, data):
+        wave_path, label = data
+        label = int(label)
+        wave, sr = sf.read(wave_path)
+        wave_tensor = paddle.to_tensor(wave).astype(paddle.float32)
+        return wave_tensor, label
+
+ class Collater(object):
+    """
+    Args:
+        adaptive_batch_size (bool): if true, decrease batch size when long data comes.
+    """
+
+    def __init__(self, return_wave=False):
+        self.text_pad_index = 0
+        self.return_wave = return_wave
+        self.max_mel_length = 192
+        self.mel_length_step = 16
+        self.latent_dim = 16
+
+    def __call__(self, batch):
+        batch_size = len(batch)
+        nmels = batch[0][0].shape[0]
+        mels = paddle.zeros((batch_size, nmels, self.max_mel_length)).astype(paddle.float32)
+        labels = paddle.zeros((batch_size)).astype(paddle.int64)
+        ref_mels = paddle.zeros((batch_size, nmels, self.max_mel_length)).astype(paddle.float32)
+        ref2_mels = paddle.zeros((batch_size, nmels, self.max_mel_length)).astype(paddle.float32)
+        ref_labels = paddle.zeros((batch_size)).astype(paddle.int64)
+
+        for bid, (mel, label, ref_mel, ref2_mel, ref_label) in enumerate(batch):
+            mel_size = mel.shape[1]
+            mels[bid, :, :mel_size] = mel
+
+            ref_mel_size = ref_mel.shape[1]
+            ref_mels[bid, :, :ref_mel_size] = ref_mel
+
+            ref2_mel_size = ref2_mel.shape[1]
+            ref2_mels[bid, :, :ref2_mel_size] = ref2_mel
+
+            labels[bid] = label
+            ref_labels[bid] = ref_label
+
+        z_trg = paddle.randn((batch_size, self.latent_dim))
+        z_trg2 = paddle.randn((batch_size, self.latent_dim))
+
+        mels, ref_mels, ref2_mels = mels.unsqueeze(1), ref_mels.unsqueeze(1), ref2_mels.unsqueeze(1)
+        return mels, labels, ref_mels, ref2_mels, ref_labels, z_trg, z_trg2
+
+ def build_dataloader(path_list,
+                     validation=False,
+                     batch_size=4,
+                     num_workers=1,
+                     collate_config={},
+                     dataset_config={}):
+    dataset = MelDataset(path_list, validation=validation)
+    collate_fn = Collater(**collate_config)
+    data_loader = DataLoader(dataset,
+                             batch_size=batch_size,
+                             shuffle=(not validation),
+                             num_workers=num_workers,
+                             drop_last=(not validation),
+                             collate_fn=collate_fn)
+
+    return data_loader
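`Collater.__call__` zero-pads every spectrogram in the batch to `max_mel_length` on the time axis before stacking. The padding step alone, sketched in NumPy (`collate_mels` is an illustrative helper, not repo code):

```python
import numpy as np

def collate_mels(mels, max_mel_length=192):
    # Zero-pad each (n_mels, T) spectrogram to a fixed time length and
    # stack them, as Collater.__call__ above does for mels/ref_mels
    n_mels = mels[0].shape[0]
    out = np.zeros((len(mels), n_mels, max_mel_length), dtype=np.float32)
    for i, m in enumerate(mels):
        out[i, :, :m.shape[1]] = m
    return out

batch = [np.ones((80, 100), dtype=np.float32), np.ones((80, 192), dtype=np.float32)]
print(collate_mels(batch).shape)  # (2, 80, 192)
```

Because `MelDataset` already crops every clip to at most 192 frames, the padded tensor never truncates data; short clips simply carry trailing zeros.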
starganv2vc_paddle/models.py ADDED
@@ -0,0 +1,391 @@
+ """
+ StarGAN v2
+ Copyright (c) 2020-present NAVER Corp.
+ This work is licensed under the Creative Commons Attribution-NonCommercial
+ 4.0 International License. To view a copy of this license, visit
+ http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
+ Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+ """
+ import os
+ import os.path as osp
+
+ import copy
+ import math
+
+ from munch import Munch
+ import numpy as np
+ import paddle
+ import paddle.nn as nn
+ import paddle.nn.functional as F
+
+ class DownSample(nn.Layer):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        elif self.layer_type == 'timepreserve':
+            return F.avg_pool2d(x, (2, 1))
+        elif self.layer_type == 'half':
+            return F.avg_pool2d(x, 2)
+        else:
+            raise RuntimeError('Got unexpected downsample type %s, expected one of [none, timepreserve, half]' % self.layer_type)
+
+
+ class UpSample(nn.Layer):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        elif self.layer_type == 'timepreserve':
+            return F.interpolate(x, scale_factor=(2, 1), mode='nearest')
+        elif self.layer_type == 'half':
+            return F.interpolate(x, scale_factor=2, mode='nearest')
+        else:
+            raise RuntimeError('Got unexpected upsample type %s, expected one of [none, timepreserve, half]' % self.layer_type)
+
+
+ class ResBlk(nn.Layer):
+    def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2),
+                 normalize=False, downsample='none'):
+        super().__init__()
+        self.actv = actv
+        self.normalize = normalize
+        self.downsample = DownSample(downsample)
+        self.learned_sc = dim_in != dim_out
+        self._build_weights(dim_in, dim_out)
+
+    def _build_weights(self, dim_in, dim_out):
+        self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1)
+        self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1)
+        if self.normalize:
+            self.norm1 = nn.InstanceNorm2D(dim_in)
+            self.norm2 = nn.InstanceNorm2D(dim_in)
+        if self.learned_sc:
+            self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False)
+
+    def _shortcut(self, x):
+        if self.learned_sc:
+            x = self.conv1x1(x)
+        if self.downsample:
+            x = self.downsample(x)
+        return x
+
+    def _residual(self, x):
+        if self.normalize:
+            x = self.norm1(x)
+        x = self.actv(x)
+        x = self.conv1(x)
+        x = self.downsample(x)
+        if self.normalize:
+            x = self.norm2(x)
+        x = self.actv(x)
+        x = self.conv2(x)
+        return x
+
+    def forward(self, x):
+        x = self._shortcut(x) + self._residual(x)
+        return x / math.sqrt(2)  # unit variance
+
+ class AdaIN(nn.Layer):
+    def __init__(self, style_dim, num_features):
+        super().__init__()
+        self.norm = nn.InstanceNorm2D(num_features, weight_attr=False, bias_attr=False)
+        self.fc = nn.Linear(style_dim, num_features*2)
+
+    def forward(self, x, s):
+        if len(s.shape) == 1:
+            s = s[None]
+        h = self.fc(s)
+        h = h.reshape((h.shape[0], h.shape[1], 1, 1))
+        gamma, beta = paddle.split(h, 2, axis=1)
+        return (1 + gamma) * self.norm(x) + beta
+
+
+ class AdainResBlk(nn.Layer):
+    def __init__(self, dim_in, dim_out, style_dim=64, w_hpf=0,
+                 actv=nn.LeakyReLU(0.2), upsample='none'):
+        super().__init__()
+        self.w_hpf = w_hpf
+        self.actv = actv
+        self.upsample = UpSample(upsample)
49
+ else:
50
+ raise RuntimeError('Got unexpected upsampletype %s, expected is [none, timepreserve, half]' % self.layer_type)
51
+
52
+
53
+ class ResBlk(nn.Layer):
54
+ def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2),
55
+ normalize=False, downsample='none'):
56
+ super().__init__()
57
+ self.actv = actv
58
+ self.normalize = normalize
59
+ self.downsample = DownSample(downsample)
60
+ self.learned_sc = dim_in != dim_out
61
+ self._build_weights(dim_in, dim_out)
62
+
63
+ def _build_weights(self, dim_in, dim_out):
64
+ self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1)
65
+ self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1)
66
+ if self.normalize:
67
+ self.norm1 = nn.InstanceNorm2D(dim_in)
68
+ self.norm2 = nn.InstanceNorm2D(dim_in)
69
+ if self.learned_sc:
70
+ self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False)
71
+
72
+ def _shortcut(self, x):
73
+ if self.learned_sc:
74
+ x = self.conv1x1(x)
75
+ if self.downsample:
76
+ x = self.downsample(x)
77
+ return x
78
+
79
+ def _residual(self, x):
80
+ if self.normalize:
81
+ x = self.norm1(x)
82
+ x = self.actv(x)
83
+ x = self.conv1(x)
84
+ x = self.downsample(x)
85
+ if self.normalize:
86
+ x = self.norm2(x)
87
+ x = self.actv(x)
88
+ x = self.conv2(x)
89
+ return x
90
+
91
+ def forward(self, x):
92
+ x = self._shortcut(x) + self._residual(x)
93
+ return x / math.sqrt(2) # unit variance
94
+
95
+ class AdaIN(nn.Layer):
96
+ def __init__(self, style_dim, num_features):
97
+ super().__init__()
98
+ self.norm = nn.InstanceNorm2D(num_features, weight_attr=False, bias_attr=False)
99
+ self.fc = nn.Linear(style_dim, num_features*2)
100
+
101
+ def forward(self, x, s):
102
+ if len(s.shape) == 1:
103
+ s = s[None]
104
+ h = self.fc(s)
105
+ h = h.reshape((h.shape[0], h.shape[1], 1, 1))
106
+ gamma, beta = paddle.split(h, 2, axis=1)
107
+ return (1 + gamma) * self.norm(x) + beta
108
+
109
+
110
+ class AdainResBlk(nn.Layer):
111
+ def __init__(self, dim_in, dim_out, style_dim=64, w_hpf=0,
112
+ actv=nn.LeakyReLU(0.2), upsample='none'):
113
+ super().__init__()
114
+ self.w_hpf = w_hpf
115
+ self.actv = actv
116
+ self.upsample = UpSample(upsample)
117
+ self.learned_sc = dim_in != dim_out
118
+ self._build_weights(dim_in, dim_out, style_dim)
119
+
120
+ def _build_weights(self, dim_in, dim_out, style_dim=64):
121
+ self.conv1 = nn.Conv2D(dim_in, dim_out, 3, 1, 1)
122
+ self.conv2 = nn.Conv2D(dim_out, dim_out, 3, 1, 1)
123
+ self.norm1 = AdaIN(style_dim, dim_in)
124
+ self.norm2 = AdaIN(style_dim, dim_out)
125
+ if self.learned_sc:
126
+ self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False)
127
+
128
+ def _shortcut(self, x):
129
+ x = self.upsample(x)
130
+ if self.learned_sc:
131
+ x = self.conv1x1(x)
132
+ return x
133
+
134
+ def _residual(self, x, s):
135
+ x = self.norm1(x, s)
136
+ x = self.actv(x)
137
+ x = self.upsample(x)
138
+ x = self.conv1(x)
139
+ x = self.norm2(x, s)
140
+ x = self.actv(x)
141
+ x = self.conv2(x)
142
+ return x
143
+
144
+ def forward(self, x, s):
145
+ out = self._residual(x, s)
146
+ if self.w_hpf == 0:
147
+ out = (out + self._shortcut(x)) / math.sqrt(2)
148
+ return out
149
+
150
+
151
+ class HighPass(nn.Layer):
152
+ def __init__(self, w_hpf):
153
+ super(HighPass, self).__init__()
154
+ self.filter = paddle.to_tensor([[-1, -1, -1],
155
+ [-1, 8., -1],
156
+ [-1, -1, -1]]) / w_hpf
157
+
158
+ def forward(self, x):
159
+ filter = self.filter.unsqueeze(0).unsqueeze(1).tile([x.shape[1], 1, 1, 1])
160
+ return F.conv2d(x, filter, padding=1, groups=x.shape[1])
161
+
162
+
163
+ class Generator(nn.Layer):
164
+ def __init__(self, dim_in=48, style_dim=48, max_conv_dim=48*8, w_hpf=1, F0_channel=0):
165
+ super().__init__()
166
+
167
+ self.stem = nn.Conv2D(1, dim_in, 3, 1, 1)
168
+ self.encode = nn.LayerList()
169
+ self.decode = nn.LayerList()
170
+ self.to_out = nn.Sequential(
171
+ nn.InstanceNorm2D(dim_in),
172
+ nn.LeakyReLU(0.2),
173
+ nn.Conv2D(dim_in, 1, 1, 1, 0))
174
+ self.F0_channel = F0_channel
175
+ # down/up-sampling blocks
176
+ repeat_num = 4 #int(np.log2(img_size)) - 4
177
+ if w_hpf > 0:
178
+ repeat_num += 1
179
+
180
+ for lid in range(repeat_num):
181
+ if lid in [1, 3]:
182
+ _downtype = 'timepreserve'
183
+ else:
184
+ _downtype = 'half'
185
+
186
+ dim_out = min(dim_in*2, max_conv_dim)
187
+ self.encode.append(
188
+ ResBlk(dim_in, dim_out, normalize=True, downsample=_downtype))
189
+ (self.decode.insert if lid else lambda i, sublayer: self.decode.append(sublayer))(
190
+ 0, AdainResBlk(dim_out, dim_in, style_dim,
191
+ w_hpf=w_hpf, upsample=_downtype)) # stack-like
192
+ dim_in = dim_out
193
+
194
+ # bottleneck blocks (encoder)
195
+ for _ in range(2):
196
+ self.encode.append(
197
+ ResBlk(dim_out, dim_out, normalize=True))
198
+
199
+ # F0 blocks
200
+ if F0_channel != 0:
201
+ self.decode.insert(
202
+ 0, AdainResBlk(dim_out + int(F0_channel / 2), dim_out, style_dim, w_hpf=w_hpf))
203
+
204
+ # bottleneck blocks (decoder)
205
+ for _ in range(2):
206
+ self.decode.insert(
207
+ 0, AdainResBlk(dim_out + int(F0_channel / 2), dim_out + int(F0_channel / 2), style_dim, w_hpf=w_hpf))
208
+
209
+ if F0_channel != 0:
210
+ self.F0_conv = nn.Sequential(
211
+ ResBlk(F0_channel, int(F0_channel / 2), normalize=True, downsample="half"),
212
+ )
213
+
214
+
215
+ if w_hpf > 0:
216
+ self.hpf = HighPass(w_hpf)
217
+
218
+ def forward(self, x, s, masks=None, F0=None):
219
+ x = self.stem(x)
220
+ cache = {}
221
+ for block in self.encode:
222
+ if (masks is not None) and (x.shape[2] in [32, 64, 128]):
223
+ cache[x.shape[2]] = x
224
+ x = block(x)
225
+
226
+ if F0 is not None:
227
+ F0 = self.F0_conv(F0)
228
+ F0 = F.adaptive_avg_pool2d(F0, [x.shape[-2], x.shape[-1]])
229
+ x = paddle.concat([x, F0], axis=1)
230
+
231
+ for block in self.decode:
232
+ x = block(x, s)
233
+ if (masks is not None) and (x.shape[2] in [32, 64, 128]):
234
+ mask = masks[0] if x.shape[2] in [32] else masks[1]
235
+ mask = F.interpolate(mask, size=x.shape[2], mode='bilinear')
236
+ x = x + self.hpf(mask * cache[x.shape[2]])
237
+
238
+ return self.to_out(x)
239
+
240
+
241
+ class MappingNetwork(nn.Layer):
242
+ def __init__(self, latent_dim=16, style_dim=48, num_domains=2, hidden_dim=384):
243
+ super().__init__()
244
+ layers = []
245
+ layers += [nn.Linear(latent_dim, hidden_dim)]
246
+ layers += [nn.ReLU()]
247
+ for _ in range(3):
248
+ layers += [nn.Linear(hidden_dim, hidden_dim)]
249
+ layers += [nn.ReLU()]
250
+ self.shared = nn.Sequential(*layers)
251
+
252
+ self.unshared = nn.LayerList()
253
+ for _ in range(num_domains):
254
+ self.unshared.extend([nn.Sequential(nn.Linear(hidden_dim, hidden_dim),
255
+ nn.ReLU(),
256
+ nn.Linear(hidden_dim, hidden_dim),
257
+ nn.ReLU(),
258
+ nn.Linear(hidden_dim, hidden_dim),
259
+ nn.ReLU(),
260
+ nn.Linear(hidden_dim, style_dim))])
261
+
262
+ def forward(self, z, y):
263
+ h = self.shared(z)
264
+ out = []
265
+ for layer in self.unshared:
266
+ out += [layer(h)]
267
+ out = paddle.stack(out, axis=1) # (batch, num_domains, style_dim)
268
+ idx = paddle.arange(y.shape[0])
269
+ s = out[idx, y] # (batch, style_dim)
270
+ return s
271
+
272
+
273
+ class StyleEncoder(nn.Layer):
274
+ def __init__(self, dim_in=48, style_dim=48, num_domains=2, max_conv_dim=384):
275
+ super().__init__()
276
+ blocks = []
277
+ blocks += [nn.Conv2D(1, dim_in, 3, 1, 1)]
278
+
279
+ repeat_num = 4
280
+ for _ in range(repeat_num):
281
+ dim_out = min(dim_in*2, max_conv_dim)
282
+ blocks += [ResBlk(dim_in, dim_out, downsample='half')]
283
+ dim_in = dim_out
284
+
285
+ blocks += [nn.LeakyReLU(0.2)]
286
+ blocks += [nn.Conv2D(dim_out, dim_out, 5, 1, 0)]
287
+ blocks += [nn.AdaptiveAvgPool2D(1)]
288
+ blocks += [nn.LeakyReLU(0.2)]
289
+ self.shared = nn.Sequential(*blocks)
290
+
291
+ self.unshared = nn.LayerList()
292
+ for _ in range(num_domains):
293
+ self.unshared.append(nn.Linear(dim_out, style_dim))
294
+
295
+ def forward(self, x, y):
296
+ h = self.shared(x)
297
+
298
+ h = h.reshape((h.shape[0], -1))
299
+ out = []
300
+
301
+ for layer in self.unshared:
302
+ out += [layer(h)]
303
+
304
+ out = paddle.stack(out, axis=1) # (batch, num_domains, style_dim)
305
+ idx = paddle.arange(y.shape[0])
306
+ s = out[idx, y] # (batch, style_dim)
307
+ return s
308
+
309
+ class Discriminator(nn.Layer):
310
+ def __init__(self, dim_in=48, num_domains=2, max_conv_dim=384, repeat_num=4):
311
+ super().__init__()
312
+
313
+ # real/fake discriminator
314
+ self.dis = Discriminator2D(dim_in=dim_in, num_domains=num_domains,
315
+ max_conv_dim=max_conv_dim, repeat_num=repeat_num)
316
+ # adversarial classifier
317
+ self.cls = Discriminator2D(dim_in=dim_in, num_domains=num_domains,
318
+ max_conv_dim=max_conv_dim, repeat_num=repeat_num)
319
+ self.num_domains = num_domains
320
+
321
+ def forward(self, x, y):
322
+ return self.dis(x, y)
323
+
324
+ def classifier(self, x):
325
+ return self.cls.get_feature(x)
326
+
327
+
328
+ class LinearNorm(paddle.nn.Layer):
329
+ def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
330
+ super(LinearNorm, self).__init__()
331
+ self.linear_layer = paddle.nn.Linear(in_dim, out_dim, bias_attr=bias)
332
+
333
+ if float('.'.join(paddle.__version__.split('.')[:2])) >= 2.3:
334
+ gain = paddle.nn.initializer.calculate_gain(w_init_gain)
335
+ paddle.nn.initializer.XavierUniform()(self.linear_layer.weight)
336
+ self.linear_layer.weight.set_value(gain*self.linear_layer.weight)
337
+
338
+ def forward(self, x):
339
+ return self.linear_layer(x)
340
+
341
+ class Discriminator2D(nn.Layer):
342
+ def __init__(self, dim_in=48, num_domains=2, max_conv_dim=384, repeat_num=4):
343
+ super().__init__()
344
+ blocks = []
345
+ blocks += [nn.Conv2D(1, dim_in, 3, 1, 1)]
346
+
347
+ for lid in range(repeat_num):
348
+ dim_out = min(dim_in*2, max_conv_dim)
349
+ blocks += [ResBlk(dim_in, dim_out, downsample='half')]
350
+ dim_in = dim_out
351
+
352
+ blocks += [nn.LeakyReLU(0.2)]
353
+ blocks += [nn.Conv2D(dim_out, dim_out, 5, 1, 0)]
354
+ blocks += [nn.LeakyReLU(0.2)]
355
+ blocks += [nn.AdaptiveAvgPool2D(1)]
356
+ blocks += [nn.Conv2D(dim_out, num_domains, 1, 1, 0)]
357
+ self.main = nn.Sequential(*blocks)
358
+
359
+ def get_feature(self, x):
360
+ out = self.main(x)
361
+ out = out.reshape((out.shape[0], -1)) # (batch, num_domains)
362
+ return out
363
+
364
+ def forward(self, x, y):
365
+ out = self.get_feature(x)
366
+ idx = paddle.arange(y.shape[0])
367
+ out = out[idx, y] # (batch)
368
+ return out
369
+
370
+
371
+ def build_model(args, F0_model, ASR_model):
372
+ generator = Generator(args.dim_in, args.style_dim, args.max_conv_dim, w_hpf=args.w_hpf, F0_channel=args.F0_channel)
373
+ mapping_network = MappingNetwork(args.latent_dim, args.style_dim, args.num_domains, hidden_dim=args.max_conv_dim)
374
+ style_encoder = StyleEncoder(args.dim_in, args.style_dim, args.num_domains, args.max_conv_dim)
375
+ discriminator = Discriminator(args.dim_in, args.num_domains, args.max_conv_dim, args.n_repeat)
376
+ generator_ema = copy.deepcopy(generator)
377
+ mapping_network_ema = copy.deepcopy(mapping_network)
378
+ style_encoder_ema = copy.deepcopy(style_encoder)
379
+
380
+ nets = Munch(generator=generator,
381
+ mapping_network=mapping_network,
382
+ style_encoder=style_encoder,
383
+ discriminator=discriminator,
384
+ f0_model=F0_model,
385
+ asr_model=ASR_model)
386
+
387
+ nets_ema = Munch(generator=generator_ema,
388
+ mapping_network=mapping_network_ema,
389
+ style_encoder=style_encoder_ema)
390
+
391
+ return nets, nets_ema
starganv2vc_paddle/optimizers.py ADDED
@@ -0,0 +1,80 @@
+ #coding:utf-8
+ import os, sys
+ import os.path as osp
+ import numpy as np
+ import paddle
+ from paddle import nn
+ from paddle.optimizer import Optimizer
+ from functools import reduce
+ from paddle.optimizer import AdamW
+
+ class MultiOptimizer:
+     def __init__(self, optimizers={}, schedulers={}):
+         self.optimizers = optimizers
+         self.schedulers = schedulers
+         self.keys = list(optimizers.keys())
+
+     def get_lr(self):
+         return max([self.optimizers[key].get_lr()
+                     for key in self.keys])
+
+     def state_dict(self):
+         state_dicts = [(key, self.optimizers[key].state_dict())
+                        for key in self.keys]
+         return state_dicts
+
+     def set_state_dict(self, state_dict):
+         for key, val in state_dict:
+             try:
+                 self.optimizers[key].set_state_dict(val)
+             except Exception:
+                 print("Unloaded %s" % key)
+
+     def step(self, key=None, scaler=None):
+         keys = [key] if key is not None else self.keys
+         _ = [self._step(key, scaler) for key in keys]
+
+     def _step(self, key, scaler=None):
+         if scaler is not None:
+             scaler.step(self.optimizers[key])
+             scaler.update()
+         else:
+             self.optimizers[key].step()
+
+     def clear_grad(self, key=None):
+         if key is not None:
+             self.optimizers[key].clear_grad()
+         else:
+             _ = [self.optimizers[key].clear_grad() for key in self.keys]
+
+     def scheduler(self, *args, key=None):
+         if key is not None:
+             self.schedulers[key].step(*args)
+         else:
+             _ = [self.schedulers[key].step(*args) for key in self.keys]
+
+ def define_scheduler(params):
+     print(params)
+     # scheduler = paddle.optim.lr_scheduler.OneCycleLR(
+     #     max_lr=params.get('max_lr', 2e-4),
+     #     epochs=params.get('epochs', 200),
+     #     steps_per_epoch=params.get('steps_per_epoch', 1000),
+     #     pct_start=params.get('pct_start', 0.0),
+     #     div_factor=1,
+     #     final_div_factor=1)
+     scheduler = paddle.optimizer.lr.CosineAnnealingDecay(
+         learning_rate=params.get('max_lr', 2e-4),
+         T_max=10)
+
+     return scheduler
+
+ def build_optimizer(parameters_dict, scheduler_params_dict):
+     schedulers = dict([(key, define_scheduler(params))
+                        for key, params in scheduler_params_dict.items()])
+
+     optim = dict([(key, AdamW(parameters=parameters_dict[key], learning_rate=sch, weight_decay=1e-4, beta1=0.1, beta2=0.99, epsilon=1e-9))
+                   for key, sch in schedulers.items()])
+
+     multi_optim = MultiOptimizer(optim, schedulers)
+     return multi_optim
starganv2vc_paddle/trainer.py ADDED
@@ -0,0 +1,276 @@
+ # -*- coding: utf-8 -*-
+
+ import os
+ import os.path as osp
+ import sys
+ import time
+ from collections import defaultdict
+
+ import numpy as np
+ import paddle
+ from paddle import nn
+ from PIL import Image
+ from tqdm import tqdm
+
+ from starganv2vc_paddle.losses import compute_d_loss, compute_g_loss
+
+ import logging
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+
+ class Trainer(object):
+     def __init__(self,
+                  args,
+                  model=None,
+                  model_ema=None,
+                  optimizer=None,
+                  scheduler=None,
+                  config={},
+                  logger=logger,
+                  train_dataloader=None,
+                  val_dataloader=None,
+                  initial_steps=0,
+                  initial_epochs=0,
+                  fp16_run=False
+                  ):
+         self.args = args
+         self.steps = initial_steps
+         self.epochs = initial_epochs
+         self.model = model
+         self.model_ema = model_ema
+         self.optimizer = optimizer
+         self.scheduler = scheduler
+         self.train_dataloader = train_dataloader
+         self.val_dataloader = val_dataloader
+         self.config = config
+         self.finish_train = False
+         self.logger = logger
+         self.fp16_run = fp16_run
+
+     def save_checkpoint(self, checkpoint_path):
+         """Save checkpoint.
+         Args:
+             checkpoint_path (str): Checkpoint path to be saved.
+         """
+         state_dict = {
+             "optimizer": self.optimizer.state_dict(),
+             "steps": self.steps,
+             "epochs": self.epochs,
+             "model": {key: self.model[key].state_dict() for key in self.model}
+         }
+         if self.model_ema is not None:
+             state_dict['model_ema'] = {key: self.model_ema[key].state_dict() for key in self.model_ema}
+
+         if not os.path.exists(os.path.dirname(checkpoint_path)):
+             os.makedirs(os.path.dirname(checkpoint_path))
+         paddle.save(state_dict, checkpoint_path)
+
+     def load_checkpoint(self, checkpoint_path, load_only_params=False):
+         """Load checkpoint.
+
+         Args:
+             checkpoint_path (str): Checkpoint path to be loaded.
+             load_only_params (bool): Whether to load only model parameters.
+
+         """
+         state_dict = paddle.load(checkpoint_path)
+         if state_dict["model"] is not None:
+             for key in self.model:
+                 self._load(state_dict["model"][key], self.model[key])
+
+         if self.model_ema is not None:
+             for key in self.model_ema:
+                 self._load(state_dict["model_ema"][key], self.model_ema[key])
+
+         if not load_only_params:
+             self.steps = state_dict["steps"]
+             self.epochs = state_dict["epochs"]
+             self.optimizer.set_state_dict(state_dict["optimizer"])
+
+     def _load(self, states, model, force_load=True):
+         model_states = model.state_dict()
+         for key, val in states.items():
+             try:
+                 if key not in model_states:
+                     continue
+                 if isinstance(val, nn.Parameter):
+                     val = val.clone().detach()
+
+                 if val.shape != model_states[key].shape:
+                     self.logger.info("%s does not have same shape" % key)
+                     print(val.shape, model_states[key].shape)
+                     if not force_load:
+                         continue
+
+                     min_shape = np.minimum(np.array(val.shape), np.array(model_states[key].shape))
+                     slices = [slice(0, min_index) for min_index in min_shape]
+                     model_states[key][slices][:] = val[slices]
+                 else:
+                     model_states[key][:] = val
+             except Exception:
+                 self.logger.info("not exist: %s" % key)
+                 print("not exist", key)
+
+     @staticmethod
+     def get_gradient_norm(model):
+         total_norm = 0
+         for p in model.parameters():
+             param_norm = p.grad.norm(2)
+             total_norm += param_norm.item() ** 2
+
+         total_norm = np.sqrt(total_norm)
+         return total_norm
+
+     @staticmethod
+     def length_to_mask(lengths):
+         mask = paddle.arange(lengths.max()).unsqueeze(0).expand([lengths.shape[0], -1]).astype(lengths.dtype)
+         mask = paddle.greater_than(mask+1, lengths.unsqueeze(1))
+         return mask
+
+     def _get_lr(self):
+         return self.optimizer.get_lr()
+
+     @staticmethod
+     def moving_average(model, model_test, beta=0.999):
+         for param, param_test in zip(model.parameters(), model_test.parameters()):
+             param_test.set_value(param + beta * (param_test - param))
+
+     def _train_epoch(self):
+         """Train model one epoch."""
+         self.epochs += 1
+
+         train_losses = defaultdict(list)
+         _ = [self.model[k].train() for k in self.model]
+         scaler = paddle.amp.GradScaler() if self.fp16_run else None
+
+         use_con_reg = (self.epochs >= self.args.con_reg_epoch)
+         use_adv_cls = (self.epochs >= self.args.adv_cls_epoch)
+
+         for train_steps_per_epoch, batch in enumerate(tqdm(self.train_dataloader, desc="[train]"), 1):
+
+             ### load data
+             x_real, y_org, x_ref, x_ref2, y_trg, z_trg, z_trg2 = batch
+
+             # train the discriminator (by random reference)
+             self.optimizer.clear_grad()
+             if scaler is not None:
+                 with paddle.amp.auto_cast():
+                     d_loss, d_losses_latent = compute_d_loss(self.model, self.args.d_loss, x_real, y_org, y_trg, z_trg=z_trg, use_adv_cls=use_adv_cls, use_con_reg=use_con_reg)
+                 scaler.scale(d_loss).backward()
+             else:
+                 d_loss, d_losses_latent = compute_d_loss(self.model, self.args.d_loss, x_real, y_org, y_trg, z_trg=z_trg, use_adv_cls=use_adv_cls, use_con_reg=use_con_reg)
+                 d_loss.backward()
+             self.optimizer.step('discriminator', scaler=scaler)
+
+             # train the discriminator (by target reference)
+             self.optimizer.clear_grad()
+             if scaler is not None:
+                 with paddle.amp.auto_cast():
+                     d_loss, d_losses_ref = compute_d_loss(self.model, self.args.d_loss, x_real, y_org, y_trg, x_ref=x_ref, use_adv_cls=use_adv_cls, use_con_reg=use_con_reg)
+                 scaler.scale(d_loss).backward()
+             else:
+                 d_loss, d_losses_ref = compute_d_loss(self.model, self.args.d_loss, x_real, y_org, y_trg, x_ref=x_ref, use_adv_cls=use_adv_cls, use_con_reg=use_con_reg)
+                 d_loss.backward()
+
+             self.optimizer.step('discriminator', scaler=scaler)
+
+             # train the generator (by random reference)
+             self.optimizer.clear_grad()
+             if scaler is not None:
+                 with paddle.amp.auto_cast():
+                     g_loss, g_losses_latent = compute_g_loss(
+                         self.model, self.args.g_loss, x_real, y_org, y_trg, z_trgs=[z_trg, z_trg2], use_adv_cls=use_adv_cls)
+                 scaler.scale(g_loss).backward()
+             else:
+                 g_loss, g_losses_latent = compute_g_loss(
+                     self.model, self.args.g_loss, x_real, y_org, y_trg, z_trgs=[z_trg, z_trg2], use_adv_cls=use_adv_cls)
+                 g_loss.backward()
+
+             self.optimizer.step('generator', scaler=scaler)
+             self.optimizer.step('mapping_network', scaler=scaler)
+             self.optimizer.step('style_encoder', scaler=scaler)
+
+             # train the generator (by target reference)
+             self.optimizer.clear_grad()
+             if scaler is not None:
+                 with paddle.amp.auto_cast():
+                     g_loss, g_losses_ref = compute_g_loss(
+                         self.model, self.args.g_loss, x_real, y_org, y_trg, x_refs=[x_ref, x_ref2], use_adv_cls=use_adv_cls)
+                 scaler.scale(g_loss).backward()
+             else:
+                 g_loss, g_losses_ref = compute_g_loss(
+                     self.model, self.args.g_loss, x_real, y_org, y_trg, x_refs=[x_ref, x_ref2], use_adv_cls=use_adv_cls)
+                 g_loss.backward()
+             self.optimizer.step('generator', scaler=scaler)
+
+             # compute moving average of network parameters
+             self.moving_average(self.model.generator, self.model_ema.generator, beta=0.999)
+             self.moving_average(self.model.mapping_network, self.model_ema.mapping_network, beta=0.999)
+             self.moving_average(self.model.style_encoder, self.model_ema.style_encoder, beta=0.999)
+             self.optimizer.scheduler()
+
+             for key in d_losses_latent:
+                 train_losses["train/%s" % key].append(d_losses_latent[key])
+             for key in g_losses_latent:
+                 train_losses["train/%s" % key].append(g_losses_latent[key])
+
+         train_losses = {key: np.mean(value) for key, value in train_losses.items()}
+         return train_losses
+
+     @paddle.no_grad()
+     def _eval_epoch(self):
+         """Evaluate model one epoch."""
+         use_adv_cls = (self.epochs >= self.args.adv_cls_epoch)
+
+         eval_losses = defaultdict(list)
+         eval_images = defaultdict(list)
+         _ = [self.model[k].eval() for k in self.model]
+         for eval_steps_per_epoch, batch in enumerate(tqdm(self.val_dataloader, desc="[eval]"), 1):
+
+             ### load data
+             x_real, y_org, x_ref, x_ref2, y_trg, z_trg, z_trg2 = batch
+
+             # evaluate the discriminator losses
+             d_loss, d_losses_latent = compute_d_loss(
+                 self.model, self.args.d_loss, x_real, y_org, y_trg, z_trg=z_trg, use_r1_reg=False, use_adv_cls=use_adv_cls)
+             d_loss, d_losses_ref = compute_d_loss(
+                 self.model, self.args.d_loss, x_real, y_org, y_trg, x_ref=x_ref, use_r1_reg=False, use_adv_cls=use_adv_cls)
+
+             # evaluate the generator losses
+             g_loss, g_losses_latent = compute_g_loss(
+                 self.model, self.args.g_loss, x_real, y_org, y_trg, z_trgs=[z_trg, z_trg2], use_adv_cls=use_adv_cls)
+             g_loss, g_losses_ref = compute_g_loss(
+                 self.model, self.args.g_loss, x_real, y_org, y_trg, x_refs=[x_ref, x_ref2], use_adv_cls=use_adv_cls)
+
+             for key in d_losses_latent:
+                 eval_losses["eval/%s" % key].append(d_losses_latent[key])
+             for key in g_losses_latent:
+                 eval_losses["eval/%s" % key].append(g_losses_latent[key])
+
+             # if eval_steps_per_epoch % 10 == 0:
+             #     # generate x_fake
+             #     s_trg = self.model_ema.style_encoder(x_ref, y_trg)
+             #     F0 = self.model.f0_model.get_feature_GAN(x_real)
+             #     x_fake = self.model_ema.generator(x_real, s_trg, masks=None, F0=F0)
+             #     # generate x_recon
+             #     s_real = self.model_ema.style_encoder(x_real, y_org)
+             #     F0_fake = self.model.f0_model.get_feature_GAN(x_fake)
+             #     x_recon = self.model_ema.generator(x_fake, s_real, masks=None, F0=F0_fake)
+
+             #     eval_images['eval/image'].append(
+             #         ([x_real[0, 0].numpy(),
+             #           x_fake[0, 0].numpy(),
+             #           x_recon[0, 0].numpy()]))
+
+         eval_losses = {key: np.mean(value) for key, value in eval_losses.items()}
+         eval_losses.update(eval_images)
+         return eval_losses
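The EMA update in `Trainer.moving_average` above, `param + beta * (param_test - param)`, is algebraically the standard exponential moving average `beta*param_test + (1-beta)*param`: with `beta=0.999`, each step moves the EMA weight 0.1% of the way toward the live weight. A scalar sketch:

```python
def ema_update(param, param_test, beta=0.999):
    """One moving_average step on a single scalar weight."""
    # param + beta*(param_test - param)  ==  beta*param_test + (1-beta)*param
    return param + beta * (param_test - param)

p, p_ema = 1.0, 0.0          # live weight vs. its EMA shadow
p_ema = ema_update(p, p_ema)  # moves 0.1% of the gap: ~0.001
print(p_ema)
```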
starganv2vc_paddle/transforms.py ADDED
@@ -0,0 +1,120 @@
+ # -*- coding: utf-8 -*-
+
+ import numpy as np
+ import paddle
+ from paddle import nn
+ import paddle.nn.functional as F
+ import paddleaudio
+ import paddleaudio.functional as audio_F
+ import random
+
+ ## 1. RandomTimeStrech
+
+ class TimeStrech(nn.Layer):
+     def __init__(self, scale):
+         super(TimeStrech, self).__init__()
+         self.scale = scale
+
+     def forward(self, x):
+         mel_size = x.shape[-1]
+
+         x = F.interpolate(x, scale_factor=(1, self.scale), align_corners=False,
+                           mode='bilinear').squeeze()
+
+         if x.shape[-1] < mel_size:
+             noise_length = (mel_size - x.shape[-1])
+             random_pos = random.randint(0, x.shape[-1]) - noise_length
+             if random_pos < 0:
+                 random_pos = 0
+             noise = x[..., random_pos:random_pos + noise_length]
+             x = paddle.concat([x, noise], axis=-1)
+         else:
+             x = x[..., :mel_size]
+
+         return x.unsqueeze(1)
+
+ ## 2. PitchShift
+ class PitchShift(nn.Layer):
+     def __init__(self, shift):
+         super(PitchShift, self).__init__()
+         self.shift = shift
+
+     def forward(self, x):
+         if len(x.shape) == 2:
+             x = x.unsqueeze(0)
+         x = x.squeeze()
+         mel_size = x.shape[1]
+         shift_scale = (mel_size + self.shift) / mel_size
+         x = F.interpolate(x.unsqueeze(1), scale_factor=(shift_scale, 1.), align_corners=False,
+                           mode='bilinear').squeeze(1)
+
+         x = x[:, :mel_size]
+         if x.shape[1] < mel_size:
+             pad_size = mel_size - x.shape[1]
+             x = paddle.concat([x, paddle.zeros([x.shape[0], pad_size, x.shape[2]])], axis=1)
+         x = x.squeeze()
+         return x.unsqueeze(1)
+
+ ## 3. ShiftBias
+ class ShiftBias(nn.Layer):
+     def __init__(self, bias):
+         super(ShiftBias, self).__init__()
+         self.bias = bias
+
+     def forward(self, x):
+         return x + self.bias
+
+ ## 4. Scaling
+ class SpectScaling(nn.Layer):
+     def __init__(self, scale):
+         super(SpectScaling, self).__init__()
+         self.scale = scale
+
+     def forward(self, x):
+         return x * self.scale
+
+ ## 5. Time Flip
+ class TimeFlip(nn.Layer):
+     def __init__(self, length):
+         super(TimeFlip, self).__init__()
+         self.length = round(length)
+
+     def forward(self, x):
+         if self.length > 1:
+             start = np.random.randint(0, x.shape[-1] - self.length)
+             x_ret = x.clone()
+             x_ret[..., start:start + self.length] = paddle.flip(x[..., start:start + self.length], axis=[-1])
+             x = x_ret
+         return x
+
+ class PhaseShuffle2D(nn.Layer):
+     def __init__(self, n=2):
+         super(PhaseShuffle2D, self).__init__()
+         self.n = n
+         self.random = random.Random(1)
+
+     def forward(self, x, move=None):
+         # x.shape = (B, C, M, L)
+         if move is None:
+             move = self.random.randint(-self.n, self.n)
+
+         if move == 0:
+             return x
+         else:
+             left = x[:, :, :, :move]
+             right = x[:, :, :, move:]
+             shuffled = paddle.concat([right, left], axis=3)
+
+         return shuffled
+
+ def build_transforms():
+     transforms = [
+         lambda M: TimeStrech(1 + (np.random.random()-0.5)*M*0.2),
+         lambda M: SpectScaling(1 + (np.random.random()-1)*M*0.1),
+         lambda M: PhaseShuffle2D(192),
+     ]
+     N, M = len(transforms), np.random.random()
+     composed = nn.Sequential(
+         *[trans(M) for trans in np.random.choice(transforms, N)]
+     )
+     return composed
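`PhaseShuffle2D` above is a circular rotation of the last (time) axis by `move` frames: the concatenation of `x[..., move:]` and `x[..., :move]` rotates the frame order, for positive and negative `move` alike. A pure-Python sketch on a plain list standing in for the time axis (the helper name `phase_shuffle` is ours):

```python
def phase_shuffle(frames, move):
    """Rotate the time axis by `move` frames, as PhaseShuffle2D.forward does
    with paddle.concat([right, left], axis=3). Works for negative move too,
    via Python's negative-index slicing."""
    if move == 0:
        return frames
    return frames[move:] + frames[:move]

print(phase_shuffle([0, 1, 2, 3, 4], 2))   # [2, 3, 4, 0, 1]
print(phase_shuffle([0, 1, 2], -1))        # [2, 0, 1]
```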
test_arch.py ADDED
@@ -0,0 +1,65 @@
+ #!/usr/bin/env python3
+ #coding:utf-8
+
+ import os
+ import yaml
+ import paddle
+ import click
+ import warnings
+ warnings.simplefilter('ignore')
+
+ from munch import Munch
+
+ from starganv2vc_paddle.models import build_model
+
+ from starganv2vc_paddle.Utils.ASR.models import ASRCNN
+ from starganv2vc_paddle.Utils.JDC.model import JDCNet
+
+
+ @click.command()
+ @click.option('-p', '--config_path', default='Configs/config.yml', type=str)
+ def main(config_path):
+     config = yaml.safe_load(open(config_path))
+
+     # load ASR model
+     ASR_config = config.get('ASR_config', False)
+     with open(ASR_config) as f:
+         ASR_config = yaml.safe_load(f)
+     ASR_model_config = ASR_config['model_params']
+     ASR_model = ASRCNN(**ASR_model_config)
+     _ = ASR_model.eval()
+
+     # load F0 model
+     F0_model = JDCNet(num_class=1, seq_len=192)
+     _ = F0_model.eval()
+
+     # build model
+     _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)
+
+     asr_input = paddle.randn([4, 80, 192])
+     print('ASR model input:', asr_input.shape, 'output:', ASR_model(asr_input).shape)
+     mel_input = paddle.randn([4, 1, 192, 512])
+     print('F0 model input:', mel_input.shape, 'output:', [t.shape for t in F0_model(mel_input)])
+
+     _ = [v.eval() for v in model_ema.values()]
+     label = paddle.to_tensor([0, 1, 2, 3], dtype=paddle.int64)
+     latent_dim = model_ema.mapping_network.shared[0].weight.shape[0]
+     latent_style = paddle.randn([4, latent_dim])
+     ref = model_ema.mapping_network(latent_style, label)
+     mel_input2 = paddle.randn([4, 1, 192, 512])
+     style_ref = model_ema.style_encoder(mel_input2, label)
+     print('StarGANv2-VC encoder inputs:', mel_input2.shape, 'output:', style_ref.shape, 'should have the same shape as the ref:', ref.shape)
+     f0_feat = F0_model.get_feature_GAN(mel_input)
+     out = model_ema.generator(mel_input, style_ref, F0=f0_feat)
+     print('StarGANv2-VC inputs:', label.shape, latent_style.shape, mel_input.shape, 'output:', out.shape)
+
+     paddle.save({k: v.state_dict() for k, v in model_ema.items()}, 'test_arch.pd')
+     file_size = os.path.getsize('test_arch.pd') / float(1024*1024)
+     print(f'Main models occupied {file_size:.2f} MB')
+     os.remove('test_arch.pd')
+
+     return 0
+
+ if __name__ == "__main__":
+     main()
train.py ADDED
@@ -0,0 +1,149 @@
+ #!/usr/bin/env python3
+ #coding:utf-8
+
+ import os
+ import os.path as osp
+ import re
+ import sys
+ import yaml
+ import shutil
+ import numpy as np
+ import paddle
+ import click
+ import warnings
+ warnings.simplefilter('ignore')
+
+ from functools import reduce
+ from munch import Munch
+
+ from starganv2vc_paddle.meldataset import build_dataloader
+ from starganv2vc_paddle.optimizers import build_optimizer
+ from starganv2vc_paddle.models import build_model
+ from starganv2vc_paddle.trainer import Trainer
+ from visualdl import LogWriter
+
+ from starganv2vc_paddle.Utils.ASR.models import ASRCNN
+ from starganv2vc_paddle.Utils.JDC.model import JDCNet
+
+ import logging
+ from logging import StreamHandler
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+ handler = StreamHandler()
+ handler.setLevel(logging.DEBUG)
+ logger.addHandler(handler)
+
+
+ @click.command()
+ @click.option('-p', '--config_path', default='Configs/config.yml', type=str)
+ def main(config_path):
+     config = yaml.safe_load(open(config_path))
+
+     log_dir = config['log_dir']
+     if not osp.exists(log_dir): os.makedirs(log_dir, exist_ok=True)
+     shutil.copy(config_path, osp.join(log_dir, osp.basename(config_path)))
+     writer = LogWriter(log_dir + "/visualdl")
+
+     # write logs
+     file_handler = logging.FileHandler(osp.join(log_dir, 'train.log'))
+     file_handler.setLevel(logging.DEBUG)
+     file_handler.setFormatter(logging.Formatter('%(levelname)s:%(asctime)s: %(message)s'))
+     logger.addHandler(file_handler)
+
+     batch_size = config.get('batch_size', 10)
+     epochs = config.get('epochs', 1000)
+     save_freq = config.get('save_freq', 20)
+     train_path = config.get('train_data', None)
+     val_path = config.get('val_data', None)
+     stage = config.get('stage', 'star')
+     fp16_run = config.get('fp16_run', False)
+
+     # load data
+     train_list, val_list = get_data_path_list(train_path, val_path)
+     train_dataloader = build_dataloader(train_list,
+                                         batch_size=batch_size,
+                                         num_workers=4)
+     val_dataloader = build_dataloader(val_list,
+                                       batch_size=batch_size,
+                                       validation=True,
+                                       num_workers=2)
+
+     # load pretrained ASR model
+     ASR_config = config.get('ASR_config', False)
+     ASR_path = config.get('ASR_path', False)
75
+ with open(ASR_config) as f:
76
+ ASR_config = yaml.safe_load(f)
77
+ ASR_model_config = ASR_config['model_params']
78
+ ASR_model = ASRCNN(**ASR_model_config)
79
+ params = paddle.load(ASR_path)['model']
80
+ ASR_model.set_state_dict(params)
81
+ _ = ASR_model.eval()
82
+
83
+ # load pretrained F0 model
84
+ F0_path = config.get('F0_path', False)
85
+ F0_model = JDCNet(num_class=1, seq_len=192)
86
+ params = paddle.load(F0_path)['net']
87
+ F0_model.set_state_dict(params)
88
+
89
+ # build model
90
+ model, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)
91
+
92
+ scheduler_params = {
93
+ "max_lr": float(config['optimizer_params'].get('lr', 2e-4)),
94
+ "pct_start": float(config['optimizer_params'].get('pct_start', 0.0)),
95
+ "epochs": epochs,
96
+ "steps_per_epoch": len(train_dataloader),
97
+ }
98
+
99
+ scheduler_params_dict = {key: scheduler_params.copy() for key in model}
100
+ scheduler_params_dict['mapping_network']['max_lr'] = 2e-6
101
+ optimizer = build_optimizer({key: model[key].parameters() for key in model},
102
+ scheduler_params_dict=scheduler_params_dict)
103
+
104
+ trainer = Trainer(args=Munch(config['loss_params']), model=model,
105
+ model_ema=model_ema,
106
+ optimizer=optimizer,
107
+ train_dataloader=train_dataloader,
108
+ val_dataloader=val_dataloader,
109
+ logger=logger,
110
+ fp16_run=fp16_run)
111
+
112
+ if config.get('pretrained_model', '') != '':
113
+ trainer.load_checkpoint(config['pretrained_model'],
114
+ load_only_params=config.get('load_only_params', True))
115
+
116
+ for _ in range(1, epochs+1):
117
+ epoch = trainer.epochs
118
+ train_results = trainer._train_epoch()
119
+ eval_results = trainer._eval_epoch()
120
+ results = train_results.copy()
121
+ results.update(eval_results)
122
+ logger.info('--- epoch %d ---' % epoch)
123
+ for key, value in results.items():
124
+ if isinstance(value, float):
125
+ logger.info('%-15s: %.4f' % (key, value))
126
+ writer.add_scalar(key, value, epoch)
127
+ else:
128
+ for v in value:
129
+ writer.add_histogram('eval_spec', v, epoch)
130
+ if (epoch % save_freq) == 0:
131
+ trainer.save_checkpoint(osp.join(log_dir, 'epoch_%05d.pd' % epoch))
132
+
133
+ return 0
134
+
135
+ def get_data_path_list(train_path=None, val_path=None):
136
+ if train_path is None:
137
+ train_path = "Data/train_list.txt"
138
+ if val_path is None:
139
+ val_path = "Data/val_list.txt"
140
+
141
+ with open(train_path, 'r') as f:
142
+ train_list = f.readlines()
143
+ with open(val_path, 'r') as f:
144
+ val_list = f.readlines()
145
+
146
+ return train_list, val_list
147
+
148
+ if __name__=="__main__":
149
+ main()
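One detail of train.py worth noting: `scheduler_params_dict` is built with `dict.copy()` per module, so each module gets an independent parameter dict and lowering `mapping_network`'s `max_lr` leaves the others untouched. A minimal sketch of that pattern (the module names here are illustrative, not the repo's exact model keys):

```python
# Shared scheduler defaults, as built from the config in train.py.
scheduler_params = {"max_lr": 1e-4, "epochs": 150}

# One independent shallow copy per module; names are illustrative.
modules = ["generator", "mapping_network", "style_encoder"]
scheduler_params_dict = {key: scheduler_params.copy() for key in modules}

# Override only the mapping network's learning rate.
scheduler_params_dict["mapping_network"]["max_lr"] = 2e-6

print(scheduler_params_dict["generator"]["max_lr"])        # unchanged: 0.0001
print(scheduler_params_dict["mapping_network"]["max_lr"])  # 2e-06
```

Without the `.copy()`, all keys would share one dict and the override would silently change every module's learning rate.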