alibabasglab
commited on
Commit
•
3f76abf
1
Parent(s):
2c29e5c
Upload 37 files
Browse files- checkpoints/.DS_Store +0 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/config.yaml +54 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/last_best_checkpoint.pt +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/last_checkpoint.pt +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/log_2024-11-13(17:55:07).txt +651 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731491824.dlc1h2tsljxspymy-master-0.29.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731566864.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731570913.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731640001.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731643472.dlc1uz4efbcdp34x-master-0.28.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731649164.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731651543.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731669934.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731762939.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731781044.dlc1uz4efbcdp34x-master-0.27.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731814134.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731827391.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731834146.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731839773.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731918387.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731921663.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731923844.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731932635.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731944330.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732008941.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732009565.dlc1uz4efbcdp34x-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732070579.dlc1uz4efbcdp34x-master-0.25.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732085511.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732091307.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732108121.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732592656.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732622764.dlc1evi5tz54lvk8-master-0.27.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732631132.dlc1evi5tz54lvk8-master-0.24.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732635246.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732656623.dlc1evi5tz54lvk8-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732674150.dlcb6t1c7cg4v7av-master-0.26.0 +3 -0
- checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732683557.dlcb6t1c7cg4v7av-master-0.26.0 +3 -0
checkpoints/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/config.yaml
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Config file
|
2 |
+
|
3 |
+
# Log
|
4 |
+
seed: 777
|
5 |
+
use_cuda: 1 # 1 for True, 0 for False
|
6 |
+
|
7 |
+
# dataset
|
8 |
+
speaker_no: 2
|
9 |
+
mix_lst_path: ./data/VoxCeleb2/mixture_data_list_2mix.csv
|
10 |
+
audio_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/audio_clean/
|
11 |
+
reference_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/orig/
|
12 |
+
audio_sr: 16000
|
13 |
+
ref_sr: 25
|
14 |
+
|
15 |
+
# dataloader
|
16 |
+
num_workers: 4
|
17 |
+
batch_size: 2 # 4-GPU training with a total effective batch size of 8
|
18 |
+
accu_grad: 0
|
19 |
+
effec_batch_size: 2 # per GPU, only used if accu_grad is set to 1, must be multiple times of batch size
|
20 |
+
max_length: 3 # truncate the utterances in dataloader, in seconds
|
21 |
+
|
22 |
+
# network settings
|
23 |
+
init_from: None # 'None' or a log name 'log_2024-07-22(18:12:13)'
|
24 |
+
causal: 0 # 1 for True, 0 for False
|
25 |
+
network_reference:
|
26 |
+
cue: lip # lip or speech or gesture or EEG
|
27 |
+
backbone: resnet18 # resnet18 or shufflenetV2 or blazenet64
|
28 |
+
emb_size: 256 # resnet18:256
|
29 |
+
network_audio:
|
30 |
+
backbone: av_mossformer2
|
31 |
+
encoder_kernel_size: 16
|
32 |
+
encoder_out_nchannels: 512
|
33 |
+
encoder_in_nchannels: 1
|
34 |
+
|
35 |
+
masknet_numspks: 1
|
36 |
+
masknet_chunksize: 250
|
37 |
+
masknet_numlayers: 1
|
38 |
+
masknet_norm: "ln"
|
39 |
+
masknet_useextralinearlayer: False
|
40 |
+
masknet_extraskipconnection: True
|
41 |
+
|
42 |
+
intra_numlayers: 24
|
43 |
+
intra_nhead: 8
|
44 |
+
intra_dffn: 1024
|
45 |
+
intra_dropout: 0
|
46 |
+
intra_use_positional: True
|
47 |
+
intra_norm_before: True
|
48 |
+
|
49 |
+
|
50 |
+
# optimizer
|
51 |
+
loss_type: sisdr # "snr", "sisdr", "hybrid"
|
52 |
+
init_learning_rate: 0.00015
|
53 |
+
max_epoch: 150
|
54 |
+
clip_grad_norm: 5
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/last_best_checkpoint.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56c2db4be71c90fd9937dd491538de7e94ab16342c9727ebcbc40957d96e3b5c
|
3 |
+
size 734561014
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/last_checkpoint.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1ee4517d931abde67565facbc30fe5b77ff80cb82baecf6fe171074673ad2bf
|
3 |
+
size 734537584
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/log_2024-11-13(17:55:07).txt
ADDED
@@ -0,0 +1,651 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Config file
|
2 |
+
|
3 |
+
# Log
|
4 |
+
seed: 777
|
5 |
+
use_cuda: 1 # 1 for True, 0 for False
|
6 |
+
|
7 |
+
# dataset
|
8 |
+
speaker_no: 2
|
9 |
+
mix_lst_path: ./data/VoxCeleb2/mixture_data_list_2mix.csv
|
10 |
+
audio_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/audio_clean/
|
11 |
+
reference_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/orig/
|
12 |
+
audio_sr: 16000
|
13 |
+
ref_sr: 25
|
14 |
+
|
15 |
+
# dataloader
|
16 |
+
num_workers: 4
|
17 |
+
batch_size: 2 # 2-GPU training with a total effective batch size of 8
|
18 |
+
accu_grad: 1
|
19 |
+
effec_batch_size: 4 # per GPU, only used if accu_grad is set to 1, must be multiple times of batch size
|
20 |
+
max_length: 3 # truncate the utterances in dataloader, in seconds
|
21 |
+
|
22 |
+
# network settings
|
23 |
+
init_from: None # 'None' or a log name 'log_2024-07-22(18:12:13)'
|
24 |
+
causal: 0 # 1 for True, 0 for False
|
25 |
+
network_reference:
|
26 |
+
cue: lip # lip or speech or gesture or EEG
|
27 |
+
backbone: resnet18 # resnet18 or shufflenetV2 or blazenet64
|
28 |
+
emb_size: 256 # resnet18:256
|
29 |
+
network_audio:
|
30 |
+
backbone: av_mossformer2
|
31 |
+
encoder_kernel_size: 16
|
32 |
+
encoder_out_nchannels: 512
|
33 |
+
encoder_in_nchannels: 1
|
34 |
+
|
35 |
+
masknet_numspks: 1
|
36 |
+
masknet_chunksize: 250
|
37 |
+
masknet_numlayers: 1
|
38 |
+
masknet_norm: "ln"
|
39 |
+
masknet_useextralinearlayer: False
|
40 |
+
masknet_extraskipconnection: True
|
41 |
+
|
42 |
+
intra_numlayers: 24
|
43 |
+
intra_nhead: 8
|
44 |
+
intra_dffn: 1024
|
45 |
+
intra_dropout: 0
|
46 |
+
intra_use_positional: True
|
47 |
+
intra_norm_before: True
|
48 |
+
|
49 |
+
|
50 |
+
# optimizer
|
51 |
+
loss_type: sisdr # "snr", "sisdr", "hybrid"
|
52 |
+
init_learning_rate: 0.00015
|
53 |
+
max_epoch: 150
|
54 |
+
clip_grad_norm: 5
|
55 |
+
W1113 17:55:46.456669 140327764465472 torch/distributed/run.py:779]
|
56 |
+
W1113 17:55:46.456669 140327764465472 torch/distributed/run.py:779] *****************************************
|
57 |
+
W1113 17:55:46.456669 140327764465472 torch/distributed/run.py:779] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
58 |
+
W1113 17:55:46.456669 140327764465472 torch/distributed/run.py:779] *****************************************
|
59 |
+
[W1113 17:56:11.149805096 Utils.hpp:164] Warning: Environment variable NCCL_BLOCKING_WAIT is deprecated; use TORCH_NCCL_BLOCKING_WAIT instead (function operator())
|
60 |
+
[W1113 17:56:11.149815679 Utils.hpp:164] Warning: Environment variable NCCL_BLOCKING_WAIT is deprecated; use TORCH_NCCL_BLOCKING_WAIT instead (function operator())
|
61 |
+
[W1113 17:56:11.150638714 Utils.hpp:135] Warning: Environment variable NCCL_ASYNC_ERROR_HANDLING is deprecated; use TORCH_NCCL_ASYNC_ERROR_HANDLING instead (function operator())
|
62 |
+
[W1113 17:56:11.150654063 Utils.hpp:135] Warning: Environment variable NCCL_ASYNC_ERROR_HANDLING is deprecated; use TORCH_NCCL_ASYNC_ERROR_HANDLING instead (function operator())
|
63 |
+
started on checkpoints/log_2024-11-13(17:55:06)
|
64 |
+
|
65 |
+
namespace(accu_grad=1, audio_direc='/mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/audio_clean/', audio_sr=16000, batch_size=2, causal=0, checkpoint_dir='checkpoints/log_2024-11-13(17:55:06)', clip_grad_norm=5.0, config=[<yamlargparse.Path object at 0x7f21a4b85c10>], device=device(type='cuda'), distributed=True, effec_batch_size=4, evaluate_only=0, init_from='None', init_learning_rate=0.00015, local_rank=0, loss_type='sisdr', lr_warmup=0, max_epoch=150, max_length=3, mix_lst_path='./data/VoxCeleb2/mixture_data_list_2mix.csv', network_audio=namespace(backbone='av_mossformer2', encoder_in_nchannels=1, encoder_kernel_size=16, encoder_out_nchannels=512, intra_dffn=1024, intra_dropout=0, intra_nhead=8, intra_norm_before=True, intra_numlayers=24, intra_use_positional=True, masknet_chunksize=250, masknet_extraskipconnection=True, masknet_norm='ln', masknet_numlayers=1, masknet_numspks=1, masknet_useextralinearlayer=False), network_reference=namespace(backbone='resnet18', cue='lip', emb_size=256), num_workers=4, ref_sr=25, reference_direc='/mnt/nas_sg/wulanchabu/zexu.pan/datasets/VoxCeleb2/orig/', seed=777, speaker_no=2, train_from_last_checkpoint=0, use_cuda=1, world_size=2)
|
66 |
+
network_wrapper(
|
67 |
+
(sep_network): av_Mossformer(
|
68 |
+
(encoder): Encoder(
|
69 |
+
(conv1d_U): Conv1d(1, 512, kernel_size=(16,), stride=(8,), bias=False)
|
70 |
+
)
|
71 |
+
(separator): Separator(
|
72 |
+
(layer_norm): GroupNorm(1, 512, eps=1e-08, affine=True)
|
73 |
+
(bottleneck_conv1x1): Conv1d(512, 512, kernel_size=(1,), stride=(1,), bias=False)
|
74 |
+
(masknet): Dual_Path_Model(
|
75 |
+
(pos_enc): ScaledSinuEmbedding()
|
76 |
+
(dual_mdl): ModuleList(
|
77 |
+
(0): Dual_Computation_Block(
|
78 |
+
(intra_mdl): SBFLASHBlock_DualA(
|
79 |
+
(mdl): TransformerEncoder_FLASH_DualA_FSMN(
|
80 |
+
(flashT): FLASHTransformer_DualA_FSMN(
|
81 |
+
(fsmn): ModuleList(
|
82 |
+
(0-23): 24 x Gated_FSMN_Block_Dilated(
|
83 |
+
(conv1): Sequential(
|
84 |
+
(0): Conv1d(512, 256, kernel_size=(1,), stride=(1,))
|
85 |
+
(1): PReLU(num_parameters=1)
|
86 |
+
)
|
87 |
+
(norm1): CLayerNorm((256,), eps=1e-05, elementwise_affine=True)
|
88 |
+
(gated_fsmn): Gated_FSMN_dilated(
|
89 |
+
(to_u): FFConvM(
|
90 |
+
(mdl): Sequential(
|
91 |
+
(0): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
|
92 |
+
(1): Linear(in_features=256, out_features=256, bias=True)
|
93 |
+
(2): SiLU()
|
94 |
+
(3): ConvModule(
|
95 |
+
(sequential): Sequential(
|
96 |
+
(0): Transpose()
|
97 |
+
(1): DepthwiseConv1d(
|
98 |
+
(conv): Conv1d(256, 256, kernel_size=(17,), stride=(1,), padding=(8,), groups=256, bias=False)
|
99 |
+
)
|
100 |
+
)
|
101 |
+
)
|
102 |
+
(4): Dropout(p=0.1, inplace=False)
|
103 |
+
)
|
104 |
+
)
|
105 |
+
(to_v): FFConvM(
|
106 |
+
(mdl): Sequential(
|
107 |
+
(0): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
|
108 |
+
(1): Linear(in_features=256, out_features=256, bias=True)
|
109 |
+
(2): SiLU()
|
110 |
+
(3): ConvModule(
|
111 |
+
(sequential): Sequential(
|
112 |
+
(0): Transpose()
|
113 |
+
(1): DepthwiseConv1d(
|
114 |
+
(conv): Conv1d(256, 256, kernel_size=(17,), stride=(1,), padding=(8,), groups=256, bias=False)
|
115 |
+
)
|
116 |
+
)
|
117 |
+
)
|
118 |
+
(4): Dropout(p=0.1, inplace=False)
|
119 |
+
)
|
120 |
+
)
|
121 |
+
(fsmn): UniDeepFsmn_dilated(
|
122 |
+
(linear): Linear(in_features=256, out_features=256, bias=True)
|
123 |
+
(project): Linear(in_features=256, out_features=256, bias=False)
|
124 |
+
(conv): DilatedDenseNet(
|
125 |
+
(pad): ConstantPad2d(padding=(1, 1, 1, 0), value=0.0)
|
126 |
+
(pad1): ConstantPad2d(padding=(0, 0, 19, 19), value=0.0)
|
127 |
+
(conv1): Conv2d(256, 256, kernel_size=(39, 1), stride=(1, 1), groups=256, bias=False)
|
128 |
+
(norm1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
|
129 |
+
(prelu1): PReLU(num_parameters=256)
|
130 |
+
(pad2): ConstantPad2d(padding=(0, 0, 38, 38), value=0.0)
|
131 |
+
(conv2): Conv2d(512, 256, kernel_size=(39, 1), stride=(1, 1), dilation=(2, 1), groups=256, bias=False)
|
132 |
+
(norm2): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
|
133 |
+
(prelu2): PReLU(num_parameters=256)
|
134 |
+
)
|
135 |
+
)
|
136 |
+
)
|
137 |
+
(norm2): CLayerNorm((256,), eps=1e-05, elementwise_affine=True)
|
138 |
+
(conv2): Conv1d(256, 512, kernel_size=(1,), stride=(1,))
|
139 |
+
)
|
140 |
+
)
|
141 |
+
(layers): ModuleList(
|
142 |
+
(0-23): 24 x FLASH_ShareA_FFConvM(
|
143 |
+
(rotary_pos_emb): RotaryEmbedding()
|
144 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
145 |
+
(to_hidden): FFConvM(
|
146 |
+
(mdl): Sequential(
|
147 |
+
(0): ScaleNorm()
|
148 |
+
(1): Linear(in_features=512, out_features=2048, bias=True)
|
149 |
+
(2): SiLU()
|
150 |
+
(3): ConvModule(
|
151 |
+
(sequential): Sequential(
|
152 |
+
(0): Transpose()
|
153 |
+
(1): DepthwiseConv1d(
|
154 |
+
(conv): Conv1d(2048, 2048, kernel_size=(17,), stride=(1,), padding=(8,), groups=2048, bias=False)
|
155 |
+
)
|
156 |
+
)
|
157 |
+
)
|
158 |
+
(4): Dropout(p=0.1, inplace=False)
|
159 |
+
)
|
160 |
+
)
|
161 |
+
(to_qk): FFConvM(
|
162 |
+
(mdl): Sequential(
|
163 |
+
(0): ScaleNorm()
|
164 |
+
(1): Linear(in_features=512, out_features=128, bias=True)
|
165 |
+
(2): SiLU()
|
166 |
+
(3): ConvModule(
|
167 |
+
(sequential): Sequential(
|
168 |
+
(0): Transpose()
|
169 |
+
(1): DepthwiseConv1d(
|
170 |
+
(conv): Conv1d(128, 128, kernel_size=(17,), stride=(1,), padding=(8,), groups=128, bias=False)
|
171 |
+
)
|
172 |
+
)
|
173 |
+
)
|
174 |
+
(4): Dropout(p=0.1, inplace=False)
|
175 |
+
)
|
176 |
+
)
|
177 |
+
(qk_offset_scale): OffsetScale()
|
178 |
+
(to_out): FFConvM(
|
179 |
+
(mdl): Sequential(
|
180 |
+
(0): ScaleNorm()
|
181 |
+
(1): Linear(in_features=1024, out_features=512, bias=True)
|
182 |
+
(2): SiLU()
|
183 |
+
(3): ConvModule(
|
184 |
+
(sequential): Sequential(
|
185 |
+
(0): Transpose()
|
186 |
+
(1): DepthwiseConv1d(
|
187 |
+
(conv): Conv1d(512, 512, kernel_size=(17,), stride=(1,), padding=(8,), groups=512, bias=False)
|
188 |
+
)
|
189 |
+
)
|
190 |
+
)
|
191 |
+
(4): Dropout(p=0.1, inplace=False)
|
192 |
+
)
|
193 |
+
)
|
194 |
+
(gateActivate): Sigmoid()
|
195 |
+
)
|
196 |
+
)
|
197 |
+
)
|
198 |
+
(norm): LayerNorm(
|
199 |
+
(norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
|
200 |
+
)
|
201 |
+
)
|
202 |
+
)
|
203 |
+
(intra_norm): GroupNorm(1, 512, eps=1e-08, affine=True)
|
204 |
+
)
|
205 |
+
)
|
206 |
+
(conv1d_out): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
|
207 |
+
(conv1_decoder): Conv1d(512, 512, kernel_size=(1,), stride=(1,), bias=False)
|
208 |
+
(prelu): PReLU(num_parameters=1)
|
209 |
+
(activation): ReLU()
|
210 |
+
(output): Sequential(
|
211 |
+
(0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
|
212 |
+
(1): Tanh()
|
213 |
+
)
|
214 |
+
(output_gate): Sequential(
|
215 |
+
(0): Conv1d(512, 512, kernel_size=(1,), stride=(1,))
|
216 |
+
(1): Sigmoid()
|
217 |
+
)
|
218 |
+
)
|
219 |
+
(av_conv): Conv1d(768, 512, kernel_size=(1,), stride=(1,))
|
220 |
+
)
|
221 |
+
(decoder): Decoder(
|
222 |
+
(basis_signals): Linear(in_features=512, out_features=16, bias=False)
|
223 |
+
)
|
224 |
+
)
|
225 |
+
(ref_encoder): Visual_encoder(
|
226 |
+
(v_frontend): VisualFrontend(
|
227 |
+
(frontend3D): Sequential(
|
228 |
+
(0): Conv3d(1, 64, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
|
229 |
+
(1): SyncBatchNorm(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
230 |
+
(2): ReLU()
|
231 |
+
(3): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
|
232 |
+
)
|
233 |
+
(resnet): ResNet(
|
234 |
+
(layer1): ResNetLayer(
|
235 |
+
(conv1a): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
236 |
+
(bn1a): SyncBatchNorm(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
237 |
+
(conv2a): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
238 |
+
(downsample): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
|
239 |
+
(outbna): SyncBatchNorm(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
240 |
+
(conv1b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
241 |
+
(bn1b): SyncBatchNorm(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
242 |
+
(conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
243 |
+
(outbnb): SyncBatchNorm(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
244 |
+
)
|
245 |
+
(layer2): ResNetLayer(
|
246 |
+
(conv1a): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
|
247 |
+
(bn1a): SyncBatchNorm(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
248 |
+
(conv2a): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
249 |
+
(downsample): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
|
250 |
+
(outbna): SyncBatchNorm(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
251 |
+
(conv1b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
252 |
+
(bn1b): SyncBatchNorm(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
253 |
+
(conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
254 |
+
(outbnb): SyncBatchNorm(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
255 |
+
)
|
256 |
+
(layer3): ResNetLayer(
|
257 |
+
(conv1a): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
|
258 |
+
(bn1a): SyncBatchNorm(256, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
259 |
+
(conv2a): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
260 |
+
(downsample): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
|
261 |
+
(outbna): SyncBatchNorm(256, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
262 |
+
(conv1b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
263 |
+
(bn1b): SyncBatchNorm(256, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
264 |
+
(conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
265 |
+
(outbnb): SyncBatchNorm(256, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
266 |
+
)
|
267 |
+
(layer4): ResNetLayer(
|
268 |
+
(conv1a): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
|
269 |
+
(bn1a): SyncBatchNorm(512, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
270 |
+
(conv2a): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
271 |
+
(downsample): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
|
272 |
+
(outbna): SyncBatchNorm(512, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
273 |
+
(conv1b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
274 |
+
(bn1b): SyncBatchNorm(512, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
275 |
+
(conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
|
276 |
+
(outbnb): SyncBatchNorm(512, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
|
277 |
+
)
|
278 |
+
(avgpool): AvgPool2d(kernel_size=(4, 4), stride=(1, 1), padding=0)
|
279 |
+
)
|
280 |
+
)
|
281 |
+
(v_ds): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
282 |
+
(visual_conv): Sequential(
|
283 |
+
(0): VisualConv1D(
|
284 |
+
(relu_0): ReLU()
|
285 |
+
(norm_0): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
286 |
+
(conv1x1): Conv1d(256, 512, kernel_size=(1,), stride=(1,), bias=False)
|
287 |
+
(relu): ReLU()
|
288 |
+
(norm_1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
289 |
+
(dsconv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
|
290 |
+
(prelu): PReLU(num_parameters=1)
|
291 |
+
(norm_2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
292 |
+
(pw_conv): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
293 |
+
)
|
294 |
+
(1): VisualConv1D(
|
295 |
+
(relu_0): ReLU()
|
296 |
+
(norm_0): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
297 |
+
(conv1x1): Conv1d(256, 512, kernel_size=(1,), stride=(1,), bias=False)
|
298 |
+
(relu): ReLU()
|
299 |
+
(norm_1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
300 |
+
(dsconv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
|
301 |
+
(prelu): PReLU(num_parameters=1)
|
302 |
+
(norm_2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
303 |
+
(pw_conv): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
304 |
+
)
|
305 |
+
(2): VisualConv1D(
|
306 |
+
(relu_0): ReLU()
|
307 |
+
(norm_0): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
308 |
+
(conv1x1): Conv1d(256, 512, kernel_size=(1,), stride=(1,), bias=False)
|
309 |
+
(relu): ReLU()
|
310 |
+
(norm_1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
311 |
+
(dsconv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
|
312 |
+
(prelu): PReLU(num_parameters=1)
|
313 |
+
(norm_2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
314 |
+
(pw_conv): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
315 |
+
)
|
316 |
+
(3): VisualConv1D(
|
317 |
+
(relu_0): ReLU()
|
318 |
+
(norm_0): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
319 |
+
(conv1x1): Conv1d(256, 512, kernel_size=(1,), stride=(1,), bias=False)
|
320 |
+
(relu): ReLU()
|
321 |
+
(norm_1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
322 |
+
(dsconv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
|
323 |
+
(prelu): PReLU(num_parameters=1)
|
324 |
+
(norm_2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
325 |
+
(pw_conv): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
326 |
+
)
|
327 |
+
(4): VisualConv1D(
|
328 |
+
(relu_0): ReLU()
|
329 |
+
(norm_0): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
330 |
+
(conv1x1): Conv1d(256, 512, kernel_size=(1,), stride=(1,), bias=False)
|
331 |
+
(relu): ReLU()
|
332 |
+
(norm_1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
333 |
+
(dsconv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), groups=512)
|
334 |
+
(prelu): PReLU(num_parameters=1)
|
335 |
+
(norm_2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
336 |
+
(pw_conv): Conv1d(512, 256, kernel_size=(1,), stride=(1,), bias=False)
|
337 |
+
)
|
338 |
+
)
|
339 |
+
)
|
340 |
+
)
|
341 |
+
|
342 |
+
Total number of parameters: 68516407
|
343 |
+
|
344 |
+
|
345 |
+
Total number of trainable parameters: 57331303
|
346 |
+
|
347 |
+
dlc1h2tsljxspymy-master-0:29:29 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth
|
348 |
+
dlc1h2tsljxspymy-master-0:29:29 [0] NCCL INFO Bootstrap : Using eth0:22.6.229.207<0>
|
349 |
+
dlc1h2tsljxspymy-master-0:29:29 [0] NCCL INFO Plugin name set by env to libnccl-net-none.so
|
350 |
+
dlc1h2tsljxspymy-master-0:29:29 [0] NCCL INFO NET/Plugin : dlerror=libnccl-net-none.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net-none.so), using internal implementation
|
351 |
+
dlc1h2tsljxspymy-master-0:29:29 [0] NCCL INFO cudaDriverVersion 11040
|
352 |
+
dlc1h2tsljxspymy-master-0:30:30 [1] NCCL INFO cudaDriverVersion 11040
|
353 |
+
NCCL version 2.20.5+cuda11.8
|
354 |
+
dlc1h2tsljxspymy-master-0:30:30 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth
|
355 |
+
dlc1h2tsljxspymy-master-0:30:30 [1] NCCL INFO Bootstrap : Using eth0:22.6.229.207<0>
|
356 |
+
dlc1h2tsljxspymy-master-0:30:30 [1] NCCL INFO Plugin name set by env to libnccl-net-none.so
|
357 |
+
dlc1h2tsljxspymy-master-0:30:30 [1] NCCL INFO NET/Plugin : dlerror=libnccl-net-none.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net-none.so), using internal implementation
|
358 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth
|
359 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth
|
360 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO NCCL_IB_HCA set to mlx5
|
361 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO NCCL_IB_HCA set to mlx5
|
362 |
+
libibverbs: Warning: couldn't load driver 'libhfi1verbs-rdmav25.so': libhfi1verbs-rdmav25.so: cannot open shared object file: No such file or directory
|
363 |
+
libibverbs: Warning: couldn't load driver 'libhfi1verbs-rdmav25.so': libhfi1verbs-rdmav25.so: cannot open shared object file: No such file or directory
|
364 |
+
libibverbs: Warning: couldn't load driver 'librxe-rdmav25.so': librxe-rdmav25.so: cannot open shared object file: No such file or directory
|
365 |
+
libibverbs: Warning: couldn't load driver 'librxe-rdmav25.so': librxe-rdmav25.so: cannot open shared object file: No such file or directory
|
366 |
+
libibverbs: Warning: couldn't load driver 'libmthca-rdmav25.so': libmthca-rdmav25.so: cannot open shared object file: No such file or directory
|
367 |
+
libibverbs: Warning: couldn't load driver 'libmthca-rdmav25.so': libmthca-rdmav25.so: cannot open shared object file: No such file or directory
|
368 |
+
libibverbs: Warning: couldn't load driver 'libvmw_pvrdma-rdmav25.so': libvmw_pvrdma-rdmav25.so: cannot open shared object file: No such file or directory
|
369 |
+
libibverbs: Warning: couldn't load driver 'libvmw_pvrdma-rdmav25.so': libvmw_pvrdma-rdmav25.so: cannot open shared object file: No such file or directory
|
370 |
+
libibverbs: Warning: couldn't load driver 'libhns-rdmav25.so': libhns-rdmav25.so: cannot open shared object file: No such file or directory
|
371 |
+
libibverbs: Warning: couldn't load driver 'libhns-rdmav25.so': libhns-rdmav25.so: cannot open shared object file: No such file or directory
|
372 |
+
libibverbs: Warning: couldn't load driver 'libipathverbs-rdmav25.so': libipathverbs-rdmav25.so: cannot open shared object file: No such file or directory
|
373 |
+
libibverbs: Warning: couldn't load driver 'libipathverbs-rdmav25.so': libipathverbs-rdmav25.so: cannot open shared object file: No such file or directory
|
374 |
+
libibverbs: Warning: couldn't load driver 'libsiw-rdmav25.so': libsiw-rdmav25.so: cannot open shared object file: No such file or directory
|
375 |
+
libibverbs: Warning: couldn't load driver 'libsiw-rdmav25.so': libsiw-rdmav25.so: cannot open shared object file: No such file or directory
|
376 |
+
libibverbs: Warning: couldn't load driver 'libbnxt_re-rdmav25.so': libbnxt_re-rdmav25.so: cannot open shared object file: No such file or directory
|
377 |
+
libibverbs: Warning: couldn't load driver 'libbnxt_re-rdmav25.so': libbnxt_re-rdmav25.so: cannot open shared object file: No such file or directory
|
378 |
+
libibverbs: Warning: couldn't load driver 'libocrdma-rdmav25.so': libocrdma-rdmav25.so: cannot open shared object file: No such file or directory
|
379 |
+
libibverbs: Warning: couldn't load driver 'libocrdma-rdmav25.so': libocrdma-rdmav25.so: cannot open shared object file: No such file or directory
|
380 |
+
libibverbs: Warning: couldn't load driver 'libmlx4-rdmav25.so': libmlx4-rdmav25.so: cannot open shared object file: No such file or directory
|
381 |
+
libibverbs: Warning: couldn't load driver 'libmlx4-rdmav25.so': libmlx4-rdmav25.so: cannot open shared object file: No such file or directory
|
382 |
+
libibverbs: Warning: couldn't load driver 'libqedr-rdmav25.so': libqedr-rdmav25.so: cannot open shared object file: No such file or directory
|
383 |
+
libibverbs: Warning: couldn't load driver 'libqedr-rdmav25.so': libqedr-rdmav25.so: cannot open shared object file: No such file or directory
|
384 |
+
libibverbs: Warning: couldn't load driver 'libcxgb4-rdmav25.so': libcxgb4-rdmav25.so: cannot open shared object file: No such file or directory
|
385 |
+
libibverbs: Warning: couldn't load driver 'libcxgb4-rdmav25.so': libcxgb4-rdmav25.so: cannot open shared object file: No such file or directory
|
386 |
+
libibverbs: Warning: couldn't load driver 'libi40iw-rdmav25.so': libi40iw-rdmav25.so: cannot open shared object file: No such file or directory
|
387 |
+
libibverbs: Warning: couldn't load driver 'libi40iw-rdmav25.so': libi40iw-rdmav25.so: cannot open shared object file: No such file or directory
|
388 |
+
libibverbs: Warning: couldn't load driver 'libefa-rdmav25.so': libefa-rdmav25.so: cannot open shared object file: No such file or directory
|
389 |
+
libibverbs: Warning: couldn't load driver 'libefa-rdmav25.so': libefa-rdmav25.so: cannot open shared object file: No such file or directory
|
390 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth0:22.6.229.207<0>
|
391 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Using non-device net plugin version 0
|
392 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Using network IB
|
393 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth0:22.6.229.207<0>
|
394 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Using non-device net plugin version 0
|
395 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Using network IB
|
396 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO comm 0x5fab2a0 rank 1 nranks 2 cudaDev 1 nvmlDev 1 busId 20 commId 0x4354081b9a4fac3e - Init START
|
397 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO comm 0xcadc830 rank 0 nranks 2 cudaDev 0 nvmlDev 0 busId 10 commId 0x4354081b9a4fac3e - Init START
|
398 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO comm 0x5fab2a0 rank 1 nRanks 2 nNodes 1 localRanks 2 localRank 1 MNNVL 0
|
399 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO comm 0xcadc830 rank 0 nRanks 2 nNodes 1 localRanks 2 localRank 0 MNNVL 0
|
400 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4.
|
401 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO NCCL_MIN_NCHANNELS set by environment to 4.
|
402 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Trees [0] -1/-1/-1->1->0 [1] 0/-1/-1->1->-1 [2] -1/-1/-1->1->0 [3] 0/-1/-1->1->-1
|
403 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 00/04 : 0 1
|
404 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO P2P Chunksize set to 524288
|
405 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 01/04 : 0 1
|
406 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 02/04 : 0 1
|
407 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 03/04 : 0 1
|
408 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] -1/-1/-1->0->1 [2] 1/-1/-1->0->-1 [3] -1/-1/-1->0->1
|
409 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO P2P Chunksize set to 524288
|
410 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 00/0 : 0[0] -> 1[1] via P2P/IPC/read
|
411 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Channel 00/0 : 1[1] -> 0[0] via P2P/IPC/read
|
412 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 01/0 : 0[0] -> 1[1] via P2P/IPC/read
|
413 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Channel 01/0 : 1[1] -> 0[0] via P2P/IPC/read
|
414 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 02/0 : 0[0] -> 1[1] via P2P/IPC/read
|
415 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Channel 02/0 : 1[1] -> 0[0] via P2P/IPC/read
|
416 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Channel 03/0 : 0[0] -> 1[1] via P2P/IPC/read
|
417 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Channel 03/0 : 1[1] -> 0[0] via P2P/IPC/read
|
418 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Connected all rings
|
419 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO Connected all trees
|
420 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Connected all rings
|
421 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO Connected all trees
|
422 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 512 | 512
|
423 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO 4 coll channels, 0 collnet channels, 0 nvls channels, 4 p2p channels, 4 p2p channels per peer
|
424 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 512 | 512
|
425 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO 4 coll channels, 0 collnet channels, 0 nvls channels, 4 p2p channels, 4 p2p channels per peer
|
426 |
+
dlc1h2tsljxspymy-master-0:29:48 [0] NCCL INFO comm 0xcadc830 rank 0 nranks 2 cudaDev 0 nvmlDev 0 busId 10 commId 0x4354081b9a4fac3e - Init COMPLETE
|
427 |
+
dlc1h2tsljxspymy-master-0:30:49 [1] NCCL INFO comm 0x5fab2a0 rank 1 nranks 2 cudaDev 1 nvmlDev 1 busId 20 commId 0x4354081b9a4fac3e - Init COMPLETE
|
428 |
+
[rank1]:[W1113 17:57:04.466839887 Utils.hpp:110] Warning: Environment variable NCCL_BLOCKING_WAIT is deprecated; use TORCH_NCCL_BLOCKING_WAIT instead (function operator())
|
429 |
+
[rank0]:[W1113 17:57:04.466860587 Utils.hpp:110] Warning: Environment variable NCCL_BLOCKING_WAIT is deprecated; use TORCH_NCCL_BLOCKING_WAIT instead (function operator())
|
430 |
+
Start new training from scratch
|
431 |
+
[rank1]:[W1113 17:57:35.632585586 reducer.cpp:1400] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
432 |
+
[rank0]:[W1113 17:57:35.632728951 reducer.cpp:1400] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
433 |
+
Train Summary | End of Epoch 1 | Time 32159.15s | Train Loss -1.994
|
434 |
+
Valid Summary | End of Epoch 1 | Time 903.30s | Valid Loss -4.076
|
435 |
+
Test Summary | End of Epoch 1 | Time 542.42s | Test Loss -4.185
|
436 |
+
Fund new best model, dict saved
|
437 |
+
Train Summary | End of Epoch 2 | Time 32169.63s | Train Loss -6.164
|
438 |
+
Valid Summary | End of Epoch 2 | Time 902.76s | Valid Loss -7.340
|
439 |
+
Test Summary | End of Epoch 2 | Time 541.88s | Test Loss -7.135
|
440 |
+
Fund new best model, dict saved
|
441 |
+
Train Summary | End of Epoch 3 | Time 16655.07s | Train Loss -8.472
|
442 |
+
Valid Summary | End of Epoch 3 | Time 450.73s | Valid Loss -8.972
|
443 |
+
Test Summary | End of Epoch 3 | Time 270.90s | Test Loss -8.697
|
444 |
+
Fund new best model, dict saved
|
445 |
+
Train Summary | End of Epoch 4 | Time 16672.81s | Train Loss -9.791
|
446 |
+
Valid Summary | End of Epoch 4 | Time 450.47s | Valid Loss -10.237
|
447 |
+
Test Summary | End of Epoch 4 | Time 270.82s | Test Loss -9.893
|
448 |
+
Fund new best model, dict saved
|
449 |
+
Train Summary | End of Epoch 5 | Time 16667.17s | Train Loss -10.707
|
450 |
+
Valid Summary | End of Epoch 5 | Time 450.19s | Valid Loss -10.793
|
451 |
+
Test Summary | End of Epoch 5 | Time 270.34s | Test Loss -10.466
|
452 |
+
Fund new best model, dict saved
|
453 |
+
Train Summary | End of Epoch 6 | Time 16675.25s | Train Loss -11.347
|
454 |
+
Valid Summary | End of Epoch 6 | Time 450.71s | Valid Loss -11.395
|
455 |
+
Test Summary | End of Epoch 6 | Time 271.05s | Test Loss -11.090
|
456 |
+
Fund new best model, dict saved
|
457 |
+
Train Summary | End of Epoch 7 | Time 16745.54s | Train Loss -11.889
|
458 |
+
Valid Summary | End of Epoch 7 | Time 451.96s | Valid Loss -11.784
|
459 |
+
Test Summary | End of Epoch 7 | Time 271.36s | Test Loss -11.404
|
460 |
+
Fund new best model, dict saved
|
461 |
+
Train Summary | End of Epoch 8 | Time 16731.87s | Train Loss -12.347
|
462 |
+
Valid Summary | End of Epoch 8 | Time 451.42s | Valid Loss -11.862
|
463 |
+
Test Summary | End of Epoch 8 | Time 270.95s | Test Loss -11.567
|
464 |
+
Fund new best model, dict saved
|
465 |
+
Train Summary | End of Epoch 9 | Time 16728.59s | Train Loss -12.715
|
466 |
+
Valid Summary | End of Epoch 9 | Time 451.57s | Valid Loss -12.517
|
467 |
+
Test Summary | End of Epoch 9 | Time 271.24s | Test Loss -11.989
|
468 |
+
Fund new best model, dict saved
|
469 |
+
Train Summary | End of Epoch 10 | Time 16734.21s | Train Loss -13.052
|
470 |
+
Valid Summary | End of Epoch 10 | Time 451.74s | Valid Loss -12.638
|
471 |
+
Test Summary | End of Epoch 10 | Time 271.40s | Test Loss -12.091
|
472 |
+
Fund new best model, dict saved
|
473 |
+
Train Summary | End of Epoch 11 | Time 16736.88s | Train Loss -13.331
|
474 |
+
Valid Summary | End of Epoch 11 | Time 451.91s | Valid Loss -12.844
|
475 |
+
Test Summary | End of Epoch 11 | Time 271.99s | Test Loss -12.463
|
476 |
+
Fund new best model, dict saved
|
477 |
+
Train Summary | End of Epoch 12 | Time 16754.74s | Train Loss -13.663
|
478 |
+
Valid Summary | End of Epoch 12 | Time 451.47s | Valid Loss -13.016
|
479 |
+
Test Summary | End of Epoch 12 | Time 271.31s | Test Loss -12.614
|
480 |
+
Fund new best model, dict saved
|
481 |
+
Train Summary | End of Epoch 13 | Time 16740.71s | Train Loss -13.833
|
482 |
+
Valid Summary | End of Epoch 13 | Time 451.63s | Valid Loss -13.124
|
483 |
+
Test Summary | End of Epoch 13 | Time 271.34s | Test Loss -12.827
|
484 |
+
Fund new best model, dict saved
|
485 |
+
Train Summary | End of Epoch 14 | Time 16732.43s | Train Loss -14.068
|
486 |
+
Valid Summary | End of Epoch 14 | Time 451.56s | Valid Loss -13.261
|
487 |
+
Test Summary | End of Epoch 14 | Time 271.57s | Test Loss -12.863
|
488 |
+
Fund new best model, dict saved
|
489 |
+
Train Summary | End of Epoch 15 | Time 16705.44s | Train Loss -14.255
|
490 |
+
Valid Summary | End of Epoch 15 | Time 450.84s | Valid Loss -13.477
|
491 |
+
Test Summary | End of Epoch 15 | Time 270.98s | Test Loss -13.126
|
492 |
+
Fund new best model, dict saved
|
493 |
+
Train Summary | End of Epoch 16 | Time 16732.34s | Train Loss -14.425
|
494 |
+
Valid Summary | End of Epoch 16 | Time 450.54s | Valid Loss -13.531
|
495 |
+
Test Summary | End of Epoch 16 | Time 270.10s | Test Loss -13.161
|
496 |
+
Fund new best model, dict saved
|
497 |
+
Train Summary | End of Epoch 17 | Time 16725.75s | Train Loss -14.559
|
498 |
+
Valid Summary | End of Epoch 17 | Time 450.59s | Valid Loss -13.624
|
499 |
+
Test Summary | End of Epoch 17 | Time 271.31s | Test Loss -13.168
|
500 |
+
Fund new best model, dict saved
|
501 |
+
Train Summary | End of Epoch 18 | Time 16639.55s | Train Loss -14.754
|
502 |
+
Valid Summary | End of Epoch 18 | Time 450.68s | Valid Loss -13.661
|
503 |
+
Test Summary | End of Epoch 18 | Time 270.49s | Test Loss -13.326
|
504 |
+
Fund new best model, dict saved
|
505 |
+
Train Summary | End of Epoch 19 | Time 16693.31s | Train Loss -14.864
|
506 |
+
Valid Summary | End of Epoch 19 | Time 450.50s | Valid Loss -13.803
|
507 |
+
Test Summary | End of Epoch 19 | Time 270.40s | Test Loss -13.268
|
508 |
+
Fund new best model, dict saved
|
509 |
+
Train Summary | End of Epoch 20 | Time 16672.09s | Train Loss -15.013
|
510 |
+
Valid Summary | End of Epoch 20 | Time 450.53s | Valid Loss -13.993
|
511 |
+
Test Summary | End of Epoch 20 | Time 270.43s | Test Loss -13.561
|
512 |
+
Fund new best model, dict saved
|
513 |
+
Train Summary | End of Epoch 21 | Time 16662.21s | Train Loss -15.142
|
514 |
+
Valid Summary | End of Epoch 21 | Time 450.41s | Valid Loss -14.015
|
515 |
+
Test Summary | End of Epoch 21 | Time 270.38s | Test Loss -13.555
|
516 |
+
Fund new best model, dict saved
|
517 |
+
Train Summary | End of Epoch 22 | Time 16682.18s | Train Loss -15.223
|
518 |
+
Valid Summary | End of Epoch 22 | Time 449.57s | Valid Loss -14.030
|
519 |
+
Test Summary | End of Epoch 22 | Time 270.29s | Test Loss -13.734
|
520 |
+
Fund new best model, dict saved
|
521 |
+
Train Summary | End of Epoch 23 | Time 16674.11s | Train Loss -15.370
|
522 |
+
Valid Summary | End of Epoch 23 | Time 449.79s | Valid Loss -14.037
|
523 |
+
Test Summary | End of Epoch 23 | Time 270.16s | Test Loss -13.485
|
524 |
+
Fund new best model, dict saved
|
525 |
+
Train Summary | End of Epoch 24 | Time 16740.27s | Train Loss -15.484
|
526 |
+
Valid Summary | End of Epoch 24 | Time 452.50s | Valid Loss -14.175
|
527 |
+
Test Summary | End of Epoch 24 | Time 271.40s | Test Loss -13.617
|
528 |
+
Fund new best model, dict saved
|
529 |
+
Train Summary | End of Epoch 25 | Time 17012.28s | Train Loss -15.558
|
530 |
+
Valid Summary | End of Epoch 25 | Time 450.05s | Valid Loss -14.159
|
531 |
+
Test Summary | End of Epoch 25 | Time 270.16s | Test Loss -13.763
|
532 |
+
Train Summary | End of Epoch 26 | Time 16678.31s | Train Loss -15.681
|
533 |
+
Valid Summary | End of Epoch 26 | Time 449.93s | Valid Loss -14.277
|
534 |
+
Test Summary | End of Epoch 26 | Time 270.31s | Test Loss -13.816
|
535 |
+
Fund new best model, dict saved
|
536 |
+
Train Summary | End of Epoch 27 | Time 16677.05s | Train Loss -15.737
|
537 |
+
Valid Summary | End of Epoch 27 | Time 450.08s | Valid Loss -14.110
|
538 |
+
Test Summary | End of Epoch 27 | Time 270.26s | Test Loss -13.692
|
539 |
+
Train Summary | End of Epoch 28 | Time 16673.83s | Train Loss -15.818
|
540 |
+
Valid Summary | End of Epoch 28 | Time 449.92s | Valid Loss -14.377
|
541 |
+
Test Summary | End of Epoch 28 | Time 270.28s | Test Loss -13.811
|
542 |
+
Fund new best model, dict saved
|
543 |
+
Train Summary | End of Epoch 29 | Time 16668.65s | Train Loss -15.898
|
544 |
+
Valid Summary | End of Epoch 29 | Time 449.85s | Valid Loss -14.417
|
545 |
+
Test Summary | End of Epoch 29 | Time 270.49s | Test Loss -13.873
|
546 |
+
Fund new best model, dict saved
|
547 |
+
Train Summary | End of Epoch 30 | Time 16670.93s | Train Loss -15.964
|
548 |
+
Valid Summary | End of Epoch 30 | Time 450.02s | Valid Loss -14.535
|
549 |
+
Test Summary | End of Epoch 30 | Time 270.53s | Test Loss -13.943
|
550 |
+
Fund new best model, dict saved
|
551 |
+
Train Summary | End of Epoch 31 | Time 16670.76s | Train Loss -16.044
|
552 |
+
Valid Summary | End of Epoch 31 | Time 450.22s | Valid Loss -14.449
|
553 |
+
Test Summary | End of Epoch 31 | Time 270.60s | Test Loss -13.832
|
554 |
+
Train Summary | End of Epoch 32 | Time 16669.80s | Train Loss -16.094
|
555 |
+
Valid Summary | End of Epoch 32 | Time 450.40s | Valid Loss -14.500
|
556 |
+
Test Summary | End of Epoch 32 | Time 270.49s | Test Loss -13.971
|
557 |
+
Train Summary | End of Epoch 33 | Time 16679.42s | Train Loss -16.179
|
558 |
+
Valid Summary | End of Epoch 33 | Time 450.37s | Valid Loss -14.525
|
559 |
+
Test Summary | End of Epoch 33 | Time 270.66s | Test Loss -14.027
|
560 |
+
Train Summary | End of Epoch 34 | Time 16695.78s | Train Loss -16.230
|
561 |
+
Valid Summary | End of Epoch 34 | Time 450.08s | Valid Loss -14.480
|
562 |
+
Test Summary | End of Epoch 34 | Time 270.38s | Test Loss -14.025
|
563 |
+
Train Summary | End of Epoch 35 | Time 16692.41s | Train Loss -16.284
|
564 |
+
Valid Summary | End of Epoch 35 | Time 450.05s | Valid Loss -14.578
|
565 |
+
Test Summary | End of Epoch 35 | Time 270.21s | Test Loss -13.989
|
566 |
+
Fund new best model, dict saved
|
567 |
+
Train Summary | End of Epoch 36 | Time 16692.23s | Train Loss -16.354
|
568 |
+
Valid Summary | End of Epoch 36 | Time 449.90s | Valid Loss -14.585
|
569 |
+
Test Summary | End of Epoch 36 | Time 270.30s | Test Loss -14.139
|
570 |
+
Fund new best model, dict saved
|
571 |
+
Train Summary | End of Epoch 37 | Time 16685.35s | Train Loss -16.404
|
572 |
+
Valid Summary | End of Epoch 37 | Time 449.81s | Valid Loss -14.721
|
573 |
+
Test Summary | End of Epoch 37 | Time 270.47s | Test Loss -14.192
|
574 |
+
Fund new best model, dict saved
|
575 |
+
Train Summary | End of Epoch 38 | Time 16684.55s | Train Loss -16.486
|
576 |
+
Valid Summary | End of Epoch 38 | Time 450.04s | Valid Loss -14.616
|
577 |
+
Test Summary | End of Epoch 38 | Time 270.32s | Test Loss -14.090
|
578 |
+
Train Summary | End of Epoch 39 | Time 16677.75s | Train Loss -16.533
|
579 |
+
Valid Summary | End of Epoch 39 | Time 450.01s | Valid Loss -14.719
|
580 |
+
Test Summary | End of Epoch 39 | Time 270.50s | Test Loss -14.230
|
581 |
+
Train Summary | End of Epoch 40 | Time 16673.64s | Train Loss -16.585
|
582 |
+
Valid Summary | End of Epoch 40 | Time 449.80s | Valid Loss -14.737
|
583 |
+
Test Summary | End of Epoch 40 | Time 270.47s | Test Loss -14.223
|
584 |
+
Fund new best model, dict saved
|
585 |
+
Train Summary | End of Epoch 41 | Time 16675.05s | Train Loss -16.608
|
586 |
+
Valid Summary | End of Epoch 41 | Time 450.04s | Valid Loss -14.743
|
587 |
+
Test Summary | End of Epoch 41 | Time 270.66s | Test Loss -14.350
|
588 |
+
Fund new best model, dict saved
|
589 |
+
Train Summary | End of Epoch 42 | Time 16674.30s | Train Loss -16.669
|
590 |
+
Valid Summary | End of Epoch 42 | Time 449.78s | Valid Loss -14.657
|
591 |
+
Test Summary | End of Epoch 42 | Time 270.22s | Test Loss -14.109
|
592 |
+
Train Summary | End of Epoch 43 | Time 16673.12s | Train Loss -16.722
|
593 |
+
Valid Summary | End of Epoch 43 | Time 449.97s | Valid Loss -14.834
|
594 |
+
Test Summary | End of Epoch 43 | Time 270.31s | Test Loss -14.221
|
595 |
+
Fund new best model, dict saved
|
596 |
+
Train Summary | End of Epoch 44 | Time 16681.09s | Train Loss -16.751
|
597 |
+
Valid Summary | End of Epoch 44 | Time 450.29s | Valid Loss -14.753
|
598 |
+
Test Summary | End of Epoch 44 | Time 270.49s | Test Loss -14.210
|
599 |
+
Train Summary | End of Epoch 45 | Time 16685.44s | Train Loss -16.819
|
600 |
+
Valid Summary | End of Epoch 45 | Time 450.07s | Valid Loss -14.793
|
601 |
+
Test Summary | End of Epoch 45 | Time 270.90s | Test Loss -14.182
|
602 |
+
Train Summary | End of Epoch 46 | Time 16678.05s | Train Loss -16.836
|
603 |
+
Valid Summary | End of Epoch 46 | Time 450.13s | Valid Loss -14.770
|
604 |
+
Test Summary | End of Epoch 46 | Time 270.73s | Test Loss -14.177
|
605 |
+
Train Summary | End of Epoch 47 | Time 16689.94s | Train Loss -16.903
|
606 |
+
Valid Summary | End of Epoch 47 | Time 449.85s | Valid Loss -14.859
|
607 |
+
Test Summary | End of Epoch 47 | Time 270.47s | Test Loss -14.310
|
608 |
+
Fund new best model, dict saved
|
609 |
+
Train Summary | End of Epoch 48 | Time 16795.76s | Train Loss -16.940
|
610 |
+
Valid Summary | End of Epoch 48 | Time 452.48s | Valid Loss -14.916
|
611 |
+
Test Summary | End of Epoch 48 | Time 272.45s | Test Loss -14.373
|
612 |
+
Fund new best model, dict saved
|
613 |
+
Train Summary | End of Epoch 49 | Time 16782.12s | Train Loss -16.991
|
614 |
+
Valid Summary | End of Epoch 49 | Time 451.16s | Valid Loss -14.877
|
615 |
+
Test Summary | End of Epoch 49 | Time 271.54s | Test Loss -14.249
|
616 |
+
Train Summary | End of Epoch 50 | Time 16773.27s | Train Loss -17.005
|
617 |
+
Valid Summary | End of Epoch 50 | Time 451.03s | Valid Loss -14.823
|
618 |
+
Test Summary | End of Epoch 50 | Time 271.00s | Test Loss -14.384
|
619 |
+
Train Summary | End of Epoch 51 | Time 16759.36s | Train Loss -17.036
|
620 |
+
Valid Summary | End of Epoch 51 | Time 450.81s | Valid Loss -14.887
|
621 |
+
Test Summary | End of Epoch 51 | Time 270.93s | Test Loss -14.350
|
622 |
+
Train Summary | End of Epoch 52 | Time 16758.53s | Train Loss -17.071
|
623 |
+
Valid Summary | End of Epoch 52 | Time 450.92s | Valid Loss -15.009
|
624 |
+
Test Summary | End of Epoch 52 | Time 271.18s | Test Loss -14.422
|
625 |
+
Fund new best model, dict saved
|
626 |
+
Train Summary | End of Epoch 53 | Time 16767.26s | Train Loss -17.120
|
627 |
+
Valid Summary | End of Epoch 53 | Time 451.49s | Valid Loss -14.815
|
628 |
+
Test Summary | End of Epoch 53 | Time 271.32s | Test Loss -14.253
|
629 |
+
Train Summary | End of Epoch 54 | Time 16760.62s | Train Loss -17.143
|
630 |
+
Valid Summary | End of Epoch 54 | Time 450.86s | Valid Loss -14.875
|
631 |
+
Test Summary | End of Epoch 54 | Time 270.82s | Test Loss -14.304
|
632 |
+
Train Summary | End of Epoch 55 | Time 16744.46s | Train Loss -17.175
|
633 |
+
Valid Summary | End of Epoch 55 | Time 450.54s | Valid Loss -14.823
|
634 |
+
Test Summary | End of Epoch 55 | Time 270.89s | Test Loss -14.382
|
635 |
+
Train Summary | End of Epoch 56 | Time 16745.39s | Train Loss -17.208
|
636 |
+
Valid Summary | End of Epoch 56 | Time 450.52s | Valid Loss -14.927
|
637 |
+
Test Summary | End of Epoch 56 | Time 270.84s | Test Loss -14.315
|
638 |
+
Train Summary | End of Epoch 57 | Time 16747.84s | Train Loss -17.248
|
639 |
+
Valid Summary | End of Epoch 57 | Time 450.80s | Valid Loss -14.802
|
640 |
+
Test Summary | End of Epoch 57 | Time 270.94s | Test Loss -14.131
|
641 |
+
reload weights and optimizer from last best checkpoint
|
642 |
+
Learning rate adjusted to: 0.000075
|
643 |
+
Train Summary | End of Epoch 58 | Time 16757.77s | Train Loss -17.292
|
644 |
+
Valid Summary | End of Epoch 58 | Time 451.37s | Valid Loss -15.072
|
645 |
+
Test Summary | End of Epoch 58 | Time 271.30s | Test Loss -14.426
|
646 |
+
Fund new best model, dict saved
|
647 |
+
Start evaluation
|
648 |
+
Avg SISNR:i tensor([14.6066], device='cuda:0')
|
649 |
+
Avg SNRi: 14.940969918598118
|
650 |
+
Avg PESQi: 1.4268671985467274
|
651 |
+
Avg STOIi: 0.27835948439816205
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731491824.dlc1h2tsljxspymy-master-0.29.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0ad965ac2585d54b13e83b5de5bb0d2967b7ae8c6cd0861532bff4add3bafa9
|
3 |
+
size 384
|
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731566864.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2160252c575b1e3359c6c0c5f2309527efbe3fe6e869decf4cdbc796cb312893
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731570913.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c29d6ef55b1e1851d3485b45708a7cf03049f111e17a6033920f1eab518d56c4
+size 532
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731640001.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c32d0447eae6cb77d3eddd565f3516fd1e024b31811f04a11879198e17c60aaf
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731643472.dlc1uz4efbcdp34x-master-0.28.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aae044127e823c4cc9ea6cc481ea6eabeb5fd5f87bb3a2b8ad35cf36878605f
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731649164.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2de87f9e4da9e5f302898f18fec7b98fd03b30dc2c62d01bd835faceb8b85b3
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731651543.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6841260693be16e2b6bd7f515c717f9dad1c50533964d22675c05b00e5d3734e
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731669934.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:270ffb2cb176948d1ea2da440a5d50b46a7be05ed6cbad94f41169feb46c8cfc
+size 828
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731762939.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b96b5861a382aea9865d499a1225e66231adcb049af3e31aefc1bd6ebcfc67d
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731781044.dlc1uz4efbcdp34x-master-0.27.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de26bf0c0e3884f53ca5d8a6c597f675da5ef8e9d49581b1ff6748ef0f5326ff
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731814134.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7e7185b6b7e85505dd986ed96fe0a286cb8e800cfbf2ed7c9aac61754ac3005
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731827391.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ae946e669399c51cc7aa25f798be88eac98aef8e5571148586b454ee7326810
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731834146.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1180605827e32bcf2889730e303d001add8febb870011a4edb25d7d82c99b2e
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731839773.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7c3058bfe5c6f3869e73d5e2d161ec68e73da7c1273e966b45e448c88d56fb
+size 680
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731918387.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0eb3548fdebac2cbf101ad935ad8d7f51917d72dee57858e9676dc874920cd4
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731921663.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11f0c59521ce9b83bf6179805a7713a9d9489034b58875db04a45a1adf6036a0
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731923844.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f61011a39fb85d82d1cdfa1bce19d3f514c5b56db23bd900e0bbb0f60959f2c
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731932635.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5e311917d55d395862d322a4ed2b0a71dc8df78b676b1d995c9cafddf7bd84b
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1731944330.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f794719a9719ddde9b92f5cfe2f87fd0ccfb20ac93728aa8c650eb4ea7d41be9
+size 532
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732008941.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e95f3f7fe5fadb0d936eebf61fabe84d83e4f24d83a5e4b44f07e80958eb8ef4
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732009565.dlc1uz4efbcdp34x-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9720bbc513cc36bc9e0bfa8529e76ee617ea3648d669e1f0d1a1b41df1894ca8
+size 532
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732070579.dlc1uz4efbcdp34x-master-0.25.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f513dbcba288b9fef4eb6a7cc83f8d9d6a257eb21aa628b12d0dada4fcfbb3e
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732085511.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64ba57cf85eed97578201593a1bc0c77c3b99e71f79c0d16858e6ac3a70a2df5
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732091307.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98e4f82410859aa33db6b3d99b9bcde41dcd90acbb1323a1e1d2da079f02dcdd
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732108121.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac4681a18718510fcd4ec3f1e14470a644718aeb0da171a227e63a2349e23a4
+size 4084
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732592656.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e2433e3b44c9faa7040b63420c4bb1658fc1566d625fd2a3532c2b83f810c37
+size 236
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732622764.dlc1evi5tz54lvk8-master-0.27.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6793ebf66252a0c9e41f0fa71cff90b2aa2eb99511fd73a075ee0d52b6e88678
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732631132.dlc1evi5tz54lvk8-master-0.24.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87ef87f9f8c35aa0471772c3443c87ed8663a879afffb5baf502696c62104bbd
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732635246.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7b06841c2a29b35daa0cbcb53f24ae7cd00854eb19970a29f9d651a301b592f
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732656623.dlc1evi5tz54lvk8-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f0f1d51ddf3deb97400671d4c482b5a2270d13f46bbb36e6b807cae61a6e36
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732674150.dlcb6t1c7cg4v7av-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5887ef9bc1cb43f1b554a94718302ed74cc1ebc1c6b3fc98e7ccf00861032456
+size 88
checkpoints/log_VoxCeleb2_lip_mossformer2_2spk/tensorboard/events.out.tfevents.1732683557.dlcb6t1c7cg4v7av-master-0.26.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3631cfe3013f3289896f1c9aa874bba04168a82e0cc95028b97c2023b6b3e1e
+size 1568
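After fetching the real payloads behind these pointers (for example with `git lfs pull`), the event files can be inspected with TensorBoard's `EventAccumulator`. The sketch below is a general-purpose reader, not anything shipped in this repository, and which scalar tags exist depends on what the training script actually logged.

```python
# Sketch for inspecting a fetched tfevents file; requires the real event data
# (after `git lfs pull`), not the LFS pointer files committed in this diff.
from tensorboard.backend.event_processing import event_accumulator

def list_scalars(event_file):
    ea = event_accumulator.EventAccumulator(event_file)
    ea.Reload()                      # parse the event file
    for tag in ea.Tags().get("scalars", []):
        events = ea.Scalars(tag)     # each event has .step, .wall_time, .value
        print(tag, [(e.step, e.value) for e in events[:5]])
```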