mingyuan commited on
Commit
a0d91d3
·
1 Parent(s): 39ee545

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. README.md +6 -9
  3. app.py +123 -0
  4. configs/_base_/datasets/human_ml3d_bs128.py +60 -0
  5. configs/_base_/datasets/kit_ml_bs128.py +60 -0
  6. configs/mdm/mdm_t2m_official.py +67 -0
  7. configs/motiondiffuse/motiondiffuse_kit.py +89 -0
  8. configs/motiondiffuse/motiondiffuse_t2m.py +90 -0
  9. configs/remodiffuse/remodiffuse_kit.py +141 -0
  10. configs/remodiffuse/remodiffuse_t2m.py +141 -0
  11. data/database/t2m_text_train.npz +3 -0
  12. data/datasets/human_ml3d/mean.npy +3 -0
  13. data/datasets/human_ml3d/std.npy +3 -0
  14. data/datasets/kit_ml/mean.npy +3 -0
  15. data/datasets/kit_ml/std.npy +3 -0
  16. logs/mdm/mdm_t2m/latest.pth +3 -0
  17. logs/motiondiffuse/motiondiffuse_t2m/latest.pth +3 -0
  18. logs/remodiffuse/remodiffuse_t2m/latest.pth +3 -0
  19. mogen/__init__.py +56 -0
  20. mogen/apis/__init__.py +13 -0
  21. mogen/apis/test.py +160 -0
  22. mogen/apis/train.py +165 -0
  23. mogen/core/__init__.py +0 -0
  24. mogen/core/distributed_wrapper.py +136 -0
  25. mogen/core/evaluation/__init__.py +4 -0
  26. mogen/core/evaluation/builder.py +29 -0
  27. mogen/core/evaluation/eval_hooks.py +138 -0
  28. mogen/core/evaluation/evaluators/__init__.py +0 -0
  29. mogen/core/evaluation/evaluators/base_evaluator.py +144 -0
  30. mogen/core/evaluation/evaluators/diversity_evaluator.py +52 -0
  31. mogen/core/evaluation/evaluators/fid_evaluator.py +58 -0
  32. mogen/core/evaluation/evaluators/matching_score_evaluator.py +71 -0
  33. mogen/core/evaluation/evaluators/multimodality_evaluator.py +63 -0
  34. mogen/core/evaluation/evaluators/precision_evaluator.py +74 -0
  35. mogen/core/evaluation/get_model.py +46 -0
  36. mogen/core/evaluation/utils.py +130 -0
  37. mogen/core/optimizer/__init__.py +3 -0
  38. mogen/core/optimizer/builder.py +52 -0
  39. mogen/datasets/__init__.py +11 -0
  40. mogen/datasets/base_dataset.py +117 -0
  41. mogen/datasets/builder.py +113 -0
  42. mogen/datasets/dataset_wrappers.py +42 -0
  43. mogen/datasets/pipelines/__init__.py +18 -0
  44. mogen/datasets/pipelines/compose.py +42 -0
  45. mogen/datasets/pipelines/formatting.py +134 -0
  46. mogen/datasets/pipelines/transforms.py +120 -0
  47. mogen/datasets/samplers/__init__.py +3 -0
  48. mogen/datasets/samplers/distributed_sampler.py +42 -0
  49. mogen/datasets/text_motion_dataset.py +93 -0
  50. mogen/models/__init__.py +7 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ **__pycache__**
README.md CHANGED
@@ -1,12 +1,9 @@
1
- ---
2
- title: ReMoDiffuse
3
- emoji: 📚
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.43.2
8
  app_file: app.py
9
  pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ title: MotionDiffuse
2
+ emoji: 🏢
3
+ colorFrom: blue
4
+ colorTo: red
 
5
  sdk: gradio
6
+ sdk_version: 3.44.1
7
  app_file: app.py
8
  pinned: false
9
+ license: mit
 
 
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ os.makedirs("outputs", exist_ok=True)
6
+ sys.path.insert(0, '.')
7
+
8
+ import argparse
9
+ import os.path as osp
10
+ import mmcv
11
+ import numpy as np
12
+ import torch
13
+ from mogen.models import build_architecture
14
+ from mmcv.runner import load_checkpoint
15
+ from mmcv.parallel import MMDataParallel
16
+ from mogen.utils.plot_utils import (
17
+ recover_from_ric,
18
+ plot_3d_motion,
19
+ t2m_kinematic_chain
20
+ )
21
+ from scipy.ndimage import gaussian_filter
22
+ from IPython.display import Image
23
+
24
+
25
+ def motion_temporal_filter(motion, sigma=1):
26
+ motion = motion.reshape(motion.shape[0], -1)
27
+ for i in range(motion.shape[1]):
28
+ motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
29
+ return motion.reshape(motion.shape[0], -1, 3)
30
+
31
+
32
+ def plot_t2m(data, result_path, npy_path, caption):
33
+ joint = recover_from_ric(torch.from_numpy(data).float(), 22).numpy()
34
+ joint = motion_temporal_filter(joint, sigma=2.5)
35
+ plot_3d_motion(result_path, t2m_kinematic_chain, joint, title=caption, fps=20)
36
+ if npy_path is not None:
37
+ np.save(npy_path, joint)
38
+
39
+ def create_remodiffuse():
40
+ config_path = "configs/remodiffuse/remodiffuse_t2m.py"
41
+ ckpt_path = "logs/remodiffuse/remodiffuse_t2m/latest.pth"
42
+ cfg = mmcv.Config.fromfile(config_path)
43
+ model = build_architecture(cfg.model)
44
+ load_checkpoint(model, ckpt_path, map_location='cpu')
45
+ model.cpu()
46
+ model.eval()
47
+ return model
48
+
49
+ def create_motiondiffuse():
50
+ config_path = "configs/motiondiffuse/motiondiffuse_t2m.py"
51
+ ckpt_path = "logs/motiondiffuse/motiondiffuse_t2m/latest.pth"
52
+ cfg = mmcv.Config.fromfile(config_path)
53
+ model = build_architecture(cfg.model)
54
+ load_checkpoint(model, ckpt_path, map_location='cpu')
55
+ model.cpu()
56
+ model.eval()
57
+ return model
58
+
59
+ def create_mdm():
60
+ config_path = "configs/mdm/mdm_t2m_official.py"
61
+ ckpt_path = "logs/mdm/mdm_t2m/latest.pth"
62
+ cfg = mmcv.Config.fromfile(config_path)
63
+ model = build_architecture(cfg.model)
64
+ load_checkpoint(model, ckpt_path, map_location='cpu')
65
+ model.cpu()
66
+ model.eval()
67
+ return model
68
+
69
+ model_remodiffuse = create_remodiffuse()
70
+ # model_motiondiffuse = create_motiondiffuse()
71
+ # model_mdm = create_mdm()
72
+
73
+ mean_path = "data/datasets/human_ml3d/mean.npy"
74
+ std_path = "data/datasets/human_ml3d/std.npy"
75
+ mean = np.load(mean_path)
76
+ std = np.load(std_path)
77
+
78
+
79
+ def show_generation_result(model, text, motion_length, result_path):
80
+ device = 'cpu'
81
+ motion = torch.zeros(1, motion_length, 263).to(device)
82
+ motion_mask = torch.ones(1, motion_length).to(device)
83
+ motion_length = torch.Tensor([motion_length]).long().to(device)
84
+ model = model.to(device)
85
+ input = {
86
+ 'motion': motion,
87
+ 'motion_mask': motion_mask,
88
+ 'motion_length': motion_length,
89
+ 'motion_metas': [{'text': text}],
90
+ }
91
+
92
+ all_pred_motion = []
93
+ with torch.no_grad():
94
+ input['inference_kwargs'] = {}
95
+ output_list = []
96
+ output = model(**input)[0]['pred_motion']
97
+ pred_motion = output.cpu().detach().numpy()
98
+ pred_motion = pred_motion * std + mean
99
+
100
+ plot_t2m(pred_motion, result_path, None, text)
101
+
102
+ def generate(prompt, length):
103
+ if not os.path.exists("outputs"):
104
+ os.mkdir("outputs")
105
+ result_path = "outputs/" + str(hash(prompt)) + ".mp4"
106
+ show_generation_result(model_remodiffuse, prompt, length, result_path)
107
+ return result_path
108
+
109
+ demo = gr.Interface(
110
+ fn=generate,
111
+ inputs=["text", gr.Slider(20, 196, value=60)],
112
+ examples=[
113
+ ["the man throws a punch with each hand.", 58],
114
+ ["a person spins quickly and takes off running.", 29],
115
+ ["a person quickly waves with their right hand", 46],
116
+ ["a person performing a slight bow", 89],
117
+ ],
118
+ outputs="video",
119
+ title="ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model",
120
+ description="This is an interactive demo for ReMoDiffuse. For more information, feel free to visit our project page(https://mingyuan-zhang.github.io/projects/ReMoDiffuse.html).")
121
+
122
+ demo.queue()
123
+ demo.launch()
configs/_base_/datasets/human_ml3d_bs128.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ data_keys = ['motion', 'motion_mask', 'motion_length', 'clip_feat']
3
+ meta_keys = ['text', 'token']
4
+ train_pipeline = [
5
+ dict(
6
+ type='Normalize',
7
+ mean_path='data/datasets/human_ml3d/mean.npy',
8
+ std_path='data/datasets/human_ml3d/std.npy'),
9
+ dict(type='Crop', crop_size=196),
10
+ dict(type='ToTensor', keys=data_keys),
11
+ dict(type='Collect', keys=data_keys, meta_keys=meta_keys)
12
+ ]
13
+
14
+ data = dict(
15
+ samples_per_gpu=128,
16
+ workers_per_gpu=1,
17
+ train=dict(
18
+ type='RepeatDataset',
19
+ dataset=dict(
20
+ type='TextMotionDataset',
21
+ dataset_name='human_ml3d',
22
+ data_prefix='data',
23
+ pipeline=train_pipeline,
24
+ ann_file='train.txt',
25
+ motion_dir='motions',
26
+ text_dir='texts',
27
+ token_dir='tokens',
28
+ clip_feat_dir='clip_feats',
29
+ ),
30
+ times=200
31
+ ),
32
+ test=dict(
33
+ type='TextMotionDataset',
34
+ dataset_name='human_ml3d',
35
+ data_prefix='data',
36
+ pipeline=train_pipeline,
37
+ ann_file='test.txt',
38
+ motion_dir='motions',
39
+ text_dir='texts',
40
+ token_dir='tokens',
41
+ clip_feat_dir='clip_feats',
42
+ eval_cfg=dict(
43
+ shuffle_indexes=True,
44
+ replication_times=20,
45
+ replication_reduction='statistics',
46
+ text_encoder_name='human_ml3d',
47
+ text_encoder_path='data/evaluators/human_ml3d/finest.tar',
48
+ motion_encoder_name='human_ml3d',
49
+ motion_encoder_path='data/evaluators/human_ml3d/finest.tar',
50
+ metrics=[
51
+ dict(type='R Precision', batch_size=32, top_k=3),
52
+ dict(type='Matching Score', batch_size=32),
53
+ dict(type='FID'),
54
+ dict(type='Diversity', num_samples=300),
55
+ dict(type='MultiModality', num_samples=100, num_repeats=30, num_picks=10)
56
+ ]
57
+ ),
58
+ test_mode=True
59
+ )
60
+ )
configs/_base_/datasets/kit_ml_bs128.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ data_keys = ['motion', 'motion_mask', 'motion_length', 'clip_feat']
3
+ meta_keys = ['text', 'token']
4
+ train_pipeline = [
5
+ dict(type='Crop', crop_size=196),
6
+ dict(
7
+ type='Normalize',
8
+ mean_path='data/datasets/kit_ml/mean.npy',
9
+ std_path='data/datasets/kit_ml/std.npy'),
10
+ dict(type='ToTensor', keys=data_keys),
11
+ dict(type='Collect', keys=data_keys, meta_keys=meta_keys)
12
+ ]
13
+
14
+ data = dict(
15
+ samples_per_gpu=128,
16
+ workers_per_gpu=1,
17
+ train=dict(
18
+ type='RepeatDataset',
19
+ dataset=dict(
20
+ type='TextMotionDataset',
21
+ dataset_name='kit_ml',
22
+ data_prefix='data',
23
+ pipeline=train_pipeline,
24
+ ann_file='train.txt',
25
+ motion_dir='motions',
26
+ text_dir='texts',
27
+ token_dir='tokens',
28
+ clip_feat_dir='clip_feats',
29
+ ),
30
+ times=100
31
+ ),
32
+ test=dict(
33
+ type='TextMotionDataset',
34
+ dataset_name='kit_ml',
35
+ data_prefix='data',
36
+ pipeline=train_pipeline,
37
+ ann_file='test.txt',
38
+ motion_dir='motions',
39
+ text_dir='texts',
40
+ token_dir='tokens',
41
+ clip_feat_dir='clip_feats',
42
+ eval_cfg=dict(
43
+ shuffle_indexes=True,
44
+ replication_times=20,
45
+ replication_reduction='statistics',
46
+ text_encoder_name='kit_ml',
47
+ text_encoder_path='data/evaluators/kit_ml/finest.tar',
48
+ motion_encoder_name='kit_ml',
49
+ motion_encoder_path='data/evaluators/kit_ml/finest.tar',
50
+ metrics=[
51
+ dict(type='R Precision', batch_size=32, top_k=3),
52
+ dict(type='Matching Score', batch_size=32),
53
+ dict(type='FID'),
54
+ dict(type='Diversity', num_samples=300),
55
+ dict(type='MultiModality', num_samples=50, num_repeats=30, num_picks=10)
56
+ ]
57
+ ),
58
+ test_mode=True
59
+ )
60
+ )
configs/mdm/mdm_t2m_official.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['../_base_/datasets/human_ml3d_bs128.py']
2
+
3
+ # checkpoint saving
4
+ checkpoint_config = dict(interval=1)
5
+
6
+ dist_params = dict(backend='nccl')
7
+ log_level = 'INFO'
8
+ load_from = None
9
+ resume_from = None
10
+ workflow = [('train', 1)]
11
+
12
+ # optimizer
13
+ optimizer = dict(type='Adam', lr=1e-4)
14
+ optimizer_config = dict(grad_clip=None)
15
+ # learning policy
16
+ lr_config = dict(policy='step', step=[])
17
+ runner = dict(type='EpochBasedRunner', max_epochs=50)
18
+
19
+ log_config = dict(
20
+ interval=50,
21
+ hooks=[
22
+ dict(type='TextLoggerHook'),
23
+ # dict(type='TensorboardLoggerHook')
24
+ ])
25
+
26
+ input_feats = 263
27
+ max_seq_len = 196
28
+ latent_dim = 512
29
+ time_embed_dim = 2048
30
+ text_latent_dim = 256
31
+ ff_size = 1024
32
+ num_layers = 8
33
+ num_heads = 4
34
+ dropout = 0.1
35
+ cond_mask_prob = 0.1
36
+ # model settings
37
+ model = dict(
38
+ type='MotionDiffusion',
39
+ model=dict(
40
+ type='MDMTransformer',
41
+ input_feats=input_feats,
42
+ latent_dim=latent_dim,
43
+ ff_size=ff_size,
44
+ num_layers=num_layers,
45
+ num_heads=num_heads,
46
+ dropout=dropout,
47
+ time_embed_dim=time_embed_dim,
48
+ cond_mask_prob=cond_mask_prob,
49
+ guide_scale=2.5,
50
+ clip_version='ViT-B/32',
51
+ use_official_ckpt=True
52
+ ),
53
+ loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
54
+ diffusion_train=dict(
55
+ beta_scheduler='cosine',
56
+ diffusion_steps=1000,
57
+ model_mean_type='start_x',
58
+ model_var_type='fixed_small',
59
+ ),
60
+ diffusion_test=dict(
61
+ beta_scheduler='cosine',
62
+ diffusion_steps=1000,
63
+ model_mean_type='start_x',
64
+ model_var_type='fixed_small',
65
+ ),
66
+ inference_type='ddpm'
67
+ )
configs/motiondiffuse/motiondiffuse_kit.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['../_base_/datasets/kit_ml_bs128.py']
2
+
3
+ # checkpoint saving
4
+ checkpoint_config = dict(interval=1)
5
+
6
+ dist_params = dict(backend='nccl')
7
+ log_level = 'INFO'
8
+ load_from = None
9
+ resume_from = None
10
+ workflow = [('train', 1)]
11
+
12
+ # optimizer
13
+ optimizer = dict(type='Adam', lr=2e-4)
14
+ optimizer_config = dict(grad_clip=None)
15
+ # learning policy
16
+ lr_config = dict(policy='step', step=[])
17
+ runner = dict(type='EpochBasedRunner', max_epochs=50)
18
+
19
+ log_config = dict(
20
+ interval=50,
21
+ hooks=[
22
+ dict(type='TextLoggerHook'),
23
+ # dict(type='TensorboardLoggerHook')
24
+ ])
25
+
26
+ input_feats = 251
27
+ max_seq_len = 196
28
+ latent_dim = 512
29
+ time_embed_dim = 2048
30
+ text_latent_dim = 256
31
+ ff_size = 1024
32
+ num_heads = 8
33
+ dropout = 0
34
+ # model settings
35
+ model = dict(
36
+ type='MotionDiffusion',
37
+ model=dict(
38
+ type='MotionDiffuseTransformer',
39
+ input_feats=input_feats,
40
+ max_seq_len=max_seq_len,
41
+ latent_dim=latent_dim,
42
+ time_embed_dim=time_embed_dim,
43
+ num_layers=8,
44
+ sa_block_cfg=dict(
45
+ type='EfficientSelfAttention',
46
+ latent_dim=latent_dim,
47
+ num_heads=num_heads,
48
+ dropout=dropout,
49
+ time_embed_dim=time_embed_dim
50
+ ),
51
+ ca_block_cfg=dict(
52
+ type='EfficientCrossAttention',
53
+ latent_dim=latent_dim,
54
+ text_latent_dim=text_latent_dim,
55
+ num_heads=num_heads,
56
+ dropout=dropout,
57
+ time_embed_dim=time_embed_dim
58
+ ),
59
+ ffn_cfg=dict(
60
+ latent_dim=latent_dim,
61
+ ffn_dim=ff_size,
62
+ dropout=dropout,
63
+ time_embed_dim=time_embed_dim
64
+ ),
65
+ text_encoder=dict(
66
+ pretrained_model='clip',
67
+ latent_dim=text_latent_dim,
68
+ num_layers=4,
69
+ num_heads=4,
70
+ ff_size=2048,
71
+ dropout=dropout,
72
+ use_text_proj=True
73
+ )
74
+ ),
75
+ loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
76
+ diffusion_train=dict(
77
+ beta_scheduler='linear',
78
+ diffusion_steps=1000,
79
+ model_mean_type='epsilon',
80
+ model_var_type='fixed_small',
81
+ ),
82
+ diffusion_test=dict(
83
+ beta_scheduler='linear',
84
+ diffusion_steps=1000,
85
+ model_mean_type='epsilon',
86
+ model_var_type='fixed_small',
87
+ ),
88
+ inference_type='ddpm'
89
+ )
configs/motiondiffuse/motiondiffuse_t2m.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['../_base_/datasets/human_ml3d_bs128.py']
2
+
3
+ # checkpoint saving
4
+ checkpoint_config = dict(interval=1)
5
+
6
+ dist_params = dict(backend='nccl')
7
+ log_level = 'INFO'
8
+ load_from = None
9
+ resume_from = None
10
+ workflow = [('train', 1)]
11
+
12
+ # optimizer
13
+ optimizer = dict(type='Adam', lr=2e-4)
14
+ optimizer_config = dict(grad_clip=None)
15
+ # learning policy
16
+ lr_config = dict(policy='step', step=[])
17
+ runner = dict(type='EpochBasedRunner', max_epochs=50)
18
+
19
+ log_config = dict(
20
+ interval=50,
21
+ hooks=[
22
+ dict(type='TextLoggerHook'),
23
+ # dict(type='TensorboardLoggerHook')
24
+ ])
25
+
26
+ input_feats = 263
27
+ max_seq_len = 196
28
+ latent_dim = 512
29
+ time_embed_dim = 2048
30
+ text_latent_dim = 256
31
+ ff_size = 1024
32
+ num_heads = 8
33
+ dropout = 0
34
+ # model settings
35
+ model = dict(
36
+ type='MotionDiffusion',
37
+ model=dict(
38
+ type='MotionDiffuseTransformer',
39
+ input_feats=input_feats,
40
+ max_seq_len=max_seq_len,
41
+ latent_dim=latent_dim,
42
+ time_embed_dim=time_embed_dim,
43
+ num_layers=8,
44
+ sa_block_cfg=dict(
45
+ type='EfficientSelfAttention',
46
+ latent_dim=latent_dim,
47
+ num_heads=num_heads,
48
+ dropout=dropout,
49
+ time_embed_dim=time_embed_dim
50
+ ),
51
+ ca_block_cfg=dict(
52
+ type='EfficientCrossAttention',
53
+ latent_dim=latent_dim,
54
+ text_latent_dim=text_latent_dim,
55
+ num_heads=num_heads,
56
+ dropout=dropout,
57
+ time_embed_dim=time_embed_dim
58
+ ),
59
+ ffn_cfg=dict(
60
+ latent_dim=latent_dim,
61
+ ffn_dim=ff_size,
62
+ dropout=dropout,
63
+ time_embed_dim=time_embed_dim
64
+ ),
65
+ text_encoder=dict(
66
+ pretrained_model='clip',
67
+ latent_dim=text_latent_dim,
68
+ num_layers=4,
69
+ num_heads=4,
70
+ ff_size=2048,
71
+ dropout=dropout,
72
+ use_text_proj=True
73
+ )
74
+ ),
75
+ loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
76
+ diffusion_train=dict(
77
+ beta_scheduler='linear',
78
+ diffusion_steps=1000,
79
+ model_mean_type='epsilon',
80
+ model_var_type='fixed_small',
81
+ ),
82
+ diffusion_test=dict(
83
+ beta_scheduler='linear',
84
+ diffusion_steps=1000,
85
+ model_mean_type='epsilon',
86
+ model_var_type='fixed_small',
87
+ ),
88
+ inference_type='ddpm'
89
+ )
90
+ data = dict(samples_per_gpu=128)
configs/remodiffuse/remodiffuse_kit.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['../_base_/datasets/kit_ml_bs128.py']
2
+
3
+ # checkpoint saving
4
+ checkpoint_config = dict(interval=1)
5
+
6
+ dist_params = dict(backend='nccl')
7
+ log_level = 'INFO'
8
+ load_from = None
9
+ resume_from = None
10
+ workflow = [('train', 1)]
11
+
12
+ # optimizer
13
+ optimizer = dict(type='Adam', lr=2e-4)
14
+ optimizer_config = dict(grad_clip=None)
15
+ # learning policy
16
+ lr_config = dict(policy='CosineAnnealing', min_lr_ratio=2e-5, by_epoch=False)
17
+ runner = dict(type='EpochBasedRunner', max_epochs=20)
18
+
19
+ log_config = dict(
20
+ interval=50,
21
+ hooks=[
22
+ dict(type='TextLoggerHook'),
23
+ # dict(type='TensorboardLoggerHook')
24
+ ])
25
+
26
+ input_feats = 251
27
+ max_seq_len = 196
28
+ latent_dim = 512
29
+ time_embed_dim = 2048
30
+ text_latent_dim = 256
31
+ ff_size = 1024
32
+ num_heads = 8
33
+ dropout = 0
34
+
35
+ def scale_func(timestep):
36
+ import random
37
+ w = (1 - (1000 - timestep) / 1000) * 4.0 + 1
38
+ if timestep > 100:
39
+ if random.randint(0, 1) == 0:
40
+ output = {
41
+ 'both_coef': w,
42
+ 'text_coef': 0,
43
+ 'retr_coef': 1 - w,
44
+ 'none_coef': 0
45
+ }
46
+ else:
47
+ output = {
48
+ 'both_coef': 0,
49
+ 'text_coef': w,
50
+ 'retr_coef': 0,
51
+ 'none_coef': 1 - w
52
+ }
53
+ else:
54
+ both_coef = 0.78123
55
+ text_coef = 0.39284
56
+ retr_coef = -0.12475
57
+ none_coef = 1 - both_coef - text_coef - retr_coef
58
+ output = {
59
+ 'both_coef': both_coef,
60
+ 'text_coef': text_coef,
61
+ 'retr_coef': retr_coef,
62
+ 'none_coef': none_coef
63
+ }
64
+ return output
65
+
66
+ # model settings
67
+ model = dict(
68
+ type='MotionDiffusion',
69
+ model=dict(
70
+ type='ReMoDiffuseTransformer',
71
+ input_feats=input_feats,
72
+ max_seq_len=max_seq_len,
73
+ latent_dim=latent_dim,
74
+ time_embed_dim=time_embed_dim,
75
+ num_layers=4,
76
+ ca_block_cfg=dict(
77
+ type='SemanticsModulatedAttention',
78
+ latent_dim=latent_dim,
79
+ text_latent_dim=text_latent_dim,
80
+ num_heads=num_heads,
81
+ dropout=dropout,
82
+ time_embed_dim=time_embed_dim
83
+ ),
84
+ ffn_cfg=dict(
85
+ latent_dim=latent_dim,
86
+ ffn_dim=ff_size,
87
+ dropout=dropout,
88
+ time_embed_dim=time_embed_dim
89
+ ),
90
+ text_encoder=dict(
91
+ pretrained_model='clip',
92
+ latent_dim=text_latent_dim,
93
+ num_layers=2,
94
+ ff_size=2048,
95
+ dropout=dropout,
96
+ use_text_proj=False
97
+ ),
98
+ retrieval_cfg=dict(
99
+ num_retrieval=2,
100
+ stride=4,
101
+ num_layers=2,
102
+ num_motion_layers=2,
103
+ kinematic_coef=0.1,
104
+ topk=2,
105
+ retrieval_file='data/database/kit_text_train.npz',
106
+ latent_dim=latent_dim,
107
+ output_dim=latent_dim,
108
+ max_seq_len=max_seq_len,
109
+ num_heads=num_heads,
110
+ ff_size=ff_size,
111
+ dropout=dropout,
112
+ ffn_cfg=dict(
113
+ latent_dim=latent_dim,
114
+ ffn_dim=ff_size,
115
+ dropout=dropout,
116
+ ),
117
+ sa_block_cfg=dict(
118
+ type='EfficientSelfAttention',
119
+ latent_dim=latent_dim,
120
+ num_heads=num_heads,
121
+ dropout=dropout
122
+ ),
123
+ ),
124
+ scale_func=scale_func
125
+ ),
126
+ loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
127
+ diffusion_train=dict(
128
+ beta_scheduler='linear',
129
+ diffusion_steps=1000,
130
+ model_mean_type='start_x',
131
+ model_var_type='fixed_large',
132
+ ),
133
+ diffusion_test=dict(
134
+ beta_scheduler='linear',
135
+ diffusion_steps=1000,
136
+ model_mean_type='start_x',
137
+ model_var_type='fixed_large',
138
+ respace='15,15,8,6,6',
139
+ ),
140
+ inference_type='ddim'
141
+ )
configs/remodiffuse/remodiffuse_t2m.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['../_base_/datasets/human_ml3d_bs128.py']
2
+
3
+ # checkpoint saving
4
+ checkpoint_config = dict(interval=1)
5
+
6
+ dist_params = dict(backend='nccl')
7
+ log_level = 'INFO'
8
+ load_from = None
9
+ resume_from = None
10
+ workflow = [('train', 1)]
11
+
12
+ # optimizer
13
+ optimizer = dict(type='Adam', lr=2e-4)
14
+ optimizer_config = dict(grad_clip=None)
15
+ # learning policy
16
+ lr_config = dict(policy='CosineAnnealing', min_lr_ratio=2e-5, by_epoch=False)
17
+ runner = dict(type='EpochBasedRunner', max_epochs=40)
18
+
19
+ log_config = dict(
20
+ interval=50,
21
+ hooks=[
22
+ dict(type='TextLoggerHook'),
23
+ # dict(type='TensorboardLoggerHook')
24
+ ])
25
+
26
+ input_feats = 263
27
+ max_seq_len = 196
28
+ latent_dim = 512
29
+ time_embed_dim = 2048
30
+ text_latent_dim = 256
31
+ ff_size = 1024
32
+ num_heads = 8
33
+ dropout = 0
34
+
35
+ def scale_func(timestep):
36
+ import random
37
+ w = (1 - (1000 - timestep) / 1000) * 6.5 + 1
38
+ if timestep > 100:
39
+ if random.randint(0, 1) == 0:
40
+ output = {
41
+ 'both_coef': w,
42
+ 'text_coef': 0,
43
+ 'retr_coef': 1 - w,
44
+ 'none_coef': 0
45
+ }
46
+ else:
47
+ output = {
48
+ 'both_coef': 0,
49
+ 'text_coef': w,
50
+ 'retr_coef': 0,
51
+ 'none_coef': 1 - w
52
+ }
53
+ else:
54
+ both_coef = 0.52351
55
+ text_coef = -0.28419
56
+ retr_coef = 2.39872
57
+ none_coef = 1 - both_coef - text_coef - retr_coef
58
+ output = {
59
+ 'both_coef': both_coef,
60
+ 'text_coef': text_coef,
61
+ 'retr_coef': retr_coef,
62
+ 'none_coef': none_coef
63
+ }
64
+ return output
65
+
66
+ # model settings
67
+ model = dict(
68
+ type='MotionDiffusion',
69
+ model=dict(
70
+ type='ReMoDiffuseTransformer',
71
+ input_feats=input_feats,
72
+ max_seq_len=max_seq_len,
73
+ latent_dim=latent_dim,
74
+ time_embed_dim=time_embed_dim,
75
+ num_layers=4,
76
+ ca_block_cfg=dict(
77
+ type='SemanticsModulatedAttention',
78
+ latent_dim=latent_dim,
79
+ text_latent_dim=text_latent_dim,
80
+ num_heads=num_heads,
81
+ dropout=dropout,
82
+ time_embed_dim=time_embed_dim
83
+ ),
84
+ ffn_cfg=dict(
85
+ latent_dim=latent_dim,
86
+ ffn_dim=ff_size,
87
+ dropout=dropout,
88
+ time_embed_dim=time_embed_dim
89
+ ),
90
+ text_encoder=dict(
91
+ pretrained_model='clip',
92
+ latent_dim=text_latent_dim,
93
+ num_layers=2,
94
+ ff_size=2048,
95
+ dropout=dropout,
96
+ use_text_proj=False
97
+ ),
98
+ retrieval_cfg=dict(
99
+ num_retrieval=2,
100
+ stride=4,
101
+ num_layers=2,
102
+ num_motion_layers=2,
103
+ kinematic_coef=0.1,
104
+ topk=2,
105
+ retrieval_file='data/database/t2m_text_train.npz',
106
+ latent_dim=latent_dim,
107
+ output_dim=latent_dim,
108
+ max_seq_len=max_seq_len,
109
+ num_heads=num_heads,
110
+ ff_size=ff_size,
111
+ dropout=dropout,
112
+ ffn_cfg=dict(
113
+ latent_dim=latent_dim,
114
+ ffn_dim=ff_size,
115
+ dropout=dropout,
116
+ ),
117
+ sa_block_cfg=dict(
118
+ type='EfficientSelfAttention',
119
+ latent_dim=latent_dim,
120
+ num_heads=num_heads,
121
+ dropout=dropout
122
+ ),
123
+ ),
124
+ scale_func=scale_func
125
+ ),
126
+ loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
127
+ diffusion_train=dict(
128
+ beta_scheduler='linear',
129
+ diffusion_steps=1000,
130
+ model_mean_type='start_x',
131
+ model_var_type='fixed_large',
132
+ ),
133
+ diffusion_test=dict(
134
+ beta_scheduler='linear',
135
+ diffusion_steps=1000,
136
+ model_mean_type='start_x',
137
+ model_var_type='fixed_large',
138
+ respace='15,15,8,6,6',
139
+ ),
140
+ inference_type='ddim'
141
+ )
data/database/t2m_text_train.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae3575b686e29623f9e1715345b052726650f53c5bfcc770d9fb87a827a60249
3
+ size 1462801786
data/datasets/human_ml3d/mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d73483a5b53e017b4044fe363164d7c185082a02ae7f69525ea70c5ccfd4a85
3
+ size 1180
data/datasets/human_ml3d/std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6d720e004b6da18e8033d739de6078cbc7c1c8fad0ff62eee86f173e4430a2
3
+ size 1180
data/datasets/kit_ml/mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e23fac51db2215ab5666324226be48f27efd6a6e7b22ebd17c28e0f056a7c22
3
+ size 2136
data/datasets/kit_ml/std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296a60656cea07e65ee64512d73d47c0412df0698b35194116330661be32fa90
3
+ size 2136
logs/mdm/mdm_t2m/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8810255fb8df9eed6211537de9826f07ff73862f367cbf91532d84fd4c9a497e
3
+ size 81791550
logs/motiondiffuse/motiondiffuse_t2m/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521baa6ba60865710bc75b99f393b133e45dc18083229a2258a16e5dc65f904a
3
+ size 348728194
logs/remodiffuse/remodiffuse_t2m/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa34b3328942769478e96283678424c95c4b817ca6f7162c4cf1fc512d4951b
3
+ size 187939375
mogen/__init__.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ import mmcv
4
+ from packaging.version import parse
5
+
6
+ from .version import __version__
7
+
8
+
9
+ def digit_version(version_str: str, length: int = 4):
10
+ """Convert a version string into a tuple of integers.
11
+ This method is usually used for comparing two versions. For pre-release
12
+ versions: alpha < beta < rc.
13
+ Args:
14
+ version_str (str): The version string.
15
+ length (int): The maximum number of version levels. Default: 4.
16
+ Returns:
17
+ tuple[int]: The version info in digits (integers).
18
+ """
19
+ version = parse(version_str)
20
+ assert version.release, f'failed to parse version {version_str}'
21
+ release = list(version.release)
22
+ release = release[:length]
23
+ if len(release) < length:
24
+ release = release + [0] * (length - len(release))
25
+ if version.is_prerelease:
26
+ mapping = {'a': -3, 'b': -2, 'rc': -1}
27
+ val = -4
28
+ # version.pre can be None
29
+ if version.pre:
30
+ if version.pre[0] not in mapping:
31
+ warnings.warn(f'unknown prerelease version {version.pre[0]}, '
32
+ 'version checking may go wrong')
33
+ else:
34
+ val = mapping[version.pre[0]]
35
+ release.extend([val, version.pre[-1]])
36
+ else:
37
+ release.extend([val, 0])
38
+
39
+ elif version.is_postrelease:
40
+ release.extend([1, version.post])
41
+ else:
42
+ release.extend([0, 0])
43
+ return tuple(release)
44
+
45
+
46
+ mmcv_minimum_version = '1.4.2'
47
+ mmcv_maximum_version = '1.9.0'
48
+ mmcv_version = digit_version(mmcv.__version__)
49
+
50
+
51
+ assert (mmcv_version >= digit_version(mmcv_minimum_version)
52
+ and mmcv_version <= digit_version(mmcv_maximum_version)), \
53
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
54
+ f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
55
+
56
+ __all__ = ['__version__', 'digit_version']
mogen/apis/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mogen.apis import test, train
2
+ from mogen.apis.test import (
3
+ collect_results_cpu,
4
+ collect_results_gpu,
5
+ multi_gpu_test,
6
+ single_gpu_test,
7
+ )
8
+ from mogen.apis.train import set_random_seed, train_model
9
+
10
+ __all__ = [
11
+ 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
12
+ 'single_gpu_test', 'set_random_seed', 'train_model'
13
+ ]
mogen/apis/test.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path as osp
2
+ import pickle
3
+ import shutil
4
+ import tempfile
5
+ import time
6
+
7
+ import mmcv
8
+ import torch
9
+ import torch.distributed as dist
10
+ from mmcv.runner import get_dist_info
11
+
12
+
13
+ def single_gpu_test(model, data_loader):
14
+ """Test with single gpu."""
15
+ model.eval()
16
+ results = []
17
+ dataset = data_loader.dataset
18
+ prog_bar = mmcv.ProgressBar(len(dataset))
19
+ for i, data in enumerate(data_loader):
20
+ with torch.no_grad():
21
+ result = model(return_loss=False, **data)
22
+
23
+ batch_size = len(result)
24
+ if isinstance(result, list):
25
+ results.extend(result)
26
+ else:
27
+ results.append(result)
28
+
29
+ batch_size = data['motion'].size(0)
30
+ for _ in range(batch_size):
31
+ prog_bar.update()
32
+ return results
33
+
34
+
35
+ def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
36
+ """Test model with multiple gpus.
37
+ This method tests model with multiple gpus and collects the results
38
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
39
+ it encodes results to gpu tensors and use gpu communication for results
40
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
41
+ and collects them by the rank 0 worker.
42
+ Args:
43
+ model (nn.Module): Model to be tested.
44
+ data_loader (nn.Dataloader): Pytorch data loader.
45
+ tmpdir (str): Path of directory to save the temporary results from
46
+ different gpus under cpu mode.
47
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
48
+ Returns:
49
+ list: The prediction results.
50
+ """
51
+ model.eval()
52
+ results = []
53
+ dataset = data_loader.dataset
54
+ rank, world_size = get_dist_info()
55
+ if rank == 0:
56
+ # Check if tmpdir is valid for cpu_collect
57
+ if (not gpu_collect) and (tmpdir is not None and osp.exists(tmpdir)):
58
+ raise OSError((f'The tmpdir {tmpdir} already exists.',
59
+ ' Since tmpdir will be deleted after testing,',
60
+ ' please make sure you specify an empty one.'))
61
+ prog_bar = mmcv.ProgressBar(len(dataset))
62
+ time.sleep(2) # This line can prevent deadlock problem in some cases.
63
+ for i, data in enumerate(data_loader):
64
+ with torch.no_grad():
65
+ result = model(return_loss=False, **data)
66
+ if isinstance(result, list):
67
+ results.extend(result)
68
+ else:
69
+ results.append(result)
70
+
71
+ if rank == 0:
72
+ batch_size = data['motion'].size(0)
73
+ for _ in range(batch_size * world_size):
74
+ prog_bar.update()
75
+
76
+ # collect results from all ranks
77
+ if gpu_collect:
78
+ results = collect_results_gpu(results, len(dataset))
79
+ else:
80
+ results = collect_results_cpu(results, len(dataset), tmpdir)
81
+ return results
82
+
83
+
84
+ def collect_results_cpu(result_part, size, tmpdir=None):
85
+ """Collect results in cpu."""
86
+ rank, world_size = get_dist_info()
87
+ # create a tmp dir if it is not specified
88
+ if tmpdir is None:
89
+ MAX_LEN = 512
90
+ # 32 is whitespace
91
+ dir_tensor = torch.full((MAX_LEN, ),
92
+ 32,
93
+ dtype=torch.uint8,
94
+ device='cuda')
95
+ if rank == 0:
96
+ mmcv.mkdir_or_exist('.dist_test')
97
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
98
+ tmpdir = torch.tensor(
99
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
100
+ dir_tensor[:len(tmpdir)] = tmpdir
101
+ dist.broadcast(dir_tensor, 0)
102
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
103
+ else:
104
+ mmcv.mkdir_or_exist(tmpdir)
105
+ # dump the part result to the dir
106
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
107
+ dist.barrier()
108
+ # collect all parts
109
+ if rank != 0:
110
+ return None
111
+ else:
112
+ # load results of all parts from tmp dir
113
+ part_list = []
114
+ for i in range(world_size):
115
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
116
+ part_result = mmcv.load(part_file)
117
+ part_list.append(part_result)
118
+ # sort the results
119
+ ordered_results = []
120
+ for res in zip(*part_list):
121
+ ordered_results.extend(list(res))
122
+ # the dataloader may pad some samples
123
+ ordered_results = ordered_results[:size]
124
+ # remove tmp dir
125
+ shutil.rmtree(tmpdir)
126
+ return ordered_results
127
+
128
+
129
+ def collect_results_gpu(result_part, size):
130
+ """Collect results in gpu."""
131
+ rank, world_size = get_dist_info()
132
+ # dump result part to tensor with pickle
133
+ part_tensor = torch.tensor(
134
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
135
+ # gather all result part tensor shape
136
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
137
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
138
+ dist.all_gather(shape_list, shape_tensor)
139
+ # padding result part tensor to max length
140
+ shape_max = torch.tensor(shape_list).max()
141
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
142
+ part_send[:shape_tensor[0]] = part_tensor
143
+ part_recv_list = [
144
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
145
+ ]
146
+ # gather all result part
147
+ dist.all_gather(part_recv_list, part_send)
148
+
149
+ if rank == 0:
150
+ part_list = []
151
+ for recv, shape in zip(part_recv_list, shape_list):
152
+ part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
153
+ part_list.append(part_result)
154
+ # sort the results
155
+ ordered_results = []
156
+ for res in zip(*part_list):
157
+ ordered_results.extend(list(res))
158
+ # the dataloader may pad some samples
159
+ ordered_results = ordered_results[:size]
160
+ return ordered_results
mogen/apis/train.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import warnings
3
+
4
+ import numpy as np
5
+ import torch
6
+ from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
7
+ from mmcv.runner import (
8
+ DistSamplerSeedHook,
9
+ Fp16OptimizerHook,
10
+ OptimizerHook,
11
+ build_runner,
12
+ )
13
+
14
+ from mogen.core.distributed_wrapper import DistributedDataParallelWrapper
15
+ from mogen.core.evaluation import DistEvalHook, EvalHook
16
+ from mogen.core.optimizer import build_optimizers
17
+ from mogen.datasets import build_dataloader, build_dataset
18
+ from mogen.utils import get_root_logger
19
+
20
+
21
+ def set_random_seed(seed, deterministic=False):
22
+ """Set random seed.
23
+ Args:
24
+ seed (int): Seed to be used.
25
+ deterministic (bool): Whether to set the deterministic option for
26
+ CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
27
+ to True and `torch.backends.cudnn.benchmark` to False.
28
+ Default: False.
29
+ """
30
+ random.seed(seed)
31
+ np.random.seed(seed)
32
+ torch.manual_seed(seed)
33
+ torch.cuda.manual_seed_all(seed)
34
+ if deterministic:
35
+ torch.backends.cudnn.deterministic = True
36
+ torch.backends.cudnn.benchmark = False
37
+
38
+
39
+ def train_model(model,
40
+ dataset,
41
+ cfg,
42
+ distributed=False,
43
+ validate=False,
44
+ timestamp=None,
45
+ device='cuda',
46
+ meta=None):
47
+ """Main api for training model."""
48
+ logger = get_root_logger(cfg.log_level)
49
+
50
+ # prepare data loaders
51
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
52
+
53
+ data_loaders = [
54
+ build_dataloader(
55
+ ds,
56
+ cfg.data.samples_per_gpu,
57
+ cfg.data.workers_per_gpu,
58
+ # cfg.gpus will be ignored if distributed
59
+ num_gpus=len(cfg.gpu_ids),
60
+ dist=distributed,
61
+ round_up=True,
62
+ seed=cfg.seed) for ds in dataset
63
+ ]
64
+
65
+ # determine whether use adversarial training precess or not
66
+ use_adverserial_train = cfg.get('use_adversarial_train', False)
67
+
68
+ # put model on gpus
69
+ if distributed:
70
+ find_unused_parameters = cfg.get('find_unused_parameters', True)
71
+ # Sets the `find_unused_parameters` parameter in
72
+ # torch.nn.parallel.DistributedDataParallel
73
+ if use_adverserial_train:
74
+ # Use DistributedDataParallelWrapper for adversarial training
75
+ model = DistributedDataParallelWrapper(
76
+ model,
77
+ device_ids=[torch.cuda.current_device()],
78
+ broadcast_buffers=False,
79
+ find_unused_parameters=find_unused_parameters)
80
+ else:
81
+ model = MMDistributedDataParallel(
82
+ model.cuda(),
83
+ device_ids=[torch.cuda.current_device()],
84
+ broadcast_buffers=False,
85
+ find_unused_parameters=find_unused_parameters)
86
+ else:
87
+ if device == 'cuda':
88
+ model = MMDataParallel(
89
+ model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
90
+ elif device == 'cpu':
91
+ model = model.cpu()
92
+ else:
93
+ raise ValueError(F'unsupported device name {device}.')
94
+
95
+ # build runner
96
+ optimizer = build_optimizers(model, cfg.optimizer)
97
+
98
+ if cfg.get('runner') is None:
99
+ cfg.runner = {
100
+ 'type': 'EpochBasedRunner',
101
+ 'max_epochs': cfg.total_epochs
102
+ }
103
+ warnings.warn(
104
+ 'config is now expected to have a `runner` section, '
105
+ 'please set `runner` in your config.', UserWarning)
106
+
107
+ runner = build_runner(
108
+ cfg.runner,
109
+ default_args=dict(
110
+ model=model,
111
+ batch_processor=None,
112
+ optimizer=optimizer,
113
+ work_dir=cfg.work_dir,
114
+ logger=logger,
115
+ meta=meta))
116
+
117
+ # an ugly walkaround to make the .log and .log.json filenames the same
118
+ runner.timestamp = timestamp
119
+
120
+ if use_adverserial_train:
121
+ # The optimizer step process is included in the train_step function
122
+ # of the model, so the runner should NOT include optimizer hook.
123
+ optimizer_config = None
124
+ else:
125
+ # fp16 setting
126
+ fp16_cfg = cfg.get('fp16', None)
127
+ if fp16_cfg is not None:
128
+ optimizer_config = Fp16OptimizerHook(
129
+ **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
130
+ elif distributed and 'type' not in cfg.optimizer_config:
131
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
132
+ else:
133
+ optimizer_config = cfg.optimizer_config
134
+
135
+ # register hooks
136
+ runner.register_training_hooks(
137
+ cfg.lr_config,
138
+ optimizer_config,
139
+ cfg.checkpoint_config,
140
+ cfg.log_config,
141
+ cfg.get('momentum_config', None),
142
+ custom_hooks_config=cfg.get('custom_hooks', None))
143
+ if distributed:
144
+ runner.register_hook(DistSamplerSeedHook())
145
+
146
+ # register eval hooks
147
+ if validate:
148
+ val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
149
+ val_dataloader = build_dataloader(
150
+ val_dataset,
151
+ samples_per_gpu=cfg.data.samples_per_gpu,
152
+ workers_per_gpu=cfg.data.workers_per_gpu,
153
+ dist=distributed,
154
+ shuffle=False,
155
+ round_up=True)
156
+ eval_cfg = cfg.get('evaluation', {})
157
+ eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
158
+ eval_hook = DistEvalHook if distributed else EvalHook
159
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
160
+
161
+ if cfg.resume_from:
162
+ runner.resume(cfg.resume_from)
163
+ elif cfg.load_from:
164
+ runner.load_checkpoint(cfg.load_from)
165
+ runner.run(data_loaders, cfg.workflow)
mogen/core/__init__.py ADDED
File without changes
mogen/core/distributed_wrapper.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ import torch.nn as nn
4
+ from mmcv.parallel import MODULE_WRAPPERS, MMDistributedDataParallel
5
+ from mmcv.parallel.scatter_gather import scatter_kwargs
6
+ from torch.cuda._utils import _get_device_index
7
+
8
+
9
+ @MODULE_WRAPPERS.register_module()
10
+ class DistributedDataParallelWrapper(nn.Module):
11
+ """A DistributedDataParallel wrapper for models in 3D mesh estimation task.
12
+
13
+ In 3D mesh estimation task, there is a need to wrap different modules in
14
+ the models with separate DistributedDataParallel. Otherwise, it will cause
15
+ errors for GAN training.
16
+ More specific, the GAN model, usually has two sub-modules:
17
+ generator and discriminator. If we wrap both of them in one
18
+ standard DistributedDataParallel, it will cause errors during training,
19
+ because when we update the parameters of the generator (or discriminator),
20
+ the parameters of the discriminator (or generator) is not updated, which is
21
+ not allowed for DistributedDataParallel.
22
+ So we design this wrapper to separately wrap DistributedDataParallel
23
+ for generator and discriminator.
24
+ In this wrapper, we perform two operations:
25
+ 1. Wrap the modules in the models with separate MMDistributedDataParallel.
26
+ Note that only modules with parameters will be wrapped.
27
+ 2. Do scatter operation for 'forward', 'train_step' and 'val_step'.
28
+ Note that the arguments of this wrapper is the same as those in
29
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
30
+ Args:
31
+ module (nn.Module): Module that needs to be wrapped.
32
+ device_ids (list[int | `torch.device`]): Same as that in
33
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
34
+ dim (int, optional): Same as that in the official scatter function in
35
+ pytorch. Defaults to 0.
36
+ broadcast_buffers (bool): Same as that in
37
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
38
+ Defaults to False.
39
+ find_unused_parameters (bool, optional): Same as that in
40
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
41
+ Traverse the autograd graph of all tensors contained in returned
42
+ value of the wrapped module’s forward function. Defaults to False.
43
+ kwargs (dict): Other arguments used in
44
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
45
+ """
46
+
47
+ def __init__(self,
48
+ module,
49
+ device_ids,
50
+ dim=0,
51
+ broadcast_buffers=False,
52
+ find_unused_parameters=False,
53
+ **kwargs):
54
+ super().__init__()
55
+ assert len(device_ids) == 1, (
56
+ 'Currently, DistributedDataParallelWrapper only supports one'
57
+ 'single CUDA device for each process.'
58
+ f'The length of device_ids must be 1, but got {len(device_ids)}.')
59
+ self.module = module
60
+ self.dim = dim
61
+ self.to_ddp(
62
+ device_ids=device_ids,
63
+ dim=dim,
64
+ broadcast_buffers=broadcast_buffers,
65
+ find_unused_parameters=find_unused_parameters,
66
+ **kwargs)
67
+ self.output_device = _get_device_index(device_ids[0], True)
68
+
69
+ def to_ddp(self, device_ids, dim, broadcast_buffers,
70
+ find_unused_parameters, **kwargs):
71
+ """Wrap models with separate MMDistributedDataParallel.
72
+
73
+ It only wraps the modules with parameters.
74
+ """
75
+ for name, module in self.module._modules.items():
76
+ if next(module.parameters(), None) is None:
77
+ module = module.cuda()
78
+ elif all(not p.requires_grad for p in module.parameters()):
79
+ module = module.cuda()
80
+ else:
81
+ module = MMDistributedDataParallel(
82
+ module.cuda(),
83
+ device_ids=device_ids,
84
+ dim=dim,
85
+ broadcast_buffers=broadcast_buffers,
86
+ find_unused_parameters=find_unused_parameters,
87
+ **kwargs)
88
+ self.module._modules[name] = module
89
+
90
+ def scatter(self, inputs, kwargs, device_ids):
91
+ """Scatter function.
92
+
93
+ Args:
94
+ inputs (Tensor): Input Tensor.
95
+ kwargs (dict): Args for
96
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
97
+ device_ids (int): Device id.
98
+ """
99
+ return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
100
+
101
+ def forward(self, *inputs, **kwargs):
102
+ """Forward function.
103
+
104
+ Args:
105
+ inputs (tuple): Input data.
106
+ kwargs (dict): Args for
107
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
108
+ """
109
+ inputs, kwargs = self.scatter(inputs, kwargs,
110
+ [torch.cuda.current_device()])
111
+ return self.module(*inputs[0], **kwargs[0])
112
+
113
+ def train_step(self, *inputs, **kwargs):
114
+ """Train step function.
115
+
116
+ Args:
117
+ inputs (Tensor): Input Tensor.
118
+ kwargs (dict): Args for
119
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
120
+ """
121
+ inputs, kwargs = self.scatter(inputs, kwargs,
122
+ [torch.cuda.current_device()])
123
+ output = self.module.train_step(*inputs[0], **kwargs[0])
124
+ return output
125
+
126
+ def val_step(self, *inputs, **kwargs):
127
+ """Validation step function.
128
+
129
+ Args:
130
+ inputs (tuple): Input data.
131
+ kwargs (dict): Args for ``scatter_kwargs``.
132
+ """
133
+ inputs, kwargs = self.scatter(inputs, kwargs,
134
+ [torch.cuda.current_device()])
135
+ output = self.module.val_step(*inputs[0], **kwargs[0])
136
+ return output
mogen/core/evaluation/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mogen.core.evaluation.eval_hooks import DistEvalHook, EvalHook
2
+ from mogen.core.evaluation.builder import build_evaluator
3
+
4
+ __all__ = ["DistEvalHook", "EvalHook", "build_evaluator"]
mogen/core/evaluation/builder.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import numpy as np
3
+ from mmcv.utils import Registry
4
+ from .evaluators.precision_evaluator import PrecisionEvaluator
5
+ from .evaluators.matching_score_evaluator import MatchingScoreEvaluator
6
+ from .evaluators.fid_evaluator import FIDEvaluator
7
+ from .evaluators.diversity_evaluator import DiversityEvaluator
8
+ from .evaluators.multimodality_evaluator import MultiModalityEvaluator
9
+
10
+ EVALUATORS = Registry('evaluators')
11
+
12
+ EVALUATORS.register_module(name='R Precision', module=PrecisionEvaluator)
13
+ EVALUATORS.register_module(name='Matching Score', module=MatchingScoreEvaluator)
14
+ EVALUATORS.register_module(name='FID', module=FIDEvaluator)
15
+ EVALUATORS.register_module(name='Diversity', module=DiversityEvaluator)
16
+ EVALUATORS.register_module(name='MultiModality', module=MultiModalityEvaluator)
17
+
18
+
19
+ def build_evaluator(metric, eval_cfg, data_len, eval_indexes):
20
+ cfg = copy.deepcopy(eval_cfg)
21
+ cfg.update(metric)
22
+ cfg.pop('metrics')
23
+ cfg['data_len'] = data_len
24
+ cfg['eval_indexes'] = eval_indexes
25
+ evaluator = EVALUATORS.build(cfg)
26
+ if evaluator.append_indexes is not None:
27
+ for i in range(eval_cfg['replication_times']):
28
+ eval_indexes[i] = np.concatenate((eval_indexes[i], evaluator.append_indexes[i]), axis=0)
29
+ return evaluator, eval_indexes
mogen/core/evaluation/eval_hooks.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import tempfile
3
+ import warnings
4
+
5
+ from mmcv.runner import DistEvalHook as BaseDistEvalHook
6
+ from mmcv.runner import EvalHook as BaseEvalHook
7
+
8
+ mogen_GREATER_KEYS = []
9
+ mogen_LESS_KEYS = []
10
+
11
+
12
+ class EvalHook(BaseEvalHook):
13
+
14
+ def __init__(self,
15
+ dataloader,
16
+ start=None,
17
+ interval=1,
18
+ by_epoch=True,
19
+ save_best=None,
20
+ rule=None,
21
+ test_fn=None,
22
+ greater_keys=mogen_GREATER_KEYS,
23
+ less_keys=mogen_LESS_KEYS,
24
+ **eval_kwargs):
25
+ if test_fn is None:
26
+ from mogen.apis import single_gpu_test
27
+ test_fn = single_gpu_test
28
+
29
+ # remove "gpu_collect" from eval_kwargs
30
+ if 'gpu_collect' in eval_kwargs:
31
+ warnings.warn(
32
+ '"gpu_collect" will be deprecated in EvalHook.'
33
+ 'Please remove it from the config.', DeprecationWarning)
34
+ _ = eval_kwargs.pop('gpu_collect')
35
+
36
+ # update "save_best" according to "key_indicator" and remove the
37
+ # latter from eval_kwargs
38
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
39
+ warnings.warn(
40
+ '"key_indicator" will be deprecated in EvalHook.'
41
+ 'Please use "save_best" to specify the metric key,'
42
+ 'e.g., save_best="pa-mpjpe".', DeprecationWarning)
43
+
44
+ key_indicator = eval_kwargs.pop('key_indicator', None)
45
+ if save_best is True and key_indicator is None:
46
+ raise ValueError('key_indicator should not be None, when '
47
+ 'save_best is set to True.')
48
+ save_best = key_indicator
49
+
50
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
51
+ rule, test_fn, greater_keys, less_keys, **eval_kwargs)
52
+
53
+ def evaluate(self, runner, results):
54
+
55
+ with tempfile.TemporaryDirectory() as tmp_dir:
56
+ eval_res = self.dataloader.dataset.evaluate(
57
+ results,
58
+ work_dir=tmp_dir,
59
+ logger=runner.logger,
60
+ **self.eval_kwargs)
61
+
62
+ for name, val in eval_res.items():
63
+ runner.log_buffer.output[name] = val
64
+ runner.log_buffer.ready = True
65
+
66
+ if self.save_best is not None:
67
+ if self.key_indicator == 'auto':
68
+ self._init_rule(self.rule, list(eval_res.keys())[0])
69
+
70
+ return eval_res[self.key_indicator]
71
+
72
+ return None
73
+
74
+
75
+ class DistEvalHook(BaseDistEvalHook):
76
+
77
+ def __init__(self,
78
+ dataloader,
79
+ start=None,
80
+ interval=1,
81
+ by_epoch=True,
82
+ save_best=None,
83
+ rule=None,
84
+ test_fn=None,
85
+ greater_keys=mogen_GREATER_KEYS,
86
+ less_keys=mogen_LESS_KEYS,
87
+ broadcast_bn_buffer=True,
88
+ tmpdir=None,
89
+ gpu_collect=False,
90
+ **eval_kwargs):
91
+
92
+ if test_fn is None:
93
+ from mogen.apis import multi_gpu_test
94
+ test_fn = multi_gpu_test
95
+
96
+ # update "save_best" according to "key_indicator" and remove the
97
+ # latter from eval_kwargs
98
+ if 'key_indicator' in eval_kwargs or isinstance(save_best, bool):
99
+ warnings.warn(
100
+ '"key_indicator" will be deprecated in EvalHook.'
101
+ 'Please use "save_best" to specify the metric key,'
102
+ 'e.g., save_best="pa-mpjpe".', DeprecationWarning)
103
+
104
+ key_indicator = eval_kwargs.pop('key_indicator', None)
105
+ if save_best is True and key_indicator is None:
106
+ raise ValueError('key_indicator should not be None, when '
107
+ 'save_best is set to True.')
108
+ save_best = key_indicator
109
+
110
+ super().__init__(dataloader, start, interval, by_epoch, save_best,
111
+ rule, test_fn, greater_keys, less_keys,
112
+ broadcast_bn_buffer, tmpdir, gpu_collect,
113
+ **eval_kwargs)
114
+
115
+ def evaluate(self, runner, results):
116
+ """Evaluate the results.
117
+ Args:
118
+ runner (:obj:`mmcv.Runner`): The underlined training runner.
119
+ results (list): Output results.
120
+ """
121
+ with tempfile.TemporaryDirectory() as tmp_dir:
122
+ eval_res = self.dataloader.dataset.evaluate(
123
+ results,
124
+ work_dir=tmp_dir,
125
+ logger=runner.logger,
126
+ **self.eval_kwargs)
127
+
128
+ for name, val in eval_res.items():
129
+ runner.log_buffer.output[name] = val
130
+ runner.log_buffer.ready = True
131
+
132
+ if self.save_best is not None:
133
+ if self.key_indicator == 'auto':
134
+ # infer from eval_results
135
+ self._init_rule(self.rule, list(eval_res.keys())[0])
136
+ return eval_res[self.key_indicator]
137
+
138
+ return None
mogen/core/evaluation/evaluators/__init__.py ADDED
File without changes
mogen/core/evaluation/evaluators/base_evaluator.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from ..utils import get_metric_statistics
4
+
5
+
6
+ class BaseEvaluator(object):
7
+
8
+ def __init__(self,
9
+ batch_size=None,
10
+ drop_last=False,
11
+ replication_times=1,
12
+ replication_reduction='statistics',
13
+ eval_begin_idx=None,
14
+ eval_end_idx=None):
15
+ self.batch_size = batch_size
16
+ self.drop_last = drop_last
17
+ self.replication_times = replication_times
18
+ self.replication_reduction = replication_reduction
19
+ assert replication_reduction in ['statistics', 'mean', 'concat']
20
+ self.eval_begin_idx = eval_begin_idx
21
+ self.eval_end_idx = eval_end_idx
22
+
23
+ def evaluate(self, results):
24
+ total_len = len(results)
25
+ partial_len = total_len // self.replication_times
26
+ all_metrics = []
27
+ for replication_idx in range(self.replication_times):
28
+ partial_results = results[
29
+ replication_idx * partial_len: (replication_idx + 1) * partial_len]
30
+ if self.batch_size is not None:
31
+ batch_metrics = []
32
+ for batch_start in range(self.eval_begin_idx, self.eval_end_idx, self.batch_size):
33
+ batch_results = partial_results[batch_start: batch_start + self.batch_size]
34
+ if len(batch_results) < self.batch_size and self.drop_last:
35
+ continue
36
+ batch_metrics.append(self.single_evaluate(batch_results))
37
+ all_metrics.append(self.concat_batch_metrics(batch_metrics))
38
+ else:
39
+ batch_results = partial_results[self.eval_begin_idx: self.eval_end_idx]
40
+ all_metrics.append(self.single_evaluate(batch_results))
41
+ all_metrics = np.stack(all_metrics, axis=0)
42
+ if self.replication_reduction == 'statistics':
43
+ values = get_metric_statistics(all_metrics, self.replication_times)
44
+ elif self.replication_reduction == 'mean':
45
+ values = np.mean(all_metrics, axis=0)
46
+ elif self.replication_reduction == 'concat':
47
+ values = all_metrics
48
+ return self.parse_values(values)
49
+
50
+ def prepare_results(self, results):
51
+ text = []
52
+ pred_motion = []
53
+ pred_motion_length = []
54
+ pred_motion_mask = []
55
+ motion = []
56
+ motion_length = []
57
+ motion_mask = []
58
+ token = []
59
+ # count the maximum motion length
60
+ T = max([result['motion'].shape[0] for result in results])
61
+ for result in results:
62
+ cur_motion = result['motion']
63
+ if cur_motion.shape[0] < T:
64
+ padding_values = torch.zeros((T - cur_motion.shape[0], cur_motion.shape[1]))
65
+ padding_values = padding_values.type_as(pred_motion)
66
+ cur_motion = torch.cat([cur_motion, padding_values], dim=0)
67
+ motion.append(cur_motion)
68
+ cur_pred_motion = result['pred_motion']
69
+ if cur_pred_motion.shape[0] < T:
70
+ padding_values = torch.zeros((T - cur_pred_motion.shape[0], cur_pred_motion.shape[1]))
71
+ padding_values = padding_values.type_as(cur_pred_motion)
72
+ cur_pred_motion = torch.cat([cur_pred_motion, padding_values], dim=0)
73
+ pred_motion.append(cur_pred_motion)
74
+ cur_motion_mask = result['motion_mask']
75
+ if cur_motion_mask.shape[0] < T:
76
+ padding_values = torch.zeros((T - cur_motion_mask.shape[0]))
77
+ padding_values = padding_values.type_as(cur_motion_mask)
78
+ cur_motion_mask= torch.cat([cur_motion_mask, padding_values], dim=0)
79
+ motion_mask.append(cur_motion_mask)
80
+ cur_pred_motion_mask = result['pred_motion_mask']
81
+ if cur_pred_motion_mask.shape[0] < T:
82
+ padding_values = torch.zeros((T - cur_pred_motion_mask.shape[0]))
83
+ padding_values = padding_values.type_as(cur_pred_motion_mask)
84
+ cur_pred_motion_mask= torch.cat([cur_pred_motion_mask, padding_values], dim=0)
85
+ pred_motion_mask.append(cur_pred_motion_mask)
86
+ motion_length.append(result['motion_length'].item())
87
+ pred_motion_length.append(result['pred_motion_length'].item())
88
+ if 'text' in result.keys():
89
+ text.append(result['text'])
90
+ if 'token' in result.keys():
91
+ token.append(result['token'])
92
+
93
+ motion = torch.stack(motion, dim=0)
94
+ pred_motion = torch.stack(pred_motion, dim=0)
95
+ motion_mask = torch.stack(motion_mask, dim=0)
96
+ pred_motion_mask = torch.stack(pred_motion_mask, dim=0)
97
+ motion_length = torch.Tensor(motion_length).to(motion.device).long()
98
+ pred_motion_length = torch.Tensor(pred_motion_length).to(motion.device).long()
99
+ output = {
100
+ 'pred_motion': pred_motion,
101
+ 'pred_motion_mask': pred_motion_mask,
102
+ 'pred_motion_length': pred_motion_length,
103
+ 'motion': motion,
104
+ 'motion_mask': motion_mask,
105
+ 'motion_length': motion_length,
106
+ 'text': text,
107
+ 'token': token
108
+ }
109
+ return output
110
+
111
+ def to_device(self, device):
112
+ for model in self.model_list:
113
+ model.to(device)
114
+
115
+ def motion_encode(self, motion, motion_length, motion_mask, device):
116
+ N = motion.shape[0]
117
+ motion_emb = []
118
+ batch_size = 32
119
+ cur_idx = 0
120
+ with torch.no_grad():
121
+ while cur_idx < N:
122
+ cur_motion = motion[cur_idx: cur_idx + batch_size].to(device)
123
+ cur_motion_length = motion_length[cur_idx: cur_idx + batch_size].to(device)
124
+ cur_motion_mask = motion_mask[cur_idx: cur_idx + batch_size].to(device)
125
+ cur_motion_emb = self.motion_encoder(cur_motion, cur_motion_length, cur_motion_mask)
126
+ motion_emb.append(cur_motion_emb)
127
+ cur_idx += batch_size
128
+ motion_emb = torch.cat(motion_emb, dim=0)
129
+ return motion_emb
130
+
131
+ def text_encode(self, text, token, device):
132
+ N = len(text)
133
+ text_emb = []
134
+ batch_size = 32
135
+ cur_idx = 0
136
+ with torch.no_grad():
137
+ while cur_idx < N:
138
+ cur_text = text[cur_idx: cur_idx + batch_size]
139
+ cur_token = token[cur_idx: cur_idx + batch_size]
140
+ cur_text_emb = self.text_encoder(cur_text, cur_token, device)
141
+ text_emb.append(cur_text_emb)
142
+ cur_idx += batch_size
143
+ text_emb = torch.cat(text_emb, dim=0)
144
+ return text_emb
mogen/core/evaluation/evaluators/diversity_evaluator.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..get_model import get_motion_model
5
+ from .base_evaluator import BaseEvaluator
6
+ from ..utils import calculate_diversity
7
+
8
+
9
+ class DiversityEvaluator(BaseEvaluator):
10
+
11
+ def __init__(self,
12
+ data_len=0,
13
+ motion_encoder_name=None,
14
+ motion_encoder_path=None,
15
+ num_samples=300,
16
+ batch_size=None,
17
+ drop_last=False,
18
+ replication_times=1,
19
+ replication_reduction='statistics',
20
+ **kwargs):
21
+ super().__init__(
22
+ replication_times=replication_times,
23
+ replication_reduction=replication_reduction,
24
+ batch_size=batch_size,
25
+ drop_last=drop_last,
26
+ eval_begin_idx=0,
27
+ eval_end_idx=data_len
28
+ )
29
+ self.num_samples = num_samples
30
+ self.append_indexes = None
31
+ self.motion_encoder = get_motion_model(motion_encoder_name, motion_encoder_path)
32
+ self.model_list = [self.motion_encoder]
33
+
34
+ def single_evaluate(self, results):
35
+ results = self.prepare_results(results)
36
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
37
+ motion = results['motion']
38
+ pred_motion = results['pred_motion']
39
+ pred_motion_length = results['pred_motion_length']
40
+ pred_motion_mask = results['pred_motion_mask']
41
+ self.motion_encoder.to(device)
42
+ self.motion_encoder.eval()
43
+ with torch.no_grad():
44
+ pred_motion_emb = self.motion_encode(pred_motion, pred_motion_length, pred_motion_mask, device).cpu().detach().numpy()
45
+ diversity = calculate_diversity(pred_motion_emb, self.num_samples)
46
+ return diversity
47
+
48
+ def parse_values(self, values):
49
+ metrics = {}
50
+ metrics['Diversity (mean)'] = values[0]
51
+ metrics['Diversity (conf)'] = values[1]
52
+ return metrics
mogen/core/evaluation/evaluators/fid_evaluator.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..get_model import get_motion_model
5
+ from .base_evaluator import BaseEvaluator
6
+ from ..utils import (
7
+ calculate_activation_statistics,
8
+ calculate_frechet_distance)
9
+
10
+
11
+ class FIDEvaluator(BaseEvaluator):
12
+
13
+ def __init__(self,
14
+ data_len=0,
15
+ motion_encoder_name=None,
16
+ motion_encoder_path=None,
17
+ batch_size=None,
18
+ drop_last=False,
19
+ replication_times=1,
20
+ replication_reduction='statistics',
21
+ **kwargs):
22
+ super().__init__(
23
+ replication_times=replication_times,
24
+ replication_reduction=replication_reduction,
25
+ batch_size=batch_size,
26
+ drop_last=drop_last,
27
+ eval_begin_idx=0,
28
+ eval_end_idx=data_len
29
+ )
30
+ self.append_indexes = None
31
+ self.motion_encoder = get_motion_model(motion_encoder_name, motion_encoder_path)
32
+ self.model_list = [self.motion_encoder]
33
+
34
+ def single_evaluate(self, results):
35
+ results = self.prepare_results(results)
36
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
37
+ pred_motion = results['pred_motion']
38
+
39
+ pred_motion_length = results['pred_motion_length']
40
+ pred_motion_mask = results['pred_motion_mask']
41
+ motion = results['motion']
42
+ motion_length = results['motion_length']
43
+ motion_mask = results['motion_mask']
44
+ self.motion_encoder.to(device)
45
+ self.motion_encoder.eval()
46
+ with torch.no_grad():
47
+ pred_motion_emb = self.motion_encode(pred_motion, pred_motion_length, pred_motion_mask, device).cpu().detach().numpy()
48
+ gt_motion_emb = self.motion_encode(motion, motion_length, motion_mask, device).cpu().detach().numpy()
49
+ gt_mu, gt_cov = calculate_activation_statistics(gt_motion_emb)
50
+ pred_mu, pred_cov = calculate_activation_statistics(pred_motion_emb)
51
+ fid = calculate_frechet_distance(gt_mu, gt_cov, pred_mu, pred_cov)
52
+ return fid
53
+
54
+ def parse_values(self, values):
55
+ metrics = {}
56
+ metrics['FID (mean)'] = values[0]
57
+ metrics['FID (conf)'] = values[1]
58
+ return metrics
mogen/core/evaluation/evaluators/matching_score_evaluator.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..get_model import get_motion_model, get_text_model
5
+ from .base_evaluator import BaseEvaluator
6
+ from ..utils import calculate_top_k, euclidean_distance_matrix
7
+
8
+
9
+ class MatchingScoreEvaluator(BaseEvaluator):
10
+
11
+ def __init__(self,
12
+ data_len=0,
13
+ text_encoder_name=None,
14
+ text_encoder_path=None,
15
+ motion_encoder_name=None,
16
+ motion_encoder_path=None,
17
+ top_k=3,
18
+ batch_size=32,
19
+ drop_last=False,
20
+ replication_times=1,
21
+ replication_reduction='statistics',
22
+ **kwargs):
23
+ super().__init__(
24
+ replication_times=replication_times,
25
+ replication_reduction=replication_reduction,
26
+ batch_size=batch_size,
27
+ drop_last=drop_last,
28
+ eval_begin_idx=0,
29
+ eval_end_idx=data_len
30
+ )
31
+ self.append_indexes = None
32
+ self.text_encoder = get_text_model(text_encoder_name, text_encoder_path)
33
+ self.motion_encoder = get_motion_model(motion_encoder_name, motion_encoder_path)
34
+ self.top_k = top_k
35
+ self.model_list = [self.text_encoder, self.motion_encoder]
36
+
37
+ def single_evaluate(self, results):
38
+ results = self.prepare_results(results)
39
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
40
+ motion = results['motion']
41
+ pred_motion = results['pred_motion']
42
+ pred_motion_length = results['pred_motion_length']
43
+ pred_motion_mask = results['pred_motion_mask']
44
+ text = results['text']
45
+ token = results['token']
46
+ self.text_encoder.to(device)
47
+ self.motion_encoder.to(device)
48
+ self.text_encoder.eval()
49
+ self.motion_encoder.eval()
50
+ with torch.no_grad():
51
+ word_emb = self.text_encode(text, token, device=device).cpu().detach().numpy()
52
+ motion_emb = self.motion_encode(pred_motion, pred_motion_length, pred_motion_mask, device).cpu().detach().numpy()
53
+ dist_mat = euclidean_distance_matrix(word_emb, motion_emb)
54
+ matching_score = dist_mat.trace()
55
+ all_size = word_emb.shape[0]
56
+ return matching_score, all_size
57
+
58
+ def concat_batch_metrics(self, batch_metrics):
59
+ matching_score_sum = 0
60
+ all_size = 0
61
+ for batch_matching_score, batch_all_size in batch_metrics:
62
+ matching_score_sum += batch_matching_score
63
+ all_size += batch_all_size
64
+ matching_score = matching_score_sum / all_size
65
+ return matching_score
66
+
67
+ def parse_values(self, values):
68
+ metrics = {}
69
+ metrics['Matching Score (mean)'] = values[0]
70
+ metrics['Matching Score (conf)'] = values[1]
71
+ return metrics
mogen/core/evaluation/evaluators/multimodality_evaluator.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..get_model import get_motion_model
5
+ from .base_evaluator import BaseEvaluator
6
+ from ..utils import calculate_multimodality
7
+
8
+
9
+ class MultiModalityEvaluator(BaseEvaluator):
10
+
11
+ def __init__(self,
12
+ data_len=0,
13
+ motion_encoder_name=None,
14
+ motion_encoder_path=None,
15
+ num_samples=100,
16
+ num_repeats=30,
17
+ num_picks=10,
18
+ batch_size=None,
19
+ drop_last=False,
20
+ replication_times=1,
21
+ replication_reduction='statistics',
22
+ **kwargs):
23
+ super().__init__(
24
+ replication_times=replication_times,
25
+ replication_reduction=replication_reduction,
26
+ batch_size=batch_size,
27
+ drop_last=drop_last,
28
+ eval_begin_idx=data_len,
29
+ eval_end_idx=data_len + num_samples * num_repeats
30
+ )
31
+ self.num_samples = num_samples
32
+ self.num_repeats = num_repeats
33
+ self.num_picks = num_picks
34
+ self.append_indexes = []
35
+ for i in range(replication_times):
36
+ append_indexes = []
37
+ selected_indexs = np.random.choice(data_len, self.num_samples)
38
+ for index in selected_indexs:
39
+ append_indexes = append_indexes + [index] * self.num_repeats
40
+ self.append_indexes.append(np.array(append_indexes))
41
+ self.motion_encoder = get_motion_model(motion_encoder_name, motion_encoder_path)
42
+ self.model_list = [self.motion_encoder]
43
+
44
+ def single_evaluate(self, results):
45
+ results = self.prepare_results(results)
46
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
47
+ motion = results['motion']
48
+ pred_motion = results['pred_motion']
49
+ pred_motion_length = results['pred_motion_length']
50
+ pred_motion_mask = results['pred_motion_mask']
51
+ self.motion_encoder.to(device)
52
+ self.motion_encoder.eval()
53
+ with torch.no_grad():
54
+ pred_motion_emb = self.motion_encode(pred_motion, pred_motion_length, pred_motion_mask, device).cpu().detach().numpy()
55
+ pred_motion_emb = pred_motion_emb.reshape((self.num_samples, self.num_repeats, -1))
56
+ multimodality = calculate_multimodality(pred_motion_emb, self.num_picks)
57
+ return multimodality
58
+
59
+ def parse_values(self, values):
60
+ metrics = {}
61
+ metrics['MultiModality (mean)'] = values[0]
62
+ metrics['MultiModality (conf)'] = values[1]
63
+ return metrics
mogen/core/evaluation/evaluators/precision_evaluator.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from ..get_model import get_motion_model, get_text_model
5
+ from .base_evaluator import BaseEvaluator
6
+ from ..utils import calculate_top_k, euclidean_distance_matrix
7
+
8
+
9
+ class PrecisionEvaluator(BaseEvaluator):
10
+
11
+ def __init__(self,
12
+ data_len=0,
13
+ text_encoder_name=None,
14
+ text_encoder_path=None,
15
+ motion_encoder_name=None,
16
+ motion_encoder_path=None,
17
+ top_k=3,
18
+ batch_size=32,
19
+ drop_last=False,
20
+ replication_times=1,
21
+ replication_reduction='statistics',
22
+ **kwargs):
23
+ super().__init__(
24
+ replication_times=replication_times,
25
+ replication_reduction=replication_reduction,
26
+ batch_size=batch_size,
27
+ drop_last=drop_last,
28
+ eval_begin_idx=0,
29
+ eval_end_idx=data_len
30
+ )
31
+ self.append_indexes = None
32
+ self.text_encoder = get_text_model(text_encoder_name, text_encoder_path)
33
+ self.motion_encoder = get_motion_model(motion_encoder_name, motion_encoder_path)
34
+ self.top_k = top_k
35
+ self.model_list = [self.text_encoder, self.motion_encoder]
36
+
37
+ def single_evaluate(self, results):
38
+ results = self.prepare_results(results)
39
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
40
+ motion = results['motion']
41
+ pred_motion = results['pred_motion']
42
+ pred_motion_length = results['pred_motion_length']
43
+ pred_motion_mask = results['pred_motion_mask']
44
+ text = results['text']
45
+ token = results['token']
46
+ self.text_encoder.to(device)
47
+ self.motion_encoder.to(device)
48
+ self.text_encoder.eval()
49
+ self.motion_encoder.eval()
50
+ with torch.no_grad():
51
+ word_emb = self.text_encode(text, token, device=device).cpu().detach().numpy()
52
+ motion_emb = self.motion_encode(pred_motion, pred_motion_length, pred_motion_mask, device).cpu().detach().numpy()
53
+ dist_mat = euclidean_distance_matrix(word_emb, motion_emb)
54
+ argsmax = np.argsort(dist_mat, axis=1)
55
+ top_k_mat = calculate_top_k(argsmax, top_k=self.top_k)
56
+ top_k_count = top_k_mat.sum(axis=0)
57
+ all_size = word_emb.shape[0]
58
+ return top_k_count, all_size
59
+
60
+ def concat_batch_metrics(self, batch_metrics):
61
+ top_k_count = 0
62
+ all_size = 0
63
+ for batch_top_k_count, batch_all_size in batch_metrics:
64
+ top_k_count += batch_top_k_count
65
+ all_size += batch_all_size
66
+ R_precision = top_k_count / all_size
67
+ return R_precision
68
+
69
+ def parse_values(self, values):
70
+ metrics = {}
71
+ for top_k in range(self.top_k):
72
+ metrics['R_precision Top %d (mean)' % (top_k + 1)] = values[0][top_k]
73
+ metrics['R_precision Top %d (conf)' % (top_k + 1)] = values[1][top_k]
74
+ return metrics
mogen/core/evaluation/get_model.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mogen.models import build_submodule
2
+
3
+
4
+ def get_motion_model(name, ckpt_path):
5
+ if name == 'kit_ml':
6
+ model = build_submodule(dict(
7
+ type='T2MMotionEncoder',
8
+ input_size=251,
9
+ movement_hidden_size=512,
10
+ movement_latent_size=512,
11
+ motion_hidden_size=1024,
12
+ motion_latent_size=512,
13
+ ))
14
+ else:
15
+ model = build_submodule(dict(
16
+ type='T2MMotionEncoder',
17
+ input_size=263,
18
+ movement_hidden_size=512,
19
+ movement_latent_size=512,
20
+ motion_hidden_size=1024,
21
+ motion_latent_size=512,
22
+ ))
23
+ model.load_pretrained(ckpt_path)
24
+ return model
25
+
26
+ def get_text_model(name, ckpt_path):
27
+ if name == 'kit_ml':
28
+ model = build_submodule(dict(
29
+ type='T2MTextEncoder',
30
+ word_size=300,
31
+ pos_size=15,
32
+ hidden_size=512,
33
+ output_size=512,
34
+ max_text_len=20
35
+ ))
36
+ else:
37
+ model = build_submodule(dict(
38
+ type='T2MTextEncoder',
39
+ word_size=300,
40
+ pos_size=15,
41
+ hidden_size=512,
42
+ output_size=512,
43
+ max_text_len=20
44
+ ))
45
+ model.load_pretrained(ckpt_path)
46
+ return model
mogen/core/evaluation/utils.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from scipy import linalg
3
+
4
+
5
+ def get_metric_statistics(values, replication_times):
6
+ mean = np.mean(values, axis=0)
7
+ std = np.std(values, axis=0)
8
+ conf_interval = 1.96 * std / np.sqrt(replication_times)
9
+ return mean, conf_interval
10
+
11
+
12
+ # (X - X_train)*(X - X_train) = -2X*X_train + X*X + X_train*X_train
13
+ def euclidean_distance_matrix(matrix1, matrix2):
14
+ """
15
+ Params:
16
+ -- matrix1: N1 x D
17
+ -- matrix2: N2 x D
18
+ Returns:
19
+ -- dist: N1 x N2
20
+ dist[i, j] == distance(matrix1[i], matrix2[j])
21
+ """
22
+ assert matrix1.shape[1] == matrix2.shape[1]
23
+ d1 = -2 * np.dot(matrix1, matrix2.T) # shape (num_test, num_train)
24
+ d2 = np.sum(np.square(matrix1), axis=1, keepdims=True) # shape (num_test, 1)
25
+ d3 = np.sum(np.square(matrix2), axis=1) # shape (num_train, )
26
+ dists = np.sqrt(d1 + d2 + d3) # broadcasting
27
+ return dists
28
+
29
+
30
+ def calculate_top_k(mat, top_k):
31
+ size = mat.shape[0]
32
+ gt_mat = np.expand_dims(np.arange(size), 1).repeat(size, 1)
33
+ bool_mat = (mat == gt_mat)
34
+ correct_vec = False
35
+ top_k_list = []
36
+ for i in range(top_k):
37
+ # print(correct_vec, bool_mat[:, i])
38
+ correct_vec = (correct_vec | bool_mat[:, i])
39
+ # print(correct_vec)
40
+ top_k_list.append(correct_vec[:, None])
41
+ top_k_mat = np.concatenate(top_k_list, axis=1)
42
+ return top_k_mat
43
+
44
+
45
+ def calculate_activation_statistics(activations):
46
+ """
47
+ Params:
48
+ -- activation: num_samples x dim_feat
49
+ Returns:
50
+ -- mu: dim_feat
51
+ -- sigma: dim_feat x dim_feat
52
+ """
53
+ mu = np.mean(activations, axis=0)
54
+ cov = np.cov(activations, rowvar=False)
55
+ return mu, cov
56
+
57
+ def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
58
+ """Numpy implementation of the Frechet Distance.
59
+ The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
60
+ and X_2 ~ N(mu_2, C_2) is
61
+ d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
62
+ Stable version by Dougal J. Sutherland.
63
+ Params:
64
+ -- mu1 : Numpy array containing the activations of a layer of the
65
+ inception net (like returned by the function 'get_predictions')
66
+ for generated samples.
67
+ -- mu2 : The sample mean over activations, precalculated on an
68
+ representative data set.
69
+ -- sigma1: The covariance matrix over activations for generated samples.
70
+ -- sigma2: The covariance matrix over activations, precalculated on an
71
+ representative data set.
72
+ Returns:
73
+ -- : The Frechet Distance.
74
+ """
75
+
76
+ mu1 = np.atleast_1d(mu1)
77
+ mu2 = np.atleast_1d(mu2)
78
+
79
+ sigma1 = np.atleast_2d(sigma1)
80
+ sigma2 = np.atleast_2d(sigma2)
81
+
82
+ assert mu1.shape == mu2.shape, \
83
+ 'Training and test mean vectors have different lengths'
84
+ assert sigma1.shape == sigma2.shape, \
85
+ 'Training and test covariances have different dimensions'
86
+
87
+ diff = mu1 - mu2
88
+
89
+ # Product might be almost singular
90
+ covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
91
+ if not np.isfinite(covmean).all():
92
+ msg = ('fid calculation produces singular product; '
93
+ 'adding %s to diagonal of cov estimates') % eps
94
+ print(msg)
95
+ offset = np.eye(sigma1.shape[0]) * eps
96
+ covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
97
+
98
+ # Numerical error might give slight imaginary component
99
+ if np.iscomplexobj(covmean):
100
+ if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
101
+ m = np.max(np.abs(covmean.imag))
102
+ raise ValueError('Imaginary component {}'.format(m))
103
+ covmean = covmean.real
104
+
105
+ tr_covmean = np.trace(covmean)
106
+
107
+ return (diff.dot(diff) + np.trace(sigma1) +
108
+ np.trace(sigma2) - 2 * tr_covmean)
109
+
110
+
111
+ def calculate_diversity(activation, diversity_times):
112
+ assert len(activation.shape) == 2
113
+ assert activation.shape[0] > diversity_times
114
+ num_samples = activation.shape[0]
115
+
116
+ first_indices = np.random.choice(num_samples, diversity_times, replace=False)
117
+ second_indices = np.random.choice(num_samples, diversity_times, replace=False)
118
+ dist = linalg.norm(activation[first_indices] - activation[second_indices], axis=1)
119
+ return dist.mean()
120
+
121
+
122
+ def calculate_multimodality(activation, multimodality_times):
123
+ assert len(activation.shape) == 3
124
+ assert activation.shape[1] > multimodality_times
125
+ num_per_sent = activation.shape[1]
126
+
127
+ first_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
128
+ second_dices = np.random.choice(num_per_sent, multimodality_times, replace=False)
129
+ dist = linalg.norm(activation[:, first_dices] - activation[:, second_dices], axis=2)
130
+ return dist.mean()
mogen/core/optimizer/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .builder import OPTIMIZERS, build_optimizers
2
+
3
+ __all__ = ['build_optimizers', 'OPTIMIZERS']
mogen/core/optimizer/builder.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.runner import build_optimizer
3
+ from mmcv.utils import Registry
4
+
5
+ OPTIMIZERS = Registry('optimizers')
6
+
7
+
8
+ def build_optimizers(model, cfgs):
9
+ """Build multiple optimizers from configs. If `cfgs` contains several dicts
10
+ for optimizers, then a dict for each constructed optimizers will be
11
+ returned. If `cfgs` only contains one optimizer config, the constructed
12
+ optimizer itself will be returned. For example,
13
+
14
+ 1) Multiple optimizer configs:
15
+
16
+ .. code-block:: python
17
+
18
+ optimizer_cfg = dict(
19
+ model1=dict(type='SGD', lr=lr),
20
+ model2=dict(type='SGD', lr=lr))
21
+
22
+ The return dict is
23
+ ``dict('model1': torch.optim.Optimizer, 'model2': torch.optim.Optimizer)``
24
+
25
+ 2) Single optimizer config:
26
+
27
+ .. code-block:: python
28
+
29
+ optimizer_cfg = dict(type='SGD', lr=lr)
30
+
31
+ The return is ``torch.optim.Optimizer``.
32
+
33
+ Args:
34
+ model (:obj:`nn.Module`): The model with parameters to be optimized.
35
+ cfgs (dict): The config dict of the optimizer.
36
+
37
+ Returns:
38
+ dict[:obj:`torch.optim.Optimizer`] | :obj:`torch.optim.Optimizer`:
39
+ The initialized optimizers.
40
+ """
41
+ optimizers = {}
42
+ if hasattr(model, 'module'):
43
+ model = model.module
44
+ # determine whether 'cfgs' has several dicts for optimizers
45
+ if all(isinstance(v, dict) for v in cfgs.values()):
46
+ for key, cfg in cfgs.items():
47
+ cfg_ = cfg.copy()
48
+ module = getattr(model, key)
49
+ optimizers[key] = build_optimizer(module, cfg_)
50
+ return optimizers
51
+
52
+ return build_optimizer(model, cfgs)
mogen/datasets/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_dataset import BaseMotionDataset
2
+ from .text_motion_dataset import TextMotionDataset
3
+ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
4
+ from .pipelines import Compose
5
+ from .samplers import DistributedSampler
6
+
7
+
8
+ __all__ = [
9
+ 'BaseMotionDataset', 'TextMotionDataset', 'DATASETS', 'PIPELINES', 'build_dataloader',
10
+ 'build_dataset', 'Compose', 'DistributedSampler'
11
+ ]
mogen/datasets/base_dataset.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import copy
3
+ from typing import Optional, Union
4
+
5
+ import numpy as np
6
+ from torch.utils.data import Dataset
7
+
8
+ from .pipelines import Compose
9
+ from .builder import DATASETS
10
+ from mogen.core.evaluation import build_evaluator
11
+
12
+
13
+ @DATASETS.register_module()
14
+ class BaseMotionDataset(Dataset):
15
+ """Base motion dataset.
16
+ Args:
17
+ data_prefix (str): the prefix of data path.
18
+ pipeline (list): a list of dict, where each element represents
19
+ a operation defined in `mogen.datasets.pipelines`.
20
+ ann_file (str | None, optional): the annotation file. When ann_file is
21
+ str, the subclass is expected to read from the ann_file. When
22
+ ann_file is None, the subclass is expected to read according
23
+ to data_prefix.
24
+ test_mode (bool): in train mode or test mode. Default: None.
25
+ dataset_name (str | None, optional): the name of dataset. It is used
26
+ to identify the type of evaluation metric. Default: None.
27
+ """
28
+
29
+ def __init__(self,
30
+ data_prefix: str,
31
+ pipeline: list,
32
+ dataset_name: Optional[Union[str, None]] = None,
33
+ fixed_length: Optional[Union[int, None]] = None,
34
+ ann_file: Optional[Union[str, None]] = None,
35
+ motion_dir: Optional[Union[str, None]] = None,
36
+ eval_cfg: Optional[Union[dict, None]] = None,
37
+ test_mode: Optional[bool] = False):
38
+ super(BaseMotionDataset, self).__init__()
39
+
40
+ self.data_prefix = data_prefix
41
+ self.pipeline = Compose(pipeline)
42
+ self.dataset_name = dataset_name
43
+ self.fixed_length = fixed_length
44
+ self.ann_file = os.path.join(data_prefix, 'datasets', dataset_name, ann_file)
45
+ self.motion_dir = os.path.join(data_prefix, 'datasets', dataset_name, motion_dir)
46
+ self.eval_cfg = copy.deepcopy(eval_cfg)
47
+ self.test_mode = test_mode
48
+
49
+ self.load_annotations()
50
+ if self.test_mode:
51
+ self.prepare_evaluation()
52
+
53
+ def load_anno(self, name):
54
+ motion_path = os.path.join(self.motion_dir, name + '.npy')
55
+ motion_data = np.load(motion_path)
56
+ return {'motion': motion_data}
57
+
58
+
59
+ def load_annotations(self):
60
+ """Load annotations from ``ann_file`` to ``data_infos``"""
61
+ self.data_infos = []
62
+ for line in open(self.ann_file, 'r').readlines():
63
+ line = line.strip()
64
+ self.data_infos.append(self.load_anno(line))
65
+
66
+
67
+ def prepare_data(self, idx: int):
68
+ """"Prepare raw data for the f'{idx'}-th data."""
69
+ results = copy.deepcopy(self.data_infos[idx])
70
+ results['dataset_name'] = self.dataset_name
71
+ results['sample_idx'] = idx
72
+ return self.pipeline(results)
73
+
74
+ def __len__(self):
75
+ """Return the length of current dataset."""
76
+ if self.test_mode:
77
+ return len(self.eval_indexes)
78
+ elif self.fixed_length is not None:
79
+ return self.fixed_length
80
+ return len(self.data_infos)
81
+
82
+ def __getitem__(self, idx: int):
83
+ """Prepare data for the ``idx``-th data.
84
+ As for video dataset, we can first parse raw data for each frame. Then
85
+ we combine annotations from all frames. This interface is used to
86
+ simplify the logic of video dataset and other special datasets.
87
+ """
88
+ if self.test_mode:
89
+ idx = self.eval_indexes[idx]
90
+ elif self.fixed_length is not None:
91
+ idx = idx % len(self.data_infos)
92
+ return self.prepare_data(idx)
93
+
94
+ def prepare_evaluation(self):
95
+ self.evaluators = []
96
+ self.eval_indexes = []
97
+ for _ in range(self.eval_cfg['replication_times']):
98
+ eval_indexes = np.arange(len(self.data_infos))
99
+ if self.eval_cfg.get('shuffle_indexes', False):
100
+ np.random.shuffle(eval_indexes)
101
+ self.eval_indexes.append(eval_indexes)
102
+ for metric in self.eval_cfg['metrics']:
103
+ evaluator, self.eval_indexes = build_evaluator(
104
+ metric, self.eval_cfg, len(self.data_infos), self.eval_indexes)
105
+ self.evaluators.append(evaluator)
106
+
107
+ self.eval_indexes = np.concatenate(self.eval_indexes)
108
+
109
+ def evaluate(self, results, work_dir, logger=None):
110
+ metrics = {}
111
+ device = results[0]['motion'].device
112
+ for evaluator in self.evaluators:
113
+ evaluator.to_device(device)
114
+ metrics.update(evaluator.evaluate(results))
115
+ if logger is not None:
116
+ logger.info(metrics)
117
+ return metrics
mogen/datasets/builder.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ import random
3
+ from functools import partial
4
+ from typing import Optional, Union
5
+
6
+ import numpy as np
7
+ from mmcv.parallel import collate
8
+ from mmcv.runner import get_dist_info
9
+ from mmcv.utils import Registry, build_from_cfg
10
+ from torch.utils.data import DataLoader
11
+ from torch.utils.data.dataset import Dataset
12
+
13
+ from .samplers import DistributedSampler
14
+
15
+ if platform.system() != 'Windows':
16
+ # https://github.com/pytorch/pytorch/issues/973
17
+ import resource
18
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
19
+ base_soft_limit = rlimit[0]
20
+ hard_limit = rlimit[1]
21
+ soft_limit = min(max(4096, base_soft_limit), hard_limit)
22
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
23
+
24
+ DATASETS = Registry('dataset')
25
+ PIPELINES = Registry('pipeline')
26
+
27
+
28
+ def build_dataset(cfg: Union[dict, list, tuple],
29
+ default_args: Optional[Union[dict, None]] = None):
30
+ """"Build dataset by the given config."""
31
+ from .dataset_wrappers import (
32
+ ConcatDataset,
33
+ RepeatDataset,
34
+ )
35
+ if isinstance(cfg, (list, tuple)):
36
+ dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
37
+ elif cfg['type'] == 'RepeatDataset':
38
+ dataset = RepeatDataset(
39
+ build_dataset(cfg['dataset'], default_args), cfg['times'])
40
+ else:
41
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
42
+
43
+ return dataset
44
+
45
+
46
+ def build_dataloader(dataset: Dataset,
47
+ samples_per_gpu: int,
48
+ workers_per_gpu: int,
49
+ num_gpus: Optional[int] = 1,
50
+ dist: Optional[bool] = True,
51
+ shuffle: Optional[bool] = True,
52
+ round_up: Optional[bool] = True,
53
+ seed: Optional[Union[int, None]] = None,
54
+ persistent_workers: Optional[bool] = True,
55
+ **kwargs):
56
+ """Build PyTorch DataLoader.
57
+ In distributed training, each GPU/process has a dataloader.
58
+ In non-distributed training, there is only one dataloader for all GPUs.
59
+ Args:
60
+ dataset (:obj:`Dataset`): A PyTorch dataset.
61
+ samples_per_gpu (int): Number of training samples on each GPU, i.e.,
62
+ batch size of each GPU.
63
+ workers_per_gpu (int): How many subprocesses to use for data loading
64
+ for each GPU.
65
+ num_gpus (int, optional): Number of GPUs. Only used in non-distributed
66
+ training.
67
+ dist (bool, optional): Distributed training/test or not. Default: True.
68
+ shuffle (bool, optional): Whether to shuffle the data at every epoch.
69
+ Default: True.
70
+ round_up (bool, optional): Whether to round up the length of dataset by
71
+ adding extra samples to make it evenly divisible. Default: True.
72
+ kwargs: any keyword argument to be used to initialize DataLoader
73
+ Returns:
74
+ DataLoader: A PyTorch dataloader.
75
+ """
76
+ rank, world_size = get_dist_info()
77
+ if dist:
78
+ sampler = DistributedSampler(
79
+ dataset, world_size, rank, shuffle=shuffle, round_up=round_up)
80
+ shuffle = False
81
+ batch_size = samples_per_gpu
82
+ num_workers = workers_per_gpu
83
+ else:
84
+ sampler = None
85
+ batch_size = num_gpus * samples_per_gpu
86
+ num_workers = num_gpus * workers_per_gpu
87
+
88
+ init_fn = partial(
89
+ worker_init_fn, num_workers=num_workers, rank=rank,
90
+ seed=seed) if seed is not None else None
91
+
92
+ data_loader = DataLoader(
93
+ dataset,
94
+ batch_size=batch_size,
95
+ sampler=sampler,
96
+ num_workers=num_workers,
97
+ collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
98
+ pin_memory=False,
99
+ shuffle=shuffle,
100
+ worker_init_fn=init_fn,
101
+ persistent_workers=persistent_workers,
102
+ **kwargs)
103
+
104
+ return data_loader
105
+
106
+
107
+ def worker_init_fn(worker_id: int, num_workers: int, rank: int, seed: int):
108
+ """Init random seed for each worker."""
109
+ # The seed of each worker equals to
110
+ # num_worker * rank + worker_id + user_seed
111
+ worker_seed = num_workers * rank + worker_id + seed
112
+ np.random.seed(worker_seed)
113
+ random.seed(worker_seed)
mogen/datasets/dataset_wrappers.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
2
+ from torch.utils.data.dataset import Dataset
3
+
4
+ from .builder import DATASETS
5
+
6
+
7
+ @DATASETS.register_module()
8
+ class ConcatDataset(_ConcatDataset):
9
+ """A wrapper of concatenated dataset.
10
+ Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
11
+ add `get_cat_ids` function.
12
+ Args:
13
+ datasets (list[:obj:`Dataset`]): A list of datasets.
14
+ """
15
+
16
+ def __init__(self, datasets: list):
17
+ super(ConcatDataset, self).__init__(datasets)
18
+
19
+
20
+ @DATASETS.register_module()
21
+ class RepeatDataset(object):
22
+ """A wrapper of repeated dataset.
23
+ The length of repeated dataset will be `times` larger than the original
24
+ dataset. This is useful when the data loading time is long but the dataset
25
+ is small. Using RepeatDataset can reduce the data loading time between
26
+ epochs.
27
+ Args:
28
+ dataset (:obj:`Dataset`): The dataset to be repeated.
29
+ times (int): Repeat times.
30
+ """
31
+
32
+ def __init__(self, dataset: Dataset, times: int):
33
+ self.dataset = dataset
34
+ self.times = times
35
+
36
+ self._ori_len = len(self.dataset)
37
+
38
+ def __getitem__(self, idx: int):
39
+ return self.dataset[idx % self._ori_len]
40
+
41
+ def __len__(self):
42
+ return self.times * self._ori_len
mogen/datasets/pipelines/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .compose import Compose
2
+ from .formatting import (
3
+ to_tensor,
4
+ ToTensor,
5
+ Transpose,
6
+ Collect,
7
+ WrapFieldsToLists
8
+ )
9
+ from .transforms import (
10
+ Crop,
11
+ RandomCrop,
12
+ Normalize
13
+ )
14
+
15
+ __all__ = [
16
+ 'Compose', 'to_tensor', 'Transpose', 'Collect', 'WrapFieldsToLists', 'ToTensor',
17
+ 'Crop', 'RandomCrop', 'Normalize'
18
+ ]
mogen/datasets/pipelines/compose.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Sequence
2
+
3
+ from mmcv.utils import build_from_cfg
4
+
5
+ from ..builder import PIPELINES
6
+
7
+
8
+ @PIPELINES.register_module()
9
+ class Compose(object):
10
+ """Compose a data pipeline with a sequence of transforms.
11
+
12
+ Args:
13
+ transforms (list[dict | callable]):
14
+ Either config dicts of transforms or transform objects.
15
+ """
16
+
17
+ def __init__(self, transforms):
18
+ assert isinstance(transforms, Sequence)
19
+ self.transforms = []
20
+ for transform in transforms:
21
+ if isinstance(transform, dict):
22
+ transform = build_from_cfg(transform, PIPELINES)
23
+ self.transforms.append(transform)
24
+ elif callable(transform):
25
+ self.transforms.append(transform)
26
+ else:
27
+ raise TypeError('transform must be callable or a dict, but got'
28
+ f' {type(transform)}')
29
+
30
+ def __call__(self, data):
31
+ for t in self.transforms:
32
+ data = t(data)
33
+ if data is None:
34
+ return None
35
+ return data
36
+
37
+ def __repr__(self):
38
+ format_string = self.__class__.__name__ + '('
39
+ for t in self.transforms:
40
+ format_string += f'\n {t}'
41
+ format_string += '\n)'
42
+ return format_string
mogen/datasets/pipelines/formatting.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Sequence
2
+
3
+ import mmcv
4
+ import numpy as np
5
+ import torch
6
+ from mmcv.parallel import DataContainer as DC
7
+ from PIL import Image
8
+
9
+ from ..builder import PIPELINES
10
+
11
+
12
+ def to_tensor(data):
13
+ """Convert objects of various python types to :obj:`torch.Tensor`.
14
+
15
+ Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
16
+ :class:`Sequence`, :class:`int` and :class:`float`.
17
+ """
18
+ if isinstance(data, torch.Tensor):
19
+ return data
20
+ elif isinstance(data, np.ndarray):
21
+ return torch.from_numpy(data)
22
+ elif isinstance(data, Sequence) and not mmcv.is_str(data):
23
+ return torch.tensor(data)
24
+ elif isinstance(data, int):
25
+ return torch.LongTensor([data])
26
+ elif isinstance(data, float):
27
+ return torch.FloatTensor([data])
28
+ else:
29
+ raise TypeError(
30
+ f'Type {type(data)} cannot be converted to tensor.'
31
+ 'Supported types are: `numpy.ndarray`, `torch.Tensor`, '
32
+ '`Sequence`, `int` and `float`')
33
+
34
+
35
+ @PIPELINES.register_module()
36
+ class ToTensor(object):
37
+
38
+ def __init__(self, keys):
39
+ self.keys = keys
40
+
41
+ def __call__(self, results):
42
+ for key in self.keys:
43
+ results[key] = to_tensor(results[key])
44
+ return results
45
+
46
+ def __repr__(self):
47
+ return self.__class__.__name__ + f'(keys={self.keys})'
48
+
49
+
50
+ @PIPELINES.register_module()
51
+ class Transpose(object):
52
+
53
+ def __init__(self, keys, order):
54
+ self.keys = keys
55
+ self.order = order
56
+
57
+ def __call__(self, results):
58
+ for key in self.keys:
59
+ results[key] = results[key].transpose(self.order)
60
+ return results
61
+
62
+ def __repr__(self):
63
+ return self.__class__.__name__ + \
64
+ f'(keys={self.keys}, order={self.order})'
65
+
66
+
67
+ @PIPELINES.register_module()
68
+ class Collect(object):
69
+ """Collect data from the loader relevant to the specific task.
70
+
71
+ This is usually the last stage of the data loader pipeline.
72
+
73
+ Args:
74
+ keys (Sequence[str]): Keys of results to be collected in ``data``.
75
+ meta_keys (Sequence[str], optional): Meta keys to be converted to
76
+ ``mmcv.DataContainer`` and collected in ``data[motion_metas]``.
77
+ Default: ``('filename', 'ori_filename', 'ori_shape', 'motion_shape', 'motion_mask')``
78
+
79
+ Returns:
80
+ dict: The result dict contains the following keys
81
+ - keys in``self.keys``
82
+ - ``motion_metas`` if available
83
+ """
84
+
85
+ def __init__(self,
86
+ keys,
87
+ meta_keys=('filename', 'ori_filename', 'ori_shape', 'motion_shape', 'motion_mask')):
88
+ self.keys = keys
89
+ self.meta_keys = meta_keys
90
+
91
+ def __call__(self, results):
92
+ data = {}
93
+ motion_meta = {}
94
+ for key in self.meta_keys:
95
+ if key in results:
96
+ motion_meta[key] = results[key]
97
+ data['motion_metas'] = DC(motion_meta, cpu_only=True)
98
+ for key in self.keys:
99
+ data[key] = results[key]
100
+ return data
101
+
102
+ def __repr__(self):
103
+ return self.__class__.__name__ + \
104
+ f'(keys={self.keys}, meta_keys={self.meta_keys})'
105
+
106
+
107
+ @PIPELINES.register_module()
108
+ class WrapFieldsToLists(object):
109
+ """Wrap fields of the data dictionary into lists for evaluation.
110
+
111
+ This class can be used as a last step of a test or validation
112
+ pipeline for single image evaluation or inference.
113
+
114
+ Example:
115
+ >>> test_pipeline = [
116
+ >>> dict(type='LoadImageFromFile'),
117
+ >>> dict(type='Normalize',
118
+ mean=[123.675, 116.28, 103.53],
119
+ std=[58.395, 57.12, 57.375],
120
+ to_rgb=True),
121
+ >>> dict(type='ImageToTensor', keys=['img']),
122
+ >>> dict(type='Collect', keys=['img']),
123
+ >>> dict(type='WrapIntoLists')
124
+ >>> ]
125
+ """
126
+
127
+ def __call__(self, results):
128
+ # Wrap dict fields into lists
129
+ for key, val in results.items():
130
+ results[key] = [val]
131
+ return results
132
+
133
+ def __repr__(self):
134
+ return f'{self.__class__.__name__}()'
mogen/datasets/pipelines/transforms.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import random
3
+
4
+ import mmcv
5
+ import numpy as np
6
+
7
+ from ..builder import PIPELINES
8
+ import torch
9
+ from typing import Optional, Tuple, Union
10
+
11
+
12
+ @PIPELINES.register_module()
13
+ class Crop(object):
14
+ r"""Crop motion sequences.
15
+
16
+ Args:
17
+ crop_size (int): The size of the cropped motion sequence.
18
+ """
19
+ def __init__(self,
20
+ crop_size: Optional[Union[int, None]] = None):
21
+ self.crop_size = crop_size
22
+ assert self.crop_size is not None
23
+
24
+ def __call__(self, results):
25
+ motion = results['motion']
26
+ length = len(motion)
27
+ if length >= self.crop_size:
28
+ idx = random.randint(0, length - self.crop_size)
29
+ motion = motion[idx: idx + self.crop_size]
30
+ results['motion_length'] = self.crop_size
31
+ else:
32
+ padding_length = self.crop_size - length
33
+ D = motion.shape[1:]
34
+ padding_zeros = np.zeros((padding_length, *D), dtype=np.float32)
35
+ motion = np.concatenate([motion, padding_zeros], axis=0)
36
+ results['motion_length'] = length
37
+ assert len(motion) == self.crop_size
38
+ results['motion'] = motion
39
+ results['motion_shape'] = motion.shape
40
+ if length >= self.crop_size:
41
+ results['motion_mask'] = torch.ones(self.crop_size).numpy()
42
+ else:
43
+ results['motion_mask'] = torch.cat(
44
+ (torch.ones(length), torch.zeros(self.crop_size - length))).numpy()
45
+ return results
46
+
47
+
48
+ def __repr__(self):
49
+ repr_str = self.__class__.__name__ + f'(crop_size={self.crop_size})'
50
+ return repr_str
51
+
52
+ @PIPELINES.register_module()
53
+ class RandomCrop(object):
54
+ r"""Random crop motion sequences. Each sequence will be padded with zeros to the maximum length.
55
+
56
+ Args:
57
+ min_size (int or None): The minimum size of the cropped motion sequence (inclusive).
58
+ max_size (int or None): The maximum size of the cropped motion sequence (inclusive).
59
+ """
60
+ def __init__(self,
61
+ min_size: Optional[Union[int, None]] = None,
62
+ max_size: Optional[Union[int, None]] = None):
63
+ self.min_size = min_size
64
+ self.max_size = max_size
65
+ assert self.min_size is not None
66
+ assert self.max_size is not None
67
+
68
+ def __call__(self, results):
69
+ motion = results['motion']
70
+ length = len(motion)
71
+ crop_size = random.randint(self.min_size, self.max_size)
72
+ if length > crop_size:
73
+ idx = random.randint(0, length - crop_size)
74
+ motion = motion[idx: idx + crop_size]
75
+ results['motion_length'] = crop_size
76
+ else:
77
+ results['motion_length'] = length
78
+ padding_length = self.max_size - min(crop_size, length)
79
+ if padding_length > 0:
80
+ D = motion.shape[1:]
81
+ padding_zeros = np.zeros((padding_length, *D), dtype=np.float32)
82
+ motion = np.concatenate([motion, padding_zeros], axis=0)
83
+ results['motion'] = motion
84
+ results['motion_shape'] = motion.shape
85
+ if length >= self.max_size and crop_size == self.max_size:
86
+ results['motion_mask'] = torch.ones(self.max_size).numpy()
87
+ else:
88
+ results['motion_mask'] = torch.cat((
89
+ torch.ones(min(length, crop_size)),
90
+ torch.zeros(self.max_size - min(length, crop_size))), dim=0).numpy()
91
+ assert len(motion) == self.max_size
92
+ return results
93
+
94
+
95
+ def __repr__(self):
96
+ repr_str = self.__class__.__name__ + f'(min_size={self.min_size}'
97
+ repr_str += f', max_size={self.max_size})'
98
+ return repr_str
99
+
100
+ @PIPELINES.register_module()
101
+ class Normalize(object):
102
+ """Normalize motion sequences.
103
+
104
+ Args:
105
+ mean_path (str): Path of mean file.
106
+ std_path (str): Path of std file.
107
+ """
108
+
109
+ def __init__(self, mean_path, std_path, eps=1e-9):
110
+ self.mean = np.load(mean_path)
111
+ self.std = np.load(std_path)
112
+ self.eps = eps
113
+
114
+ def __call__(self, results):
115
+ motion = results['motion']
116
+ motion = (motion - self.mean) / (self.std + self.eps)
117
+ results['motion'] = motion
118
+ results['motion_norm_mean'] = self.mean
119
+ results['motion_norm_std'] = self.std
120
+ return results
mogen/datasets/samplers/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .distributed_sampler import DistributedSampler
2
+
3
+ __all__ = ['DistributedSampler']
mogen/datasets/samplers/distributed_sampler.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DistributedSampler as _DistributedSampler
3
+
4
+
5
+ class DistributedSampler(_DistributedSampler):
6
+
7
+ def __init__(self,
8
+ dataset,
9
+ num_replicas=None,
10
+ rank=None,
11
+ shuffle=True,
12
+ round_up=True):
13
+ super().__init__(dataset, num_replicas=num_replicas, rank=rank)
14
+ self.shuffle = shuffle
15
+ self.round_up = round_up
16
+ if self.round_up:
17
+ self.total_size = self.num_samples * self.num_replicas
18
+ else:
19
+ self.total_size = len(self.dataset)
20
+
21
+ def __iter__(self):
22
+ # deterministically shuffle based on epoch
23
+ if self.shuffle:
24
+ g = torch.Generator()
25
+ g.manual_seed(self.epoch)
26
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
27
+ else:
28
+ indices = torch.arange(len(self.dataset)).tolist()
29
+
30
+ # add extra samples to make it evenly divisible
31
+ if self.round_up:
32
+ indices = (
33
+ indices *
34
+ int(self.total_size / len(indices) + 1))[:self.total_size]
35
+ assert len(indices) == self.total_size
36
+
37
+ # subsample
38
+ indices = indices[self.rank:self.total_size:self.num_replicas]
39
+ if self.round_up:
40
+ assert len(indices) == self.num_samples
41
+
42
+ return iter(indices)
mogen/datasets/text_motion_dataset.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import os.path
4
+ from abc import ABCMeta
5
+ from collections import OrderedDict
6
+ from typing import Any, List, Optional, Union
7
+
8
+ import mmcv
9
+ import copy
10
+ import numpy as np
11
+ import torch
12
+ import torch.distributed as dist
13
+ from mmcv.runner import get_dist_info
14
+
15
+ from .base_dataset import BaseMotionDataset
16
+ from .builder import DATASETS
17
+
18
+
19
+ @DATASETS.register_module()
20
+ class TextMotionDataset(BaseMotionDataset):
21
+ """TextMotion dataset.
22
+
23
+ Args:
24
+ text_dir (str): Path to the directory containing the text files.
25
+ """
26
+ def __init__(self,
27
+ data_prefix: str,
28
+ pipeline: list,
29
+ dataset_name: Optional[Union[str, None]] = None,
30
+ fixed_length: Optional[Union[int, None]] = None,
31
+ ann_file: Optional[Union[str, None]] = None,
32
+ motion_dir: Optional[Union[str, None]] = None,
33
+ text_dir: Optional[Union[str, None]] = None,
34
+ token_dir: Optional[Union[str, None]] = None,
35
+ clip_feat_dir: Optional[Union[str, None]] = None,
36
+ eval_cfg: Optional[Union[dict, None]] = None,
37
+ fine_mode: Optional[bool] = False,
38
+ test_mode: Optional[bool] = False):
39
+ self.text_dir = os.path.join(data_prefix, 'datasets', dataset_name, text_dir)
40
+ if token_dir is not None:
41
+ self.token_dir = os.path.join(data_prefix, 'datasets', dataset_name, token_dir)
42
+ else:
43
+ self.token_dir = None
44
+ if clip_feat_dir is not None:
45
+ self.clip_feat_dir = os.path.join(data_prefix, 'datasets', dataset_name, clip_feat_dir)
46
+ else:
47
+ self.clip_feat_dir = None
48
+ self.fine_mode = fine_mode
49
+ super(TextMotionDataset, self).__init__(
50
+ data_prefix=data_prefix,
51
+ pipeline=pipeline,
52
+ dataset_name=dataset_name,
53
+ fixed_length=fixed_length,
54
+ ann_file=ann_file,
55
+ motion_dir=motion_dir,
56
+ eval_cfg=eval_cfg,
57
+ test_mode=test_mode)
58
+
59
+ def load_anno(self, name):
60
+ results = super().load_anno(name)
61
+ text_path = os.path.join(self.text_dir, name + '.txt')
62
+ text_data = []
63
+ for line in open(text_path, 'r'):
64
+ text_data.append(line.strip())
65
+ results['text'] = text_data
66
+ if self.token_dir is not None:
67
+ token_path = os.path.join(self.token_dir, name + '.txt')
68
+ token_data = []
69
+ for line in open(token_path, 'r'):
70
+ token_data.append(line.strip())
71
+ results['token'] = token_data
72
+ if self.clip_feat_dir is not None:
73
+ clip_feat_path = os.path.join(self.clip_feat_dir, name + '.npy')
74
+ clip_feat = torch.from_numpy(np.load(clip_feat_path))
75
+ results['clip_feat'] = clip_feat
76
+ return results
77
+
78
+ def prepare_data(self, idx: int):
79
+ """"Prepare raw data for the f'{idx'}-th data."""
80
+ results = copy.deepcopy(self.data_infos[idx])
81
+ text_list = results['text']
82
+ idx = np.random.randint(0, len(text_list))
83
+ if self.fine_mode:
84
+ results['text'] = json.loads(text_list[idx])
85
+ else:
86
+ results['text'] = text_list[idx]
87
+ if 'clip_feat' in results.keys():
88
+ results['clip_feat'] = results['clip_feat'][idx]
89
+ if 'token' in results.keys():
90
+ results['token'] = results['token'][idx]
91
+ results['dataset_name'] = self.dataset_name
92
+ results['sample_idx'] = idx
93
+ return self.pipeline(results)
mogen/models/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .architectures import *
2
+ from .losses import *
3
+ from .rnns import *
4
+ from .transformers import *
5
+ from .attentions import *
6
+ from .builder import *
7
+ from .utils import *