{
    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
    "network": {
        "_target_": "torch.nn.parallel.DistributedDataParallel",
        "module": "$@network_def.to(@device)",
        "device_ids": [
            "@device"
        ]
    },
    "validate#sampler": {
        "_target_": "DistributedSampler",
        "dataset": "@validate#dataset",
        "even_divisible": false,
        "shuffle": false
    },
    "validate#dataloader#sampler": "@validate#sampler",
    "validate#handlers#1#_disabled_": "$dist.get_rank() > 0",
    "evaluating": [
        "$import torch.distributed as dist",
        "$dist.init_process_group(backend='nccl')",
        "$torch.cuda.set_device(@device)",
        "$setattr(torch.backends.cudnn, 'benchmark', True)",
        "$import logging",
        "$@validate#evaluator.logger.setLevel(logging.WARNING if dist.get_rank() > 0 else logging.INFO)",
        "$@validate#evaluator.run()",
        "$dist.destroy_process_group()"
    ]
}
|