import sys
from functools import reduce

from torch import nn
import torch.distributed as dist


def summary(model: nn.Module, file=sys.stdout):
    """Print a module tree annotated with per-module parameter counts.

    Returns the total number of parameters in ``model``. ``file`` may be
    sys.stdout, an open file object, a path string, or None (count only).
    """
    def _repr(model):
        # We treat the extra repr like the sub-module, one item per line
        extra_lines = []
        extra_repr = model.extra_repr()
        # empty string will be split into list ['']
        if extra_repr:
            extra_lines = extra_repr.split('\n')

        child_lines = []
        total_params = 0
        for key, module in model._modules.items():
            mod_str, num_params = _repr(module)
            mod_str = nn.modules.module._addindent(mod_str, 2)
            child_lines.append('(' + key + '): ' + mod_str)
            total_params += num_params
        lines = extra_lines + child_lines

        for name, p in model._parameters.items():
            # entries in _parameters may be None; the initializer 1 also
            # handles zero-dimensional (scalar) parameters correctly
            if hasattr(p, 'shape'):
                total_params += reduce(lambda x, y: x * y, p.shape, 1)

        main_str = model._get_name() + '('
        if lines:
            # simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += '\n  ' + '\n  '.join(lines) + '\n'

        main_str += ')'
        if file is sys.stdout:
            # colorize the parameter count when writing to a terminal
            main_str += ', \033[92m{:,}\033[0m params'.format(total_params)
        else:
            main_str += ', {:,} params'.format(total_params)
        return main_str, total_params

    string, count = _repr(model)
    if file is not None:
        if isinstance(file, str):
            # open (and close) the file ourselves when given a path
            with open(file, 'w') as f:
                print(string, file=f)
        else:
            print(string, file=file)
            file.flush()
    return count


def grad_norm(model: nn.Module):
    """Return the global L2 norm of all parameter gradients."""
    total_norm = 0.0
    for p in model.parameters():
        # skip parameters that have not received a gradient yet
        if p.grad is None:
            continue
        param_norm = p.grad.detach().norm(2)
        total_norm += param_norm.item() ** 2
    return total_norm ** 0.5


def distributed():
    """True iff torch.distributed is available and has been initialized."""
    return dist.is_available() and dist.is_initialized()
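
# --- Usage sketch (illustrative, not part of the module) --------------------
# A minimal example of the three helpers above. The two-layer model, the
# dummy input, and the sum loss are hypothetical stand-ins chosen only to
# exercise summary(), grad_norm(), and distributed().
if __name__ == '__main__':
    import torch

    model = nn.Sequential(
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 4),
    )

    # Print the module tree with per-module parameter counts; the return
    # value is the total count (16*32 + 32 + 32*4 + 4 = 676 here).
    n_params = summary(model)

    # Run one backward pass so gradients exist, then report their global
    # L2 norm.
    loss = model(torch.randn(8, 16)).sum()
    loss.backward()
    print('grad norm: {:.4f}, params: {:,}'.format(grad_norm(model), n_params))

    # distributed() is False unless torch.distributed.init_process_group
    # has been called (e.g. when launched under torchrun).
    print('distributed:', distributed())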