# Copied from rut5compressed/util.py of rut5compressed repository.
import logging
import re
from functools import wraps
from re import Pattern
from typing import Callable, Dict, Optional, Tuple

import numpy as np
import torch as T

from .modules import SVDCompressedLinear, TTCompressedLinear


def map_module(root: T.nn.Module,
               func: Callable[[T.nn.Module, str], T.nn.Module],
               patt: Optional[str] = None) -> T.nn.Module:
    """Function ``map_module`` applies a function to every node of a module
    tree whose path matches a specified pattern.

    Parameters
    ----------
    root : torch.nn.Module
        Module to modify.
    func : callable
        Function to be applied to every module in the tree whose path
        matches the pattern.
    patt : str, optional
        Pattern to filter modules by their path in the module tree.

    Returns
    -------
    torch.nn.Module
        Module modified in-place.
    """
    @wraps(func)
    def func_safe(*args, **kwargs):
        node = func(*args, **kwargs)
        if not isinstance(node, T.nn.Module):
            raise ValueError('Mapped result must be of torch.nn.Module type '
                             f'but given {type(node)}.')
        return node

    return _map_module(root, func_safe, re.compile(patt or r'.*'), '')


def _map_module(root: T.nn.Module,
                func: Callable[[T.nn.Module, str], T.nn.Module],
                patt: Pattern, path: str) -> T.nn.Module:
    for name, child in root.named_children():
        node = _map_module(child, func, patt, f'{path}/{name}')
        if node is not child:
            setattr(root, name, node)
    if patt.match(path or '/'):
        root = func(root, path or '/')
    return root


def convert_linear(module: T.nn.Linear, ctor, **kwargs) -> T.nn.Module:
    """Function ``convert_linear`` takes a module and returns a linear module
    with approximate matmul. Non-linear modules are returned intact.
    """
    if not isinstance(module, T.nn.Linear):
        return module
    raise NotImplementedError


def numel(module: T.nn.Module):
    """Function ``numel`` counts the number of elements in a module,
    accounting for pruning masks and quantized linear layers.
    """
    value = sum(x.numel() for x in module.parameters()) + \
        sum(x.numel() for x in module.buffers())

    def account_prunned(module: T.nn.Module, path: str):
        nonlocal value
        # Masks are looked up as direct attributes whose names end in '_mask'.
        for name, attr in vars(module).items():
            if not name.endswith('_mask') or not isinstance(attr, T.Tensor):
                continue
            weight_name = name[:-5]
            if not hasattr(module, weight_name):
                continue
            weight = getattr(module, weight_name)
            value -= weight.numel() - int(attr.sum().item())
            value += attr.numel()
        return module

    def account_quantized(module: T.nn.Module, path: str):
        nonlocal value
        if isinstance(module, T.nn.quantized.Linear):
            value += module.weight().numel()
            if module.bias() is not None:
                value += module.bias().numel()
        return module

    def account_rest(module: T.nn.Module, path: str):
        account_prunned(module, path)
        account_quantized(module, path)
        return module

    map_module(module, account_rest)
    return value


def sizeof(module: T.nn.Module):
    """Function ``sizeof`` estimates the size of a module in bytes,
    accounting for pruning masks and quantized linear layers.
    """
    value = sum(x.numel() * x.element_size() for x in module.parameters()) + \
        sum(x.numel() * x.element_size() for x in module.buffers())

    def account_prunned(module: T.nn.Module, path: str):
        nonlocal value
        # Masks are looked up as direct attributes whose names end in '_mask'.
        for name, attr in vars(module).items():
            if not name.endswith('_mask') or not isinstance(attr, T.Tensor):
                continue
            weight_name = name[:-5]
            if not hasattr(module, weight_name):
                continue
            weight = getattr(module, weight_name)
            value -= (weight.numel() - int(attr.sum().item())) \
                * weight.element_size()
            value += attr.numel() * attr.element_size()
        return module

    def account_quantized(module: T.nn.Module, path: str):
        nonlocal value
        if isinstance(module, T.nn.quantized.Linear):
            value += module.weight().numel() * module.weight().element_size()
            if (bias := module.bias()) is not None:
                value += bias.numel() * bias.element_size()
        return module

    def account_rest(module: T.nn.Module, path: str):
        account_prunned(module, path)
        account_quantized(module, path)
        return module

    map_module(module, account_rest)
    return value

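
# A minimal usage sketch for ``numel`` and ``sizeof`` (illustrative, not from
# the upstream file): both start from a naive parameter/buffer count and
# correct it for pruning masks and quantized linear layers while walking the
# module tree via ``map_module``.
#
#     >>> mlp = T.nn.Linear(4, 3)   # 4 * 3 weights + 3 biases = 15 values
#     >>> numel(mlp)
#     15
#     >>> sizeof(mlp)               # 15 float32 values, 4 bytes each
#     60
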
def flatten_module(module: T.nn.Module,
                   regexp=None) -> Dict[str, T.nn.Module]:
    """Function ``flatten_module`` returns a mapping from module-tree paths
    to modules, optionally filtered by ``regexp``.
    """
    modules = {}
    map_module(module, lambda x, y: modules.update({y: x}) or x, regexp)
    return modules


def print_flatten(module: T.nn.Module):
    """Function ``print_flatten`` prints a table of all modules in a module
    tree along with their paths.
    """
    paths = []
    path_len = 0
    names = []
    name_len = 0
    indx_len = 0

    def func(module, path):
        nonlocal path_len, name_len, indx_len
        paths.append(path)
        path_len = max(path_len, len(path))
        name = module.__class__.__name__
        names.append(name)
        name_len = max(name_len, len(name))
        indx_len += 1
        return module

    map_module(module, func)
    indx_len = int(np.ceil(np.log10(indx_len)))

    fmt = f'{{indx:>{indx_len}s}} {{path:{path_len}s}} {{name:{name_len}s}}'
    print(fmt.format(indx='#', path='Path', name='Layer'))
    print('-' * (indx_len + path_len + name_len + 2))
    for i, (path, name) in enumerate(zip(paths, names)):
        print(fmt.format(indx=str(i), path=path, name=name))


def compress_linear_svd(module: T.nn.Module, path: str,
                        rank: Optional[int] = None) -> T.nn.Module:
    if not isinstance(module, T.nn.Linear):
        return module
    # Do not factorize if the rank equals the size of the smallest dimension.
    norows, nocols = module.weight.shape
    if rank == min(norows, nocols):
        return module
    # If no rank is given, choose the rank at which the number of elements in
    # the SVD factors is approximately equal to the number of elements in the
    # original matrix, i.e. rank * (norows + nocols) ≈ norows * nocols.
    if rank is None:
        ratio = norows * nocols / (norows + nocols)
        rank = int(np.floor(ratio))
    return SVDCompressedLinear.from_linear(module, rank)


def compress_linear_tt(module: T.nn.Module, path: str,
                       shape: Tuple[Tuple[int], Tuple[int]],
                       rank: int) -> T.nn.Module:
    if not isinstance(module, T.nn.Linear):
        return module
    # TODO(@not-found): We need a proper compression config.
    inp_size = np.prod(shape[0])
    out_size = np.prod(shape[1])
    if inp_size == module.in_features and out_size == module.out_features:
        pass
    elif inp_size == module.out_features and out_size == module.in_features:
        shape = (shape[1], shape[0])
    else:
        raise ValueError(
            'Input and output features do not match the compression shape: '
            f'{shape[0]} vs {module.in_features} and {shape[1]} vs '
            f'{module.out_features}.')
    logging.info('apply tt compression to layer %s', path)
    return TTCompressedLinear.from_linear(module, shape, rank)


def compress(module: T.nn.Module, rank: int) -> T.nn.Module:
    """Function ``compress`` substitutes in-place the linear layers of a T5
    model with linear layers whose weight matrices are factorized with SVD.

    :param module: Model to compress.
    :param rank: Desired rank of compressed layers.
    """
    return map_module(
        root=module,
        func=lambda x, y: compress_linear_svd(x, y, rank),
        patt=r'.*/DenseReluDense/w.*')  # TODO(@not-found): Remove?
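
# A minimal end-to-end sketch of the compression flow above (illustrative,
# not from the upstream file). It builds a toy two-layer MLP as a stand-in
# for a real T5 feed-forward block, prints its layer tree, and compares
# parameter counts before and after replacing the linear layers with rank-16
# SVD factorizations. Because this module uses a relative import, run it as
# ``python -m rut5compressed.util`` (assuming the upstream package layout).
if __name__ == '__main__':
    model = T.nn.Sequential(T.nn.Linear(128, 256), T.nn.ReLU(),
                            T.nn.Linear(256, 128))
    print_flatten(model)
    print('params before:', numel(model))
    # Child paths are '/0', '/1' and '/2'; the pattern selects only the two
    # linear layers and skips the ReLU in between.
    map_module(model, lambda m, p: compress_linear_svd(m, p, rank=16),
               r'.*/[02]')
    print('params after: ', numel(model))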