Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3
# coding=utf-8
import torch | |
import torch.nn as nn | |
class AutoClip:
    """Clip each parameter's gradient to a percentile of its own norm history.

    For every tracked parameter a fixed-size ring buffer of past gradient
    L2 norms is kept. Calling the instance records the current norms and
    then clips each parameter's gradient to the configured percentile of
    that parameter's buffer.
    """

    def __init__(self, parameters, initial_clipping=0.1, percentile=50, history_len=1000):
        """Set up one norm-history buffer per parameter.

        Args:
            parameters: Iterable of parameters whose gradients are clipped;
                it is materialized into a list.
            initial_clipping: Value every history slot is pre-filled with.
            percentile: Percentile (0-100) of the history used as the
                clipping threshold.
            history_len: Number of slots in each history buffer.

        Raises:
            ValueError: If ``history_len`` is smaller than 1.
        """
        if history_len < 1:
            raise ValueError(f"history_len must be at least 1, got {history_len}")

        self.parameters = list(parameters)
        # float() keeps the buffers floating point: with an integer fill value
        # torch.full would create an integer tensor and truncate stored norms.
        self.grad_history = [
            torch.full([history_len], float(initial_clipping))
            for _ in self.parameters
        ]
        self.index = 0
        self.history_len = history_len
        self.percentile = percentile

    def __call__(self):
        """Record current gradient norms, clip the gradients, report the mean norm.

        Returns:
            The mean of the norms returned by ``clip_grad_norm_`` over all
            parameters that had a non-zero gradient, or ``0.0`` when no
            parameter had one.
        """
        self._add_to_history(self.parameters)

        grad_norms = []
        for parameter, history in zip(self.parameters, self.grad_history):
            if not self._has_gradient(parameter):
                continue
            clip_value = self._get_percentile(history, self.percentile)
            grad_norms.append(nn.utils.clip_grad_norm_(parameter, clip_value).item())

        # Nothing was clipped (e.g. called before any backward pass): avoid
        # dividing by zero and report a neutral value instead.
        if not grad_norms:
            return 0.0
        return sum(grad_norms) / len(grad_norms)

    @staticmethod
    def _has_gradient(parameter):
        """Return True when ``parameter`` has a gradient that is not all zeros."""
        grad = parameter.grad
        return grad is not None and grad.abs().sum().is_nonzero()

    def _add_to_history(self, parameters):
        """Store each parameter's gradient L2 norm in the current history slot.

        Parameters without a non-zero gradient keep their previous value in
        that slot. The slot index advances once per call and wraps around
        after ``history_len`` calls.
        """
        for i, param in enumerate(parameters):
            if not self._has_gradient(param):
                continue
            self.grad_history[i][self.index] = param.grad.detach().norm(2)
        self.index = (self.index + 1) % self.history_len

    def _get_percentile(self, tensor, percentile):
        """Return the ``percentile``-th percentile of ``tensor`` as a Python number.

        The percentile is mapped to a 1-based rank ``k`` and the k-th
        smallest element is returned (no interpolation).
        """
        k = 1 + round(0.01 * percentile * (tensor.numel() - 1))
        return tensor.kthvalue(k).values.item()