# MIT License
#
# Copyright 2023 ByteDance Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import torch
from torch import nn, einsum
from einops import rearrange


class RandomProjectionQuantizer(nn.Module):
"""
Random projection and codebook lookup module
Some code is borrowed from:
https://github.com/lucidrains/vector-quantize-pytorch/blob/master/vector_quantize_pytorch/random_projection_quantizer.py
But I did normalization using pre-computed global mean & variance instead of using layer norm.
"""
def __init__(
self,
input_dim,
codebook_dim,
codebook_size,
seed=142,
):
super().__init__()

        # fix the random seed so the projection and codebook are reproducible across runs
        torch.manual_seed(seed)

        # randomly initialized projection, registered as a buffer so it is
        # saved in the state dict but never updated during training
        random_projection = torch.empty(input_dim, codebook_dim)
        nn.init.xavier_normal_(random_projection)
        self.register_buffer("random_projection", random_projection)

        # randomly initialized codebook, likewise kept frozen
        codebook = torch.empty(codebook_size, codebook_dim)
        nn.init.normal_(codebook)
        self.register_buffer("codebook", codebook)

    def codebook_lookup(self, x):
        # flatten batch and length dims: [b, n, e] -> [b*n, e]
        b = x.shape[0]
        x = rearrange(x, "b n e -> (b n) e")

        # L2-normalize the projected inputs and the codebook so that the
        # nearest neighbour in Euclidean distance is also the most
        # cosine-similar codebook entry
        normalized_x = nn.functional.normalize(x, dim=1, p=2)
        normalized_codebook = nn.functional.normalize(self.codebook, dim=1, p=2)

        # pairwise distances: [codebook_size, b*n]
        distances = torch.cdist(normalized_codebook, normalized_x)

        # index of the nearest codebook entry for each vector
        nearest_indices = torch.argmin(distances, dim=0)

        # restore the batch dim: [b*n] -> [b, n]
        xq = rearrange(nearest_indices, "(b n) -> b n", b=b)
        return xq

    @torch.no_grad()
    def forward(self, x):
        # force eval mode: the projection and codebook are fixed buffers,
        # so this module never trains
        self.eval()

# random projection [batch, length, input_dim] -> [batch, length, codebook_dim]
x = einsum("b n d, d e -> b n e", x, self.random_projection)
# codebook lookup
xq = self.codebook_lookup(x)
return xq
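

if __name__ == "__main__":
    # Minimal usage sketch. The dimensions below are illustrative and not
    # taken from any particular model config; inputs are assumed to already
    # be normalized with the pre-computed global mean & variance mentioned
    # in the class docstring.
    quantizer = RandomProjectionQuantizer(input_dim=128, codebook_dim=16, codebook_size=8192)
    features = torch.randn(2, 100, 128)  # [batch, length, input_dim]
    tokens = quantizer(features)  # [batch, length] integer codebook indices
    print(tokens.shape)  # torch.Size([2, 100])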