update
examples/nx_clean_unet/yaml/config.yaml
CHANGED
@@ -12,8 +12,8 @@ down_sampling_hidden_channels: 64
 down_sampling_kernel_size: 4
 down_sampling_stride: 2
 
-tsfm_hidden_size:
-tsfm_attention_heads:
+tsfm_hidden_size: 128
+tsfm_attention_heads: 8
 tsfm_num_blocks: 6
 tsfm_dropout_rate: 0.1
 tsfm_max_length: 5120
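The two values filled in here have to stay consistent with the attention module added below: the hidden size must divide evenly by the number of heads, giving a per-head dimension of 128 / 8 = 16. A minimal sanity-check sketch, assuming the file is read with PyYAML (this loading code is not part of the commit):

import yaml

# Hypothetical check of the updated values; the path is the file changed above.
with open("examples/nx_clean_unet/yaml/config.yaml") as f:
    config = yaml.safe_load(f)

assert config["tsfm_hidden_size"] % config["tsfm_attention_heads"] == 0
print(config["tsfm_hidden_size"] // config["tsfm_attention_heads"])  # per-head dim: 16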
toolbox/torchaudio/models/nx_clean_unet/transformer/embedding.py
ADDED
@@ -0,0 +1,95 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F


class RelativeMultiheadAttention(nn.Module):
    def __init__(self, d_model, num_heads, max_len, dropout=0.1):
        super(RelativeMultiheadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.head_dim = d_model // num_heads
        self.scale = self.head_dim ** -0.5

        self.query_projection = nn.Linear(d_model, d_model)
        self.key_projection = nn.Linear(d_model, d_model)
        self.value_projection = nn.Linear(d_model, d_model)
        self.output_projection = nn.Linear(d_model, d_model)

        self.dropout = nn.Dropout(dropout)

        # Relative position encoding, registered as a buffer so it follows the module's device.
        self.register_buffer(
            "relative_positions_encoding",
            self.generate_relative_positions_encoding(max_len, self.head_dim),
        )

    def generate_relative_positions_encoding(self, max_len, head_dim):
        # Sinusoidal relative positions encoding matrix: sin on even dimensions, cos on odd ones.
        even_index = torch.arange(max_len)[:, None] / torch.pow(10000, torch.arange(0, head_dim, 2) / head_dim)
        odd_index = torch.arange(max_len)[:, None] / torch.pow(10000, torch.arange(1, head_dim, 2) / head_dim)
        even_index = torch.sin(even_index)
        odd_index = torch.cos(odd_index)
        pos_encoding = torch.zeros(max_len, head_dim)
        pos_encoding[:, 0::2] = even_index
        pos_encoding[:, 1::2] = odd_index
        return pos_encoding

    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)
        query_len = query.size(1)
        key_len = key.size(1)

        # Project queries, keys, and values to multiple heads.
        query = self.query_projection(query).view(batch_size, query_len, self.num_heads, self.head_dim).transpose(1, 2)
        key = self.key_projection(key).view(batch_size, key_len, self.num_heads, self.head_dim).transpose(1, 2)
        value = self.value_projection(value).view(batch_size, key_len, self.num_heads, self.head_dim).transpose(1, 2)

        # Relative position encoding for the key positions, broadcast over batch and heads.
        relative_keys = self.relative_positions_encoding[:key_len, :].unsqueeze(0).unsqueeze(0).repeat(batch_size, self.num_heads, 1, 1)
        relative_values = self.relative_positions_encoding[:key_len, :].unsqueeze(0).unsqueeze(0).repeat(batch_size, self.num_heads, 1, 1)

        # Compute attention scores: content-content term plus content-position term.
        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        scores += torch.matmul(query, relative_keys.transpose(-2, -1))

        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))

        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        # Apply attention weights to the values and to the relative position values.
        output = torch.matmul(attn_weights, value) + torch.matmul(attn_weights, relative_values)
        output = output.transpose(1, 2).contiguous().view(batch_size, query_len, self.d_model)

        # Apply output projection.
        output = self.output_projection(output)

        return output


def main():
    # Example usage
    batch_size = 2
    query_len = 10
    key_len = 10
    d_model = 512
    num_heads = 8
    max_len = 100

    query = torch.rand(batch_size, query_len, d_model)
    key = torch.rand(batch_size, key_len, d_model)
    value = torch.rand(batch_size, key_len, d_model)

    attention = RelativeMultiheadAttention(d_model, num_heads, max_len)
    output = attention(query, key, value)
    print(output.shape)  # Output shape should be (batch_size, query_len, d_model)

    return


if __name__ == '__main__':
    main()
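How the new module is wired to the updated config is not shown in this commit; the sketch below assumes the tsfm_* keys map directly onto the constructor arguments (d_model, num_heads, max_len, dropout), which is an assumption rather than part of the diff:

import torch
import yaml

from toolbox.torchaudio.models.nx_clean_unet.transformer.embedding import RelativeMultiheadAttention

with open("examples/nx_clean_unet/yaml/config.yaml") as f:
    config = yaml.safe_load(f)

# Assumed mapping of config keys to constructor arguments.
attention = RelativeMultiheadAttention(
    d_model=config["tsfm_hidden_size"],        # 128
    num_heads=config["tsfm_attention_heads"],  # 8
    max_len=config["tsfm_max_length"],         # 5120
    dropout=config["tsfm_dropout_rate"],       # 0.1
)

x = torch.rand(1, 100, config["tsfm_hidden_size"])
print(attention(x, x, x).shape)  # torch.Size([1, 100, 128])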