import torch
import torch.nn as nn

from . import register_connector
from .base import Connector

class MoFMLP(nn.Module):
    """Mixture-of-Features (MoF) MLP connector: projects CLIP and DINOv2
    features through separate two-layer MLPs, then interleaves the
    resulting token sequences."""

    def __init__(self, config):
        super().__init__()
        modules_clip = [
            nn.Linear(config.vision_hidden_size, config.hidden_size),
            nn.GELU(),
            nn.Linear(config.hidden_size, config.hidden_size),
        ]
        modules_dinov2 = [
            nn.Linear(config.vision_hidden_size, config.hidden_size),
            nn.GELU(),
            nn.Linear(config.hidden_size, config.hidden_size),
        ]
        self.clip = nn.Sequential(*modules_clip)
        self.dinov2 = nn.Sequential(*modules_dinov2)

    def forward(self, x):
        # x is a pair of vision features: (CLIP tokens, DINOv2 tokens).
        image_features_clip = self.clip(x[0])
        image_features_dinov2 = self.dinov2(x[1])

        bs = image_features_clip.size(0)
        total_len = image_features_clip.size(1) + image_features_dinov2.size(1)
        dim = image_features_clip.size(-1)

        # Interleave the two streams token-wise: CLIP tokens land at even
        # indices, DINOv2 tokens at odd indices. This assumes both towers
        # emit the same number of tokens.
        merged_features = torch.empty(
            bs, total_len, dim, device=x[0].device, dtype=x[0].dtype
        )
        merged_features[:, 0::2] = image_features_clip
        merged_features[:, 1::2] = image_features_dinov2
        return merged_features

@register_connector('mof_mlp')
class MoFMLPConnector(Connector):
    def __init__(self, config):
        super().__init__()
        self._connector = MoFMLP(config)
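
# Minimal usage sketch (illustrative only; the config values and token
# counts below are assumptions, not values from this repo). The
# interleaving in forward() requires both towers to produce the same
# number of tokens:
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(vision_hidden_size=1024, hidden_size=2048)
#     mof = MoFMLP(cfg)
#     clip_tokens = torch.randn(2, 576, 1024)   # (batch, tokens, dim)
#     dino_tokens = torch.randn(2, 576, 1024)
#     merged = mof((clip_tokens, dino_tokens))  # -> shape (2, 1152, 2048)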