Spaces:

bala1802
/

clip_demo

Sleeping

bala1802 committed on Dec 21, 2023

Commit

6a3434a

1 Parent(s): 9d4e40b

Upload 3 files

Files changed (3) hide show

image_encoder.py ADDED Viewed

+from torch import nn
+import timm
+from configuration import CFG
+class ImageEncoder(nn.Module):
+    """
+    Encode images to a fixed size vector
+    """
+    def __init__(
+        self, model_name=CFG.model_name, pretrained=CFG.pretrained, trainable=CFG.trainable
+    ):
+        super().__init__()
+        self.model = timm.create_model(
+            model_name, pretrained, num_classes=0, global_pool="avg"
+        )
+        for p in self.model.parameters():
+            p.requires_grad = trainable
+    def forward(self, x):
+        return self.model(x)

projection_head.py ADDED Viewed

+from torch import nn
+from configuration import CFG
+class ProjectionHead(nn.Module):
+    def __init__(
+        self,
+        embedding_dim,
+        projection_dim=CFG.projection_dim,
+        dropout=CFG.dropout
+    ):
+        super().__init__()
+        self.projection = nn.Linear(embedding_dim, projection_dim)
+        self.gelu = nn.GELU()
+        self.fc = nn.Linear(projection_dim, projection_dim)
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(projection_dim)
+    def forward(self, x):
+        projected = self.projection(x)
+        x = self.gelu(projected)
+        x = self.fc(x)
+        x = self.dropout(x)
+        x = x + projected
+        x = self.layer_norm(x)
+        return x

text_encoder.py ADDED Viewed

+from torch import nn
+from configuration import CFG
+from transformers import DistilBertModel, DistilBertConfig, DistilBertTokenizer
+class TextEncoder(nn.Module):
+    def __init__(self, model_name=CFG.text_encoder_model, pretrained=CFG.pretrained, trainable=CFG.trainable):
+        super().__init__()
+        if pretrained:
+            self.model = DistilBertModel.from_pretrained(model_name)
+        else:
+            self.model = DistilBertModel(config=DistilBertConfig())
+        for p in self.model.parameters():
+            p.requires_grad = trainable
+        # we are using the CLS token hidden representation as the sentence's embedding
+        self.target_token_idx = 0
+    def forward(self, input_ids, attention_mask):
+        output = self.model(input_ids=input_ids, attention_mask=attention_mask)
+        last_hidden_state = output.last_hidden_state
+        return last_hidden_state[:, self.target_token_idx, :]