uf-aice-lab committed · verified · Commit a7e3274 · Parent(s): f143732

Update README.md

Files changed (1): README.md (+26 -19)

README.md (updated):
 
---
license: mit
---

# BLIPNet Model

This is the structure of the BLIPNet model. You can load the model with this structure, or you can create a bigger model for your specific task (see the sketch after the code block).

## Model Structure

```python
import torch
import torch.nn as nn
from transformers import BlipForConditionalGeneration

class BLIPNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Generation model. "MODEL_NAME" is a placeholder for the base BLIP checkpoint.
        self.model = BlipForConditionalGeneration.from_pretrained("MODEL_NAME", cache_dir="model")
        # Same as in https://huggingface.co/uf-aice-lab/BLIP-Math:
        # 443136 = 577 tokens * 768 hidden dims of the flattened vision features.
        self.ebd_dim = 443136

        # Classification model
        fc_dim = 64  # You can choose a higher number for better performance, for example, 1024.
        self.head = nn.Sequential(
            nn.Linear(self.ebd_dim, fc_dim),
            nn.ReLU(),
        )
        self.score = nn.Linear(fc_dim, 5)  # 5 classes

    def forward(self, pixel_values, input_ids):
        outputs = self.model(input_ids=input_ids, pixel_values=pixel_values, labels=input_ids)
        image_text_embeds = self.model.vision_model(pixel_values, return_dict=True).last_hidden_state
        image_text_embeds = self.head(image_text_embeds.view(-1, self.ebd_dim))

        # The classification head runs on embeddings from the generative model,
        # leveraging BLIP's powerful image-text encoding capabilities.
        logits = self.score(image_text_embeds)

        # generation outputs, classification logits
        return outputs, logits

model = BLIPNet()
# "best_model_wts_path" is a placeholder for the path to your fine-tuned weights.
model.load_state_dict(torch.load("best_model_wts_path"), strict=False)
```
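As the intro notes, you can build a bigger model on the same backbone. One way (a sketch; `BLIPNetLarge` is hypothetical, and the layer size is just the example value from the `fc_dim` comment, not a tuned configuration) is to widen and deepen the classification head:

```python
import torch.nn as nn

# Hypothetical wider variant; not part of the released checkpoint.
class BLIPNetLarge(BLIPNet):
    def __init__(self):
        super().__init__()
        fc_dim = 1024  # the "higher number" suggested in the comment above
        self.head = nn.Sequential(
            nn.Linear(self.ebd_dim, fc_dim),
            nn.ReLU(),
            nn.Linear(fc_dim, fc_dim),
            nn.ReLU(),
        )
        self.score = nn.Linear(fc_dim, 5)  # still 5 classes
```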
## Usage

You need to input the sample in the same way as shown in [BLIP-Math](https://huggingface.co/uf-aice-lab/BLIP-Math). Then you can get the generated text and the classification score at the same time.
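As a minimal sketch of that input format (assuming a `BlipProcessor` prepares the image-text pair, as with other BLIP checkpoints; the processor id, image path, and text below are illustrative only):

```python
import torch
from PIL import Image
from transformers import BlipProcessor

# Illustrative processor id: use the one matching the base checkpoint ("MODEL_NAME") above.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("sample.png").convert("RGB")  # hypothetical sample image
inputs = processor(images=image, text="a student's written response", return_tensors="pt")

model.eval()
with torch.no_grad():
    outputs, logits = model(pixel_values=inputs["pixel_values"], input_ids=inputs["input_ids"])

# outputs holds the language-modeling output; logits holds the 5-way class scores.
predicted_class = logits.argmax(dim=-1).item()
```

Because the state dict was loaded with `strict=False`, any keys missing from the checkpoint are silently left at their random initialization, so check that the saved weights actually include `head` and `score`.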