chats-bug committed
Commit fdf0785 · 1 Parent(s): 307952a

Only Blip2 model active for testing on cpu

Files changed (1): app.py (+36 -31)
app.py CHANGED

@@ -1,13 +1,18 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel
+from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
 import torch
 import open_clip
 
 from huggingface_hub import hf_hub_download
 
+quantizer_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
 # Use when running on a CPU
 device_map = {
-    "embedding": "cpu",
+    "transformer.word_embeddings": 0,
+    "transformer.word_embeddings_layernorm": 0,
+    "lm_head": "cpu",
+    "transformer.h": 0,
+    "transformer.ln_f": 0,
 }
 
 # Load the Blip2 model
@@ -15,30 +20,30 @@ preprocessor_blip2_8_bit = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6
 model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b", device_map=device_map, load_in_8bit_fp32_cpu_offload=True)
 
 # Load the Blip base model
-preprocessor_blip_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model_blip_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+# preprocessor_blip_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+# model_blip_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
-# Load the Blip large model
-preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-model_blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+# # Load the Blip large model
+# preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+# model_blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 
-# Load the GIT coco model
-preprocessor_git_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
-model_git_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+# # Load the GIT coco model
+# preprocessor_git_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+# model_git_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
 
-# Load the CLIP model
-model_oc_coca, _, transform_oc_coca = open_clip.create_model_and_transforms(
-    model_name="coca_ViT-L-14",
-    pretrained="mscoco_finetuned_laion2B-s13B-b90k"
-)
+# # Load the CLIP model
+# model_oc_coca, _, transform_oc_coca = open_clip.create_model_and_transforms(
+#     model_name="coca_ViT-L-14",
+#     pretrained="mscoco_finetuned_laion2B-s13B-b90k"
+# )
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Transfer the models to the device
 model_blip2_8_bit.to(device)
-model_blip_base.to(device)
-model_blip_large.to(device)
-model_git_large_coco.to(device)
-model_oc_coca.to(device)
+# model_blip_base.to(device)
+# model_blip_large.to(device)
+# model_git_large_coco.to(device)
+# model_oc_coca.to(device)
 
 
 def generate_caption(
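
A note on the hunks above: the new quantizer_config is created, but the Blip2ForConditionalGeneration.from_pretrained call still passes load_in_8bit_fp32_cpu_offload=True directly. In the transformers releases I am aware of, that switch lives on BitsAndBytesConfig and is handed over via quantization_config. A minimal sketch, assuming transformers with bitsandbytes installed (the "auto" device map is illustrative, not the exact module map this commit uses):

# Sketch only: quantization_config / device_map wiring, not the repo's exact code.
import torch
from transformers import AutoProcessor, Blip2ForConditionalGeneration, BitsAndBytesConfig

quantizer_config = BitsAndBytesConfig(
    load_in_8bit=True,                       # 8-bit weights via bitsandbytes
    llm_int8_enable_fp32_cpu_offload=True,   # keep CPU-mapped modules in fp32
)

preprocessor_blip2_8_bit = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6.7b")
model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-6.7b",
    quantization_config=quantizer_config,    # config object instead of a bare kwarg
    device_map="auto",                       # or a hand-written {module: device} map
)
# With a device_map, accelerate already places the modules, so a later .to(device) is not needed.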
@@ -139,18 +144,18 @@ def generate_captions(
     caption_blip2_8_bit = generate_caption(preprocessor_blip2_8_bit, model_blip2_8_bit, image, use_float_16=True).strip()
 
     # Generate captions for the image using the Blip base model
-    caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
+    # caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
 
-    # Generate captions for the image using the Blip large model
-    caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
+    # # Generate captions for the image using the Blip large model
+    # caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
 
-    # Generate captions for the image using the GIT coco model
-    caption_git_large_coco = generate_caption(preprocessor_git_large_coco, model_git_large_coco, image).strip()
+    # # Generate captions for the image using the GIT coco model
+    # caption_git_large_coco = generate_caption(preprocessor_git_large_coco, model_git_large_coco, image).strip()
 
-    # Generate captions for the image using the CLIP model
-    caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
+    # # Generate captions for the image using the CLIP model
+    # caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
 
-    return caption_blip2_8_bit, caption_blip_base, caption_blip_large, caption_git_large_coco, caption_oc_coca
+    return caption_blip2_8_bit
 
 
 # Create the interface
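
The hunk above reduces generate_captions to the single Blip2 path; the body of generate_caption itself is not part of this diff. For reference, a minimal sketch of the standard BLIP-2 captioning steps it presumably wraps (the function name, max_new_tokens value, and PIL input type are assumptions):

# Sketch only: the usual processor -> generate -> decode flow for BLIP-2.
import torch
from PIL import Image

def caption_with_blip2(preprocessor, model, image: Image.Image) -> str:
    # Preprocess the image and move the tensors to the model's device in fp16.
    inputs = preprocessor(images=image, return_tensors="pt").to(model.device, torch.float16)
    # Generate caption token ids; 32 new tokens is an arbitrary cap.
    generated_ids = model.generate(**inputs, max_new_tokens=32)
    # Decode to text and trim whitespace.
    return preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()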
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
3
  import torch
4
  import open_clip
5
 
6
  from huggingface_hub import hf_hub_download
7
 
8
+ quantizer_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
9
  # Use when running on a CPU
10
  device_map = {
11
+ "transformer.word_embeddings": 0,
12
+ "transformer.word_embeddings_layernorm": 0,
13
+ "lm_head": "cpu",
14
+ "transformer.h": 0,
15
+ "transformer.ln_f": 0,
16
  }
17
 
18
  # Load the Blip2 model
 
20
  model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b", device_map=device_map, load_in_8bit_fp32_cpu_offload=True)
21
 
22
  # Load the Blip base model
23
+ # preprocessor_blip_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
24
+ # model_blip_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
25
 
26
+ # # Load the Blip large model
27
+ # preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
28
+ # model_blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
29
 
30
+ # # Load the GIT coco model
31
+ # preprocessor_git_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
32
+ # model_git_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
33
 
34
+ # # Load the CLIP model
35
+ # model_oc_coca, _, transform_oc_coca = open_clip.create_model_and_transforms(
36
+ # model_name="coca_ViT-L-14",
37
+ # pretrained="mscoco_finetuned_laion2B-s13B-b90k"
38
+ # )
39
 
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
  # Transfer the models to the device
42
  model_blip2_8_bit.to(device)
43
+ # model_blip_base.to(device)
44
+ # model_blip_large.to(device)
45
+ # model_git_large_coco.to(device)
46
+ # model_oc_coca.to(device)
47
 
48
 
49
  def generate_caption(
 
144
  caption_blip2_8_bit = generate_caption(preprocessor_blip2_8_bit, model_blip2_8_bit, image, use_float_16=True).strip()
145
 
146
  # Generate captions for the image using the Blip base model
147
+ # caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
148
 
149
+ # # Generate captions for the image using the Blip large model
150
+ # caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
151
 
152
+ # # Generate captions for the image using the GIT coco model
153
+ # caption_git_large_coco = generate_caption(preprocessor_git_large_coco, model_git_large_coco, image).strip()
154
 
155
+ # # Generate captions for the image using the CLIP model
156
+ # caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
157
 
158
+ return caption_blip2_8_bit
159
 
160
 
161
  # Create the interface
 
170
  # Define the outputs
171
  outputs=[
172
  gr.outputs.Textbox(label="Blip2 8-bit"),
173
+ # gr.outputs.Textbox(label="Blip base"),
174
+ # gr.outputs.Textbox(label="Blip large"),
175
+ # gr.outputs.Textbox(label="GIT large coco"),
176
+ # gr.outputs.Textbox(label="CLIP"),
177
  ],
178
  title="Image Captioning",
179
  description="Generate captions for images using the Blip2 model, the Blip base model, the Blip large model, the GIT large coco model, and the CLIP model.",
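
With generate_captions now returning one string, only the Blip2 Textbox kept above is wired up. A minimal end-to-end sketch of the reduced interface, written against the Gradio 3 component API rather than the legacy gr.outputs used in this file (the gr.Image input and the launch() call are assumptions; the inputs= block is not shown in this diff):

# Sketch only: single-input, single-output interface for the Blip2-only app.
import gradio as gr

iface = gr.Interface(
    fn=generate_captions,                        # now returns just the Blip2 caption
    inputs=gr.Image(type="pil", label="Image"),  # assumed input component
    outputs=gr.Textbox(label="Blip2 8-bit"),     # one output to match the single return value
    title="Image Captioning",
)

if __name__ == "__main__":
    iface.launch()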