chats-bug committed
Commit d3bbf05 · 1 Parent(s): 99813d9

Activated 4 models

Files changed (1)
  1. app.py +25 -49
app.py CHANGED
@@ -1,54 +1,34 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig, BlipProcessor
+from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
 import torch
 import open_clip
 
 from huggingface_hub import hf_hub_download
 
-quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
-# Use when running on a CPU
-device_map = {
-    "transformer.word_embeddings": 0,
-    "transformer.word_embeddings_layernorm": 0,
-    "lm_head": "cpu",
-    "transformer.h": 0,
-    "transformer.ln_f": 0,
-}
-
-# Load the Blip2 model
-# preprocessor_blip2_8_bit = BlipProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
-# model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained(
-#     "Salesforce/blip2-opt-2.7b",
-#     device_map="auto",
-#     quantization_config=quantization_config,
-#     load_in_8bit=True
-# )
-
 # Load the Blip base model
 preprocessor_blip_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model_blip_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
-# # Load the Blip large model
-# preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-# model_blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+# Load the Blip large model
+preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+model_blip_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 
-# # Load the GIT coco model
-# preprocessor_git_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
-# model_git_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+# Load the GIT coco model
+preprocessor_git_large_coco = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+model_git_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
 
-# # Load the CLIP model
-# model_oc_coca, _, transform_oc_coca = open_clip.create_model_and_transforms(
-#     model_name="coca_ViT-L-14",
-#     pretrained="mscoco_finetuned_laion2B-s13B-b90k"
-# )
+# Load the CLIP model
+model_oc_coca, _, transform_oc_coca = open_clip.create_model_and_transforms(
+    model_name="coca_ViT-L-14",
+    pretrained="mscoco_finetuned_laion2B-s13B-b90k"
+)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Transfer the models to the device
-# model_blip2_8_bit.to(device)
 model_blip_base.to(device)
-# model_blip_large.to(device)
-# model_git_large_coco.to(device)
-# model_oc_coca.to(device)
+model_blip_large.to(device)
+model_git_large_coco.to(device)
+model_oc_coca.to(device)
 
 
 def generate_caption(
@@ -149,22 +129,19 @@ def generate_captions(
     str
         The generated caption.
     """
-    # Generate captions for the image using the Blip2 model
-    # caption_blip2_8_bit = generate_caption(preprocessor_blip2_8_bit, model_blip2_8_bit, image, use_float_16=True).strip()
-
     # Generate captions for the image using the Blip base model
     caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
 
-    # # Generate captions for the image using the Blip large model
-    # caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
+    # Generate captions for the image using the Blip large model
+    caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
 
-    # # Generate captions for the image using the GIT coco model
-    # caption_git_large_coco = generate_caption(preprocessor_git_large_coco, model_git_large_coco, image).strip()
+    # Generate captions for the image using the GIT coco model
+    caption_git_large_coco = generate_caption(preprocessor_git_large_coco, model_git_large_coco, image).strip()
 
-    # # Generate captions for the image using the CLIP model
-    # caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
+    # Generate captions for the image using the CLIP model
+    caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
 
-    return caption_blip_base
+    return caption_blip_base, caption_blip_large, caption_git_large_coco, caption_oc_coca
 
 
 # Create the interface
@@ -178,11 +155,10 @@ iface = gr.Interface(
     ],
     # Define the outputs
     outputs=[
-        # gr.outputs.Textbox(label="Blip2 8-bit"),
         gr.outputs.Textbox(label="Blip base"),
-        # gr.outputs.Textbox(label="Blip large"),
-        # gr.outputs.Textbox(label="GIT large coco"),
-        # gr.outputs.Textbox(label="CLIP"),
+        gr.outputs.Textbox(label="Blip large"),
+        gr.outputs.Textbox(label="GIT large coco"),
+        gr.outputs.Textbox(label="CLIP"),
     ],
     title="Image Captioning",
     description="Generate captions for images using the Blip2 model, the Blip base model, the Blip large model, the GIT large coco model, and the CLIP model.",
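The body of generate_caption sits outside the hunks shown above; the call sites only reveal that it takes a processor, a model, and an image. A minimal sketch of what such a helper typically looks like for the BLIP and GIT checkpoints loaded in this commit (the exact signature and the max_new_tokens value are assumptions, not the file's actual code):

def generate_caption(preprocessor, model, image):
    # Assumed helper, not shown in the diff: turn the PIL image into pixel values for the vision encoder
    inputs = preprocessor(images=image, return_tensors="pt").to(device)
    # Generate a short caption; the same call works for BlipForConditionalGeneration and the GIT causal LM
    generated_ids = model.generate(**inputs, max_new_tokens=32)
    # Decode the token ids back into text
    return preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]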
 
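generate_captions_clip, used for the CoCa checkpoint loaded through open_clip, is likewise not visible in the diff. Assuming it follows open_clip's standard CoCa captioning recipe, it would look roughly like this (the token-cleanup details are an assumption):

def generate_captions_clip(model, transform, image):
    # Preprocess and batch the PIL image with the transform returned by create_model_and_transforms
    im = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        # CoCa checkpoints in open_clip expose generate() for captioning
        generated = model.generate(im)
    # Decode the generated token ids and strip the start/end-of-text markers
    caption = open_clip.decode(generated[0])
    return caption.replace("<start_of_text>", "").replace("<end_of_text>", "")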
 
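The last hunk only shows the outputs list; the input component and the launch call are outside the diff. Assuming a single PIL image input (hypothetical here, not taken from the file), the interface wiring after this commit would look roughly like:

iface = gr.Interface(
    fn=generate_captions,
    # Input component assumed for illustration; the actual inputs are not part of this diff
    inputs=[gr.inputs.Image(type="pil", label="Image")],
    # The four returned captions map onto the four text boxes in order
    outputs=[
        gr.outputs.Textbox(label="Blip base"),
        gr.outputs.Textbox(label="Blip large"),
        gr.outputs.Textbox(label="GIT large coco"),
        gr.outputs.Textbox(label="CLIP"),
    ],
    title="Image Captioning",
)
iface.launch()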