kartikay24 commited on
Commit
76a3399
·
1 Parent(s): c78e02a

CPU version of the code

Browse files
Files changed (1): app.py (+7 lines, −16 lines)
app.py CHANGED
@@ -1,13 +1,11 @@
1
- import torch
2
  import requests
3
  from PIL import Image
4
  from transformers import BlipProcessor, BlipForConditionalGeneration
5
  import gradio as gr
6
- device="cpu"
7
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
8
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to(device)
9
 
10
- # Function to process the image and generate captions
11
  def generate_caption(image, caption_type, text):
12
  raw_image = Image.fromarray(image.astype('uint8'), 'RGB')
13
 
@@ -18,28 +16,21 @@ def generate_caption(image, caption_type, text):
18
 
19
  return caption
20
 
21
- # Conditional image captioning
22
  def conditional_image_captioning(raw_image, text):
23
- inputs = processor(raw_image, text, return_tensors="pt").to(device, torch.float16)
24
  out = model.generate(**inputs)
25
  caption = processor.decode(out[0], skip_special_tokens=True)
26
  return caption
27
 
28
- # Unconditional image captioning
29
  def unconditional_image_captioning(raw_image):
30
- inputs = processor(raw_image, return_tensors="pt").to(device, torch.float16)
31
  out = model.generate(**inputs)
32
  caption = processor.decode(out[0], skip_special_tokens=True)
33
  return caption
34
 
35
- # Interface setup
36
- input_image = gr.inputs.Image()
37
  input_text = gr.inputs.Textbox(label="Enter Text (for Conditional Captioning)")
38
-
39
- choices = ["Conditional", "Unconditional"]
40
- radio_button = gr.inputs.Radio(choices, label="Captioning Type")
41
-
42
  output_text = gr.outputs.Textbox(label="Caption")
43
 
44
- # Create the interface
45
- gr.Interface(fn=generate_caption, inputs=[input_image, radio_button, input_text], outputs=output_text, title="Image Captioning",debug=True).launch()
 
 
1
  import requests
2
  from PIL import Image
3
  from transformers import BlipProcessor, BlipForConditionalGeneration
4
  import gradio as gr
5
+
6
# Load the BLIP image-captioning pipeline once at import time.
# No dtype/device override is applied, so the default fp32 weights run on CPU.
_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
 
 
9
  def generate_caption(image, caption_type, text):
10
  raw_image = Image.fromarray(image.astype('uint8'), 'RGB')
11
 
 
16
 
17
  return caption
18
 
 
19
def conditional_image_captioning(raw_image, text):
    """Generate a caption for *raw_image*, conditioned on the prompt *text*.

    Uses the module-level ``processor`` and ``model``; the prompt is fed to
    the processor together with the image so generation continues from it.
    """
    encoded = processor(raw_image, text, return_tensors="pt")
    generated_ids = model.generate(**encoded)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
24
 
 
25
def unconditional_image_captioning(raw_image):
    """Generate a caption for *raw_image* with no text prompt.

    Uses the module-level ``processor`` and ``model``.
    """
    encoded = processor(raw_image, return_tensors="pt")
    generated_ids = model.generate(**encoded)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
30
 
31
# --- Gradio UI (legacy gr.inputs / gr.outputs widget API) ---
# NOTE(review): gr.inputs/gr.outputs were removed in Gradio 3.x — this file
# presumably targets an older Gradio release; confirm the pinned version.
input_image = gr.inputs.Image(label="Upload an Image")
input_text = gr.inputs.Textbox(label="Enter Text (for Conditional Captioning)")
radio_button = gr.inputs.Radio(choices=["Conditional", "Unconditional"], label="Captioning Type")
output_text = gr.outputs.Textbox(label="Caption")

# Wire the widgets to generate_caption and start the app.
gr.Interface(
    fn=generate_caption,
    inputs=[input_image, radio_button, input_text],
    outputs=output_text,
    title="Image Captioning",
).launch()