Spaces:

ahmedmbutt
/

Salesforce-blip-image-captioning-large

Sleeping

ahmedmbutt committed on Jun 27, 2024

Commit

76df62a

verified ·

1 Parent(s): 5e422b6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,27 +1,23 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-import requests
 from PIL import Image
-import io
-# Initialize the Hugging Face Inference Client
-model_id = "Salesforce/blip-image-captioning-large"
-client = InferenceClient(model=model_id)
 def caption_image(image):
-    # Convert the PIL image to bytes
-    buffered = io.BytesIO()
-    image.save(buffered, format="JPEG")
-    img_bytes = buffered.getvalue()
-    # Call the Hugging Face inference API
-    response = client.image_to_text(inputs=img_bytes)
-    # Check the response and format it properly
-    if isinstance(response, list) and response:
-        return response[0]["generated_text"]
-    else:
-        return "Error generating caption"
 # Set up the Gradio interface
 interface = gr.Interface(

 import gradio as gr
+from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+import torch
+# Load the processor and model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 def caption_image(image):
+    # Prepare the image
+    inputs = processor(images=image, return_tensors="pt")
+    # Generate caption
+    out = model.generate(**inputs)
+    # Decode the generated caption
+    caption = processor.decode(out[0], skip_special_tokens=True)
+    return caption
 # Set up the Gradio interface
 interface = gr.Interface(