ImageSense / image_caption /get_caption.py
rishabh-zuma's picture
Upload folder using huggingface_hub
b9083a9
#!/usr/bin/env python
"""get_caption.py: Fetch a caption for an image from a remote image-captioning endpoint."""
__author__ = "Rishabh Gupta"
__copyright__ = "Copyright 2023, Zuma"
__created_date__ = "04-08-2023"
import os
import json
import requests
import base64
from io import BytesIO
import PIL
def get_image_caption(img_obj, prompt=None, timeout=120):
    """Request a caption for an image from the remote captioning endpoint.

    The image is serialised to JPEG, base64-encoded and POSTed as JSON to the
    URL stored in the ``IMAGE_CAPTION_ENDPOINT`` environment variable, with
    ``IMAGE_CAPTION_AUTH_TOKEN`` sent as the ``Authorization`` header.

    Args:
        img_obj: Image object exposing a PIL-style ``save(fp, format=...)``
            method (not a base64 string). ``None`` is reported as missing data.
        prompt: Accepted for interface compatibility but currently ignored;
            the payload always sends ``"prompt": None``.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The ``"captions"`` field of the endpoint response on success,
        otherwise a human-readable error message string.
    """
    if img_obj is None:
        return "No image data provided!"

    # Modes the JPEG writer can store directly; anything else (RGBA, P, LA,
    # ...) makes ``save(format="JPEG")`` fail, so it is converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    buffered = BytesIO()
    try:
        mode = getattr(img_obj, "mode", None)
        if mode is not None and mode not in jpeg_modes:
            img_obj = img_obj.convert("RGB")
        img_obj.save(buffered, format="JPEG")
    except PIL.UnidentifiedImageError as e:
        print("Error in saving the image", e)
        return "Invalid image format!"
    except Exception as e:
        print("Some error occurred while saving the image", e)
        return "Some error occurred while loading the image!"

    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

    url = os.environ.get("IMAGE_CAPTION_ENDPOINT")
    auth_token = os.environ.get("IMAGE_CAPTION_AUTH_TOKEN")
    if not url:
        # Without an endpoint the request can only fail; report it clearly
        # instead of letting the HTTP library raise on a missing URL.
        print("IMAGE_CAPTION_ENDPOINT is not set")
        return "Error in generating the caption!"

    payload = json.dumps({
        "inputs": img_str,
        "prompt": None  # Prompts are disabled as accuracy is not very good or not experimented enough.
    })
    headers = {
        'Authorization': auth_token,
        'Content-Type': 'application/json'
    }

    try:
        # A timeout keeps the caller from hanging forever on a stalled server.
        response = requests.request("POST", url, headers=headers, data=payload, timeout=timeout)
    except requests.RequestException as e:
        print("Error while calling the caption endpoint", e)
        return "Error in generating the caption!"

    print("Response Status Code = ", response.status_code)
    print("Response = ", response.text)

    if response.status_code == 200:
        try:
            caption = json.loads(response.text)["captions"]
        except (ValueError, KeyError, TypeError) as e:
            # Body was not JSON, or did not carry the expected "captions" key.
            print("Unexpected response from the caption endpoint", e)
            return "Error in generating the caption!"
        print(f"Caption = {caption}")
        return caption
    if response.status_code == 502:
        print("Status 502")
        return "Model is getting loaded, please wait for 3-4 minutes and then try again."
    return "Error in generating the caption!"
if __name__ == '__main__':
    # Manual smoke test: caption a local image file. The path may be given as
    # the first command-line argument; otherwise the original sample is used.
    import sys

    from PIL import Image

    default_path = "/Users/rishabh/Downloads/5dbadb934042f1.16205085439.jpg"
    image_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    # get_image_caption() calls .save() on its argument, so it must receive an
    # opened image rather than a base64-encoded string of the file contents.
    with Image.open(image_path) as image:
        print(get_image_caption(image))