import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Initialize processor and model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda")
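# Note: this script assumes a CUDA-capable GPU is available; replace "cuda" with "cpu" here and below to run without one.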
# Function to process and caption an image from a URL
def caption_image(image_url):
    try:
        # Load image from the provided URL
        raw_image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')

        # Conditional image captioning (caption guided by a text prompt)
        text = "a photography of"
        inputs = processor(raw_image, text, return_tensors="pt").to("cuda")
        out = model.generate(**inputs)
        conditional_caption = processor.decode(out[0], skip_special_tokens=True)

        # Unconditional image captioning (no prompt)
        inputs = processor(raw_image, return_tensors="pt").to("cuda")
        out = model.generate(**inputs)
        unconditional_caption = processor.decode(out[0], skip_special_tokens=True)

        # Print the results
        print("Conditional Caption:", conditional_caption)
        print("Unconditional Caption:", unconditional_caption)
    except Exception as e:
        print(f"Error occurred: {e}")
# Get image URL from user input
image_url = input("Enter the image URL: ")
caption_image(image_url)