# Spaces: Sleeping  (hosting-page status text captured along with the file; not part of the program)
import streamlit as st | |
import requests | |
from PIL import Image | |
from transformers import AutoProcessor, Blip2ForConditionalGeneration | |
import torch | |
import io | |
def load_model():
    """Build the BLIP-2 captioning model and its input processor.

    Loads the ``ybelkada/blip2-opt-2.7b-fp16-sharded`` checkpoint, attaches
    the adapter identified by ``blip-cpu-model``, and moves the model to CUDA
    when it is available (CPU otherwise).

    Returns:
        A ``(model, processor)`` tuple.
    """
    # Pick the execution device up front; this does not depend on the model.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    captioner = Blip2ForConditionalGeneration.from_pretrained(
        "ybelkada/blip2-opt-2.7b-fp16-sharded"
    )
    captioner.load_adapter("blip-cpu-model")
    captioner.to(target_device)

    preprocessor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
    return captioner, preprocessor
# Streamlit re-executes this whole script on every widget interaction.
# Wrapping the loader in st.cache_resource keeps one model instance alive
# across reruns instead of rebuilding it each time.
model, processor = st.cache_resource(load_model)()

st.title("Image Captioning with Fine-Tuned BLiPv2 Model")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # NOTE(review): captioning runs in-process; this message is a leftover
    # from an earlier HTTP-based variant and is kept only to preserve output.
    print("Sending API request")

    # Take device and dtype from the model itself: `device` is local to
    # load_model() and is not defined at module scope, and the weights are
    # not guaranteed to be float16 (e.g. when running on CPU).
    inputs = processor(images=image, return_tensors="pt").to(
        model.device, model.dtype
    )
    with torch.no_grad():  # inference only; no gradients needed
        caption_ids = model.generate(**inputs, max_length=128)
    caption = processor.decode(caption_ids[0], skip_special_tokens=True)

    st.write("Generated Caption:")
    st.write(f"**{caption}**")