|
from transformers import Blip2ForConditionalGeneration |
|
from transformers import Blip2Processor |
|
from peft import PeftModel |
|
import streamlit as st |
|
from PIL import Image |
|
|
|
import os |
|
|
|
# Checkpoint locations used by init_model().

# Passed to Blip2Processor.from_pretrained (presumably a Hugging Face Hub id).
preprocess_ckp = "Salesforce/blip2-opt-2.7b"

# Local directory passed to Blip2ForConditionalGeneration.from_pretrained.
base_model_ckp = "./model/blip2-opt-2.7b-fp16-sharded"

# Local directory passed to PeftModel.from_pretrained on top of the base model.
peft_model_ckp = "./model/blip2_peft"

# Folder that the (currently disabled) sample-image code in main() joins with
# the selected sample name.
sample_img_path = "./sample_images/"
|
|
|
|
|
|
|
def init_model():
    """Load the BLIP-2 processor and the PEFT-wrapped captioning model.

    The processor is loaded from ``preprocess_ckp``. The base
    ``Blip2ForConditionalGeneration`` model is loaded from ``base_model_ckp``
    and then wrapped with the adapter stored at ``peft_model_ckp``.

    Returns:
        A ``(processor, model)`` tuple, where ``model`` is the ``PeftModel``
        built on top of the base model.
    """
    # Load order matches the original: processor, base model, then adapter.
    image_processor = Blip2Processor.from_pretrained(preprocess_ckp)

    base_model = Blip2ForConditionalGeneration.from_pretrained(base_model_ckp)
    adapted_model = PeftModel.from_pretrained(base_model, peft_model_ckp)

    return image_processor, adapted_model
|
|
|
|
|
def main():
    """Render the Streamlit page for the fashion image captioning demo.

    Draws the page title, a selector for the bundled sample images, an image
    uploader, and echoes the currently selected sample name. The captioning
    step itself is disabled and preserved below as comments.
    """
    st.title("Fashion Image Caption using BLIP2")

    option = st.selectbox('Select from sample an images', ('None', 'cap', 'tee', 'dress'), index=0)
    st.text("OR")
    # Only consumed by the disabled pipeline below, but the call must stay so
    # the uploader widget is still drawn.
    file_name = st.file_uploader("Upload an image")
    st.text(option)

    # The captioning pipeline below is disabled. It is kept as `#` comments
    # rather than inside a bare triple-quoted string: Streamlit "magic" writes
    # any standalone string expression that is not a docstring to the page, so
    # the string form would dump this source text into the app.
    #
    # TODO before re-enabling:
    #   * `processor` and `model` are not defined in this function; they have
    #     to be obtained from init_model() first.
    #   * `option` is always one of the selectbox strings, so
    #     `option is not None` is true even for the 'None' placeholder and
    #     `file_name` would become "./sample_images/None" when nothing is
    #     uploaded.
    #
    # if file_name is None and option is not None:
    #     file_name = os.path.join(sample_img_path, option)
    #
    # if file_name is not None:
    #     image_col, caption_text = st.columns(2)
    #
    #     image_col.header("Image")
    #     image = Image.open(file_name)
    #     image_col.image(image, use_column_width = True)
    #
    #     # Preprocess the image.
    #     # Inference on GPU. Original author's note: using this produced
    #     # errors like '"slow_conv2d_cpu" not implemented for 'Half'' and
    #     # 'Input type (float) and bias type (struct c10::Half)'.
    #     # inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)
    #
    #     # Inference on CPU.
    #     inputs = processor(images = image, return_tensors = "pt")
    #     pixel_values = inputs.pixel_values
    #
    #     # Predict the caption for the image.
    #     generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
    #     generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    #
    #     # Output the predicted text.
    #     caption_text.header("Generated Caption")
    #     caption_text.text(generated_caption)
|
|
|
# Build the page only when this file is executed as the entry script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()