# Libraries
import streamlit as st
from transformers import BlipForConditionalGeneration, AutoTokenizer
import torch
from PIL import Image
import torchvision.transforms as transforms

# Hugging Face Hub id of the fine-tuned checkpoint; used for both the model
# and the tokenizer.
MODEL_ID = "PrabalPaul007/Prabal_AI_ML_stable"

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the fine-tuned model and tokenizer and place the model on `device`.

    Streamlit re-executes this script from the top on every user interaction;
    caching the loaded objects avoids re-reading the checkpoint on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has been moved to `device`.
    """
    loaded_model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # The input tensor is moved to `device` in generate_caption(); the model
    # must live on the same device or generate() fails on CUDA machines.
    loaded_model.to(device)
    return loaded_model, loaded_tokenizer


# Load the fine-tuned model and tokenizer
model, tokenizer = _load_model_and_tokenizer()

# Image -> normalized tensor pipeline, built once instead of on every call.
# NOTE(review): the 224x224 resize in generate_caption() and the ImageNet
# mean/std used here are hand-rolled; confirm they match the preprocessing
# the checkpoint was fine-tuned with (see the checkpoint's preprocessor config).
_preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Function to generate caption for the uploaded image
def generate_caption(image):
    """Generate a caption for an image.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a file-like
            object such as the value returned by ``st.file_uploader``).

    Returns:
        The decoded caption string with special tokens removed.
    """
    # Preprocess the image: force 3 channels, then resize to the input size
    # this script feeds the model.
    pil_image = Image.open(image).convert("RGB")
    pil_image = pil_image.resize((224, 224))

    # Convert the image to a batched tensor (batch dimension of 1) on `device`.
    image_tensor = _preprocess(pil_image).unsqueeze(0).to(device)

    # Generate caption. Inference only, so no autograd graph is needed.
    with torch.no_grad():
        output = model.generate(pixel_values=image_tensor)
    caption = tokenizer.decode(output[0], skip_special_tokens=True)
    return caption


# Streamlit app
st.title("Cartoon Caption Generator")

uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", 'png'])

if uploaded_image is not None:
    # NOTE(review): newer Streamlit releases appear to deprecate
    # `use_column_width` in favour of `use_container_width`; confirm against
    # the pinned Streamlit version before changing it.
    st.image(uploaded_image, caption='Uploaded Image.', use_column_width=True)
    st.write("")
    st.write("Generating caption...")

    # Generate caption for the uploaded image (no text prompt is passed).
    caption = generate_caption(uploaded_image)
    st.write("Caption:", caption)