import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Title of the Streamlit app
st.title("Image and Text Combined in One Message")

# Load the pre-trained BLIP captioning model and its processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Image upload
uploaded_file = st.file_uploader("Upload a product image (JPG, JPEG, PNG):", type=["jpg", "jpeg", "png"])

if uploaded_file:
    # Open and display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generate the description using the BLIP model
    st.write("Processing the image...")

    # Preprocess the image and generate a caption
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)

    # Decode the generated tokens into a text description
    generated_description = processor.decode(out[0], skip_special_tokens=True)

    # Combine the image and text in one message
    st.markdown(f"**Generated Product Description:** {generated_description}")
    st.markdown("**Here is your product image:**")
    st.image(image, caption="Generated Product Image", use_column_width=True)
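
# A minimal sketch of how to try this locally, assuming the script is saved as
# app.py (the filename is an assumption) and the required packages are
# installed. Streamlit apps are launched with the `streamlit run` command
# rather than executed directly with `python`:
#
#   pip install streamlit pillow transformers torch
#   streamlit run app.py
#
# Note that the BLIP model weights are downloaded from the Hugging Face Hub on
# first run, so the initial launch may take a while.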