import streamlit as st import os import zipfile import tempfile import base64 from PIL import Image from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer import pandas as pd from nltk.corpus import wordnet import spacy import io from spacy.cli import download # Download the model if it is not already present download("en_core_web_sm") # Load the model nlp = spacy.load("en_core_web_sm") # Your existing code here # Download NLTK WordNet data import nltk nltk.download('wordnet') nltk.download('omw-1.4') # Load spaCy model nlp = spacy.load("en_core_web_sm") # Load the pre-trained model for image captioning model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09" model = VisionEncoderDecoderModel.from_pretrained(model_name) feature_extractor = ViTFeatureExtractor.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) def generate_caption(image): pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values output_ids = model.generate(pixel_values) caption = tokenizer.decode(output_ids[0], skip_special_tokens=True) return caption def get_synonyms(word): synonyms = set() for syn in wordnet.synsets(word): for lemma in syn.lemmas(): synonyms.add(lemma.name()) return synonyms def preprocess_query(query): doc = nlp(query) tokens = set() for token in doc: tokens.add(token.text) tokens.add(token.lemma_) tokens.update(get_synonyms(token.text)) return tokens def search_captions(query, captions): query_tokens = preprocess_query(query) results = [] for path, caption in captions.items(): caption_tokens = preprocess_query(caption) if query_tokens & caption_tokens: results.append((path, caption)) return results st.title("Image Gallery with Captioning and Search") # Sidebar for search functionality with st.sidebar: query = st.text_input("Search images by caption:") # Right side for folder path input and displaying images option = st.selectbox("Select input method:", ["Folder Path", "Upload Images"]) if option == "Folder Path": folder_path = st.text_input("Enter the folder path containing images:") image_files = [] if folder_path and os.path.isdir(folder_path): image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.lower().endswith(('png', 'jpg', 'jpeg'))] else: uploaded_files = st.file_uploader("Upload images or a zip file containing images:", type=['png', 'jpg', 'jpeg', 'zip'], accept_multiple_files=True) image_files = [] if uploaded_files: for uploaded_file in uploaded_files: if uploaded_file.name.endswith('.zip'): with zipfile.ZipFile(uploaded_file, 'r') as zip_ref: zip_ref.extractall("uploaded_images") for file in zip_ref.namelist(): if file.lower().endswith(('png', 'jpg', 'jpeg')): image_files.append(os.path.join("uploaded_images", file)) else: if uploaded_file.name.lower().endswith(('png', 'jpg', 'jpeg')): temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) temp_file.write(uploaded_file.read()) image_files.append(temp_file.name) captions = {} if st.button("Generate Captions", key='generate_captions'): for image_file in image_files: try: image = Image.open(image_file) caption = generate_caption(image) if option == "Folder Path": captions[os.path.join(folder_path, os.path.basename(image_file))] = caption else: if image_file.startswith("uploaded_images"): captions[image_file.replace("uploaded_images/", "")] = caption else: captions[os.path.basename(image_file)] = caption except Exception as e: st.error(f"Error processing {image_file}: {e}") # Display images in a grid st.subheader("Images and Captions:") cols = st.columns(4) idx = 0 for image_path, caption in captions.items(): col = cols[idx % 4] with col: try: with open(image_path, "rb") as img_file: img_bytes = img_file.read() encoded_image = base64.b64encode(img_bytes).decode() st.markdown( f"""
{caption}
{image_path}
{caption}
{image_path}