|
import gradio as gr |
|
import torch |
|
from transformers import CLIPProcessor, CLIPModel |
|
from datasets import load_dataset |
|
from PIL import Image |
|
|
import matplotlib.pyplot as plt |
|
import os |
|
import glob |
|
from pathlib import Path |
|
import numpy as np |
|
|
|
|
|
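# CLIP model state and cached demo-dataset embeddings, shared across Gradio
# callbacks and populated once at startup.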
|
model = None |
|
processor = None |
|
device = None |
|
demo_data = None |
|
demo_text_emb = None |
|
demo_image_emb = None |
|
|
|
|
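# State for a user-supplied image folder, populated by load_custom_folder().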
|
custom_images = [] |
|
custom_descriptions = [] |
|
custom_paths = [] |
|
custom_image_emb = None |
|
current_data_source = "demo" |
|
|
|
def load_model_and_demo_data(): |
|
"""Load CLIP model and demo dataset""" |
|
global model, processor, device, demo_data, demo_text_emb, demo_image_emb |
|
|
|
try: |
|
|
|
demo_data = load_dataset("jamescalam/image-text-demo", split="train") |
|
|
|
|
|
model_id = "openai/clip-vit-base-patch32" |
|
processor = CLIPProcessor.from_pretrained(model_id) |
|
model = CLIPModel.from_pretrained(model_id) |
|
|
|
|
|
device = 'cuda' if torch.cuda.is_available() else 'cpu' |
|
model.to(device) |
|
|
|
|
|
text = demo_data['text'] |
|
images = demo_data['image'] |
|
|
|
inputs = processor( |
|
text=text, |
|
images=images, |
|
return_tensors="pt", |
|
padding=True, |
|
).to(device) |
|
|
|
        # No gradients are needed at inference time; this saves memory on the batch.
        with torch.no_grad():
            outputs = model(**inputs)
|
|
|
|
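        # L2-normalize the embeddings so dot products equal cosine similarity.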
|
demo_text_emb = outputs.text_embeds |
|
demo_text_emb = demo_text_emb / torch.norm(demo_text_emb, dim=1, keepdim=True) |
|
|
|
demo_image_emb = outputs.image_embeds |
|
demo_image_emb = demo_image_emb / torch.norm(demo_image_emb, dim=1, keepdim=True) |
|
|
|
        return f"✅ Model loaded successfully on {device.upper()}. Demo dataset: {len(demo_data)} images."
|
|
|
except Exception as e: |
|
        return f"❌ Error loading model: {str(e)}"
|
|
|
def load_custom_folder(folder_path): |
|
"""Load images from a custom folder""" |
|
global custom_images, custom_descriptions, custom_paths, custom_image_emb, current_data_source |
|
|
|
if not folder_path or not os.path.exists(folder_path): |
|
        return "❌ Invalid folder path"
|
|
|
try: |
|
supported_formats = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.gif', '*.tiff'] |
|
image_paths = [] |
|
|
|
|
|
        # A recursive '**' glob also matches files directly in folder_path, so a
        # single pass covers both flat and nested layouts; uppercase variants
        # are included for case-sensitive filesystems.
        for format_type in supported_formats:
            image_paths.extend(glob.glob(os.path.join(folder_path, '**', format_type), recursive=True))
            image_paths.extend(glob.glob(os.path.join(folder_path, '**', format_type.upper()), recursive=True))

        # De-duplicate (the patterns can overlap) and sort for a stable ordering.
        image_paths = sorted(set(image_paths))
|
|
|
if not image_paths: |
|
            return "❌ No valid images found in the specified folder"
|
|
|
|
|
custom_images.clear() |
|
custom_descriptions.clear() |
|
custom_paths.clear() |
|
|
|
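        # Load at most 100 images to keep embedding computation bounded.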
for img_path in image_paths[:100]: |
|
try: |
|
img = Image.open(img_path).convert('RGB') |
|
custom_images.append(img) |
|
filename = Path(img_path).stem |
|
custom_descriptions.append(f"Image: {filename}") |
|
custom_paths.append(img_path) |
|
            except Exception:
                # Skip files that fail to open or decode.
                continue
|
|
|
if not custom_images: |
|
            return "❌ No valid images could be loaded"
|
|
|
|
|
        custom_image_emb = compute_custom_embeddings(custom_images, custom_descriptions)
        if custom_image_emb is None:
            return "❌ Failed to compute embeddings for the loaded images"

        current_data_source = "custom"
|
|
|
        return f"✅ Loaded {len(custom_images)} images from custom folder"
|
|
|
except Exception as e: |
|
        return f"❌ Error loading custom folder: {str(e)}"
|
|
|
def compute_custom_embeddings(images, descriptions): |
|
"""Compute embeddings for custom images""" |
|
try: |
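        # Embed in small batches to bound memory use; batch results are staged
        # on the CPU and concatenated back onto the compute device at the end.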
|
batch_size = 8 |
|
all_image_embeddings = [] |
|
|
|
for i in range(0, len(images), batch_size): |
|
batch_images = images[i:i+batch_size] |
|
batch_texts = descriptions[i:i+batch_size] |
|
|
|
inputs = processor( |
|
text=batch_texts, |
|
images=batch_images, |
|
return_tensors="pt", |
|
padding=True, |
|
).to(device) |
|
|
|
with torch.no_grad(): |
|
outputs = model(**inputs) |
|
image_emb = outputs.image_embeds |
|
image_emb = image_emb / torch.norm(image_emb, dim=1, keepdim=True) |
|
all_image_embeddings.append(image_emb.cpu()) |
|
|
|
return torch.cat(all_image_embeddings, dim=0).to(device) |
|
|
|
except Exception as e: |
|
print(f"Error computing embeddings: {str(e)}") |
|
return None |
|
|
|
def search_images_by_text(query_text, top_k=5, data_source="demo"): |
|
"""Search images based on text query""" |
|
if not query_text.strip(): |
|
return [], "Please enter a search query" |
|
|
|
try: |
|
|
|
if data_source == "custom" and custom_image_emb is not None: |
|
images = custom_images |
|
descriptions = custom_descriptions |
|
image_emb = custom_image_emb |
|
else: |
|
images = demo_data['image'] |
|
descriptions = demo_data['text'] |
|
image_emb = demo_image_emb |
|
|
|
|
|
inputs = processor(text=[query_text], return_tensors="pt", padding=True).to(device) |
|
|
|
with torch.no_grad(): |
|
text_features = model.get_text_features(**inputs) |
|
text_features = text_features / text_features.norm(dim=-1, keepdim=True) |
|
|
|
|
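        # Both sides are unit length, so this matrix product is cosine similarity.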
|
similarity = torch.mm(text_features, image_emb.T) |
|
|
|
|
|
values, indices = similarity[0].topk(min(top_k, len(images))) |
|
|
|
results = [] |
|
        for idx, score in zip(indices.tolist(), values.tolist()):
            results.append((images[idx], f"Score: {score:.3f}\n{descriptions[idx]}"))
|
|
|
status = f"Found {len(results)} matches for: '{query_text}'" |
|
return results, status |
|
|
|
except Exception as e: |
|
return [], f"Error during search: {str(e)}" |
|
|
|
def search_similar_images(query_image, top_k=5, data_source="demo"): |
|
"""Search similar images based on query image""" |
|
if query_image is None: |
|
return [], "Please provide a query image" |
|
|
|
try: |
|
|
|
if data_source == "custom" and custom_image_emb is not None: |
|
images = custom_images |
|
descriptions = custom_descriptions |
|
image_emb = custom_image_emb |
|
else: |
|
images = demo_data['image'] |
|
descriptions = demo_data['text'] |
|
image_emb = demo_image_emb |
|
|
|
|
|
        inputs = processor(images=query_image, return_tensors="pt").to(device)
|
|
|
with torch.no_grad(): |
|
image_features = model.get_image_features(**inputs) |
|
image_features = image_features / image_features.norm(dim=-1, keepdim=True) |
|
|
|
|
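        # Cosine similarity between the query image and all indexed images.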
|
similarity = torch.mm(image_features, image_emb.T) |
|
|
|
|
|
values, indices = similarity[0].topk(min(top_k, len(images))) |
|
|
|
results = [] |
|
        for idx, score in zip(indices.tolist(), values.tolist()):
            results.append((images[idx], f"Score: {score:.3f}\n{descriptions[idx]}"))
|
|
|
status = f"Found {len(results)} similar images" |
|
return results, status |
|
|
|
except Exception as e: |
|
return [], f"Error during search: {str(e)}" |
|
|
|
def classify_image(image, labels_text): |
|
"""Classify image with custom labels""" |
|
if image is None: |
|
return None, "Please provide an image" |
|
|
|
if not labels_text.strip(): |
|
return None, "Please provide labels" |
|
|
|
try: |
|
labels = [label.strip() for label in labels_text.split('\n') if label.strip()] |
|
|
|
if not labels: |
|
return None, "Please provide valid labels" |
|
|
|
|
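        # Wrap each label in a caption-style prompt; CLIP matches images against
        # natural-language text better than bare class names.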
|
text_prompts = [f"a photo of {label}" for label in labels] |
|
|
|
inputs = processor( |
|
text=text_prompts, |
|
images=image, |
|
return_tensors="pt", |
|
padding=True, |
|
).to(device) |
|
|
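        # logits_per_image has shape (1, num_labels); softmax over the labels
        # yields the zero-shot class probabilities.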
|
with torch.no_grad(): |
|
outputs = model(**inputs) |
|
logits_per_image = outputs.logits_per_image |
|
probs = logits_per_image.softmax(dim=1) |
|
|
|
|
|
probabilities = probs[0].cpu().numpy() |
|
|
|
fig, ax = plt.subplots(figsize=(10, 6)) |
|
bars = ax.barh(labels, probabilities) |
|
ax.set_xlabel('Probability') |
|
ax.set_title('Zero-Shot Classification Results') |
|
|
|
|
|
for i, bar in enumerate(bars): |
|
bar.set_color(plt.cm.viridis(probabilities[i])) |
|
|
|
plt.tight_layout() |
|
|
|
|
|
results_text = "Classification Results:\n\n" |
|
sorted_results = sorted(zip(labels, probabilities), key=lambda x: x[1], reverse=True) |
|
|
|
for label, prob in sorted_results: |
|
results_text += f"{label}: {prob:.3f} ({prob*100:.1f}%)\n" |
|
|
|
return fig, results_text |
|
|
|
except Exception as e: |
|
return None, f"Error during classification: {str(e)}" |
|
|
|
def get_random_demo_images(): |
|
"""Get random images from current dataset""" |
|
try: |
|
if current_data_source == "custom" and custom_images: |
|
images = custom_images |
|
descriptions = custom_descriptions |
|
else: |
|
images = demo_data['image'] |
|
descriptions = demo_data['text'] |
|
|
|
if len(images) == 0: |
|
return [] |
|
|
|
|
|
indices = np.random.choice(len(images), min(6, len(images)), replace=False) |
|
|
|
results = [] |
|
for idx in indices: |
|
results.append((images[idx], f"Image {idx}: {descriptions[idx][:100]}...")) |
|
|
|
return results |
|
|
|
    except Exception:
|
return [] |
|
|
|
def switch_data_source(choice): |
|
"""Switch between demo and custom data source""" |
|
global current_data_source |
|
current_data_source = "demo" if choice == "Demo Dataset" else "custom" |
|
|
|
if current_data_source == "custom" and not custom_images: |
|
        return "⚠️ Custom folder not loaded. Please load a custom folder first."
|
elif current_data_source == "custom": |
|
        return f"✅ Switched to custom folder ({len(custom_images)} images)"
|
else: |
|
        return f"✅ Switched to demo dataset ({len(demo_data)} images)"
|
|
|
|
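# Load the model and precompute demo embeddings once at import time so the UI
# starts with a ready status message.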
|
initialization_status = load_model_and_demo_data() |
|
|
|
|
|
with gr.Blocks(title="AI Image Discovery Studio", theme=gr.themes.Soft()) as demo: |
|
gr.Markdown(""" |
|
    # 🖼️ AI Image Discovery Studio
|
|
|
Search images using natural language or find visually similar content with CLIP embeddings! |
|
""") |
|
|
|
|
|
with gr.Row(): |
|
status_display = gr.Textbox( |
|
value=initialization_status, |
|
label="System Status", |
|
interactive=False |
|
) |
|
|
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
data_source_radio = gr.Radio( |
|
["Demo Dataset", "Custom Folder"], |
|
value="Demo Dataset", |
|
label="Data Source" |
|
) |
|
|
|
folder_path_input = gr.Textbox( |
|
label="Custom Folder Path", |
|
placeholder="e.g., /path/to/your/images", |
|
visible=False |
|
) |
|
|
|
load_folder_btn = gr.Button("Load Custom Folder", visible=False) |
|
folder_status = gr.Textbox(label="Folder Status", visible=False, interactive=False) |
|
|
|
with gr.Column(scale=2): |
|
source_status = gr.Textbox( |
|
                value=f"✅ Using demo dataset ({len(demo_data)} images)" if demo_data is not None else "❌ Demo dataset unavailable",
|
label="Current Data Source", |
|
interactive=False |
|
) |
|
|
|
|
|
def toggle_folder_controls(choice): |
|
visible = choice == "Custom Folder" |
|
return ( |
|
gr.update(visible=visible), |
|
gr.update(visible=visible), |
|
gr.update(visible=visible) |
|
) |
|
|
|
data_source_radio.change( |
|
toggle_folder_controls, |
|
inputs=[data_source_radio], |
|
outputs=[folder_path_input, load_folder_btn, folder_status] |
|
) |
|
|
|
|
|
data_source_radio.change( |
|
switch_data_source, |
|
inputs=[data_source_radio], |
|
outputs=[source_status] |
|
) |
|
|
|
|
|
load_folder_btn.click( |
|
load_custom_folder, |
|
inputs=[folder_path_input], |
|
outputs=[folder_status] |
|
) |
|
|
|
|
|
with gr.Tabs(): |
|
|
|
        with gr.TabItem("🔤 Text to Image Search"):
|
gr.Markdown("Enter a text description to find matching images") |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
text_query = gr.Textbox( |
|
label="Search Query", |
|
placeholder="e.g., 'Dog running on grass', 'Beautiful sunset over mountains'" |
|
) |
|
text_top_k = gr.Slider(1, 10, value=5, step=1, label="Number of Results") |
|
                    text_search_btn = gr.Button("🔍 Search Images", variant="primary")
|
|
|
with gr.Column(): |
|
text_search_status = gr.Textbox(label="Search Status", interactive=False) |
|
|
|
text_results = gr.Gallery( |
|
label="Search Results", |
|
show_label=True, |
|
elem_id="text_search_gallery", |
|
columns=5, |
|
rows=1, |
|
height="auto" |
|
) |
|
|
|
|
|
text_search_btn.click( |
|
lambda query, top_k, source: search_images_by_text( |
|
query, top_k, "custom" if source == "Custom Folder" else "demo" |
|
), |
|
inputs=[text_query, text_top_k, data_source_radio], |
|
outputs=[text_results, text_search_status] |
|
) |
|
|
|
|
|
        with gr.TabItem("🖼️ Image to Image Search"):
|
gr.Markdown("Upload an image to find visually similar ones") |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
query_image = gr.Image(label="Query Image", type="pil") |
|
image_top_k = gr.Slider(1, 10, value=5, step=1, label="Number of Results") |
|
                    image_search_btn = gr.Button("🔍 Find Similar Images", variant="primary")
|
|
|
with gr.Column(): |
|
image_search_status = gr.Textbox(label="Search Status", interactive=False) |
|
|
|
image_results = gr.Gallery( |
|
label="Similar Images", |
|
show_label=True, |
|
elem_id="image_search_gallery", |
|
columns=5, |
|
rows=1, |
|
height="auto" |
|
) |
|
|
|
|
|
image_search_btn.click( |
|
lambda img, top_k, source: search_similar_images( |
|
img, top_k, "custom" if source == "Custom Folder" else "demo" |
|
), |
|
inputs=[query_image, image_top_k, data_source_radio], |
|
outputs=[image_results, image_search_status] |
|
) |
|
|
|
|
|
        with gr.TabItem("🏷️ Zero-Shot Classification"):
|
gr.Markdown("Classify an image with custom labels using CLIP") |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
classify_image_input = gr.Image(label="Image to Classify", type="pil") |
|
labels_input = gr.Textbox( |
|
label="Classification Labels (one per line)", |
|
value="cat\ndog\ncar\nbird\nflower", |
|
lines=5 |
|
) |
|
                    classify_btn = gr.Button("🔍 Classify Image", variant="primary")
|
|
|
with gr.Column(): |
|
classification_results = gr.Textbox( |
|
label="Detailed Results", |
|
lines=10, |
|
interactive=False |
|
) |
|
|
|
classification_plot = gr.Plot(label="Classification Results") |
|
|
|
|
|
classify_btn.click( |
|
classify_image, |
|
inputs=[classify_image_input, labels_input], |
|
outputs=[classification_plot, classification_results] |
|
) |
|
|
|
|
|
        with gr.TabItem("📊 Dataset Explorer"):
|
gr.Markdown("Browse through the dataset images") |
|
|
|
with gr.Row(): |
|
                random_sample_btn = gr.Button("🎲 Show Random Sample", variant="primary")
|
|
|
explorer_gallery = gr.Gallery( |
|
label="Dataset Sample", |
|
show_label=True, |
|
elem_id="explorer_gallery", |
|
columns=3, |
|
rows=2, |
|
height="auto" |
|
) |
|
|
|
|
|
random_sample_btn.click( |
|
get_random_demo_images, |
|
outputs=[explorer_gallery] |
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
demo.launch() |