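# Hugging Face Space app: generates a multi-sentence description of an uploaded
# image by combining BLIP image captioning with DeepFace face analysis
# (age, gender, emotion) and a simple keyword-based clothing extractor.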
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import numpy as np
import cv2
from deepface import DeepFace
import gradio as gr
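# Dependency sketch (an assumption, not part of the original file): the app
# relies roughly on these packages, e.g. listed in a requirements.txt —
#   transformers, torch, deepface (which pulls in tensorflow),
#   opencv-python-headless, gradio, Pillow, numpy
# Exact packages and version pins depend on the Space's runtime environment.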
# Load BLIP model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
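# Optional sketch (assumption: torch is available, as the PyTorch BLIP weights
# require it): move the captioning model to GPU when one is present.
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = model.to(device)
# (the processor's `inputs` would then also need .to(device) before generate())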
# Clothing extractor
def extract_clothing(text):
    colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'brown', 'gray', 'pink', 'orange']
    patterns = ['striped', 'checkered', 'plaid', 'polka-dot', 'solid', 'patterned', 'floral']
    items = ['jacket', 'coat', 'dress', 'shirt', 't-shirt', 'jeans', 'pants', 'shorts',
             'suit', 'sneakers', 'hat', 'scarf', 'uniform']
    found_colors = [c for c in colors if c in text.lower()]
    found_patterns = [p for p in patterns if p in text.lower()]
    found_items = [i for i in items if i in text.lower()]
    return found_colors, found_patterns, found_items
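# Illustrative example (not from the original file): for a caption such as
# "a woman in a red dress and white sneakers", extract_clothing returns
# (['red', 'white'], [], ['dress', 'sneakers']).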
# Main function
def analyze_image(image_pil):
    image_pil = image_pil.convert("RGB")
    image_np = np.array(image_pil)

    # Caption generation
    inputs = processor(image_pil, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Convert to BGR for DeepFace
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Face detection using DeepFace with RetinaFace backend
    try:
        faces = DeepFace.extract_faces(img_path=image_bgr, detector_backend="retinaface", enforce_detection=False)
        print(f"DeepFace detected {len(faces)} face(s)")
    except Exception as e:
        print("DeepFace error:", e)
        faces = []
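    # Note: each entry returned by extract_faces is a dict with "face",
    # "facial_area" and "confidence" keys; "face" is the cropped face as an RGB
    # array (recent DeepFace releases may return it as floats in [0, 1]).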
    face_infos = []
    for face_data in faces:
        face_crop = face_data["face"]
        try:
            analysis = DeepFace.analyze(face_crop, actions=['age', 'gender', 'emotion'], enforce_detection=False)
            age = analysis[0]['age']
            # Recent DeepFace versions return 'gender' as a probability dict;
            # 'dominant_gender' holds the "Man"/"Woman" label used below.
            gender = analysis[0].get('dominant_gender', analysis[0]['gender'])
            emotion = analysis[0]['dominant_emotion']
            if age < 13:
                age_group = "child"
            elif age < 20:
                age_group = "teen"
            elif age < 60:
                age_group = "adult"
            else:
                age_group = "senior"
            face_infos.append({
                "age": age,
                "gender": gender,
                "age_group": age_group,
                "emotion": emotion
            })
        except Exception:
            continue
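    # Illustrative face_infos entry (example values, not real output):
    # {"age": 31, "gender": "Woman", "age_group": "adult", "emotion": "happy"}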
    # Summary stats
    num_faces = len(face_infos)
    gender_counts = {"Man": 0, "Woman": 0}
    age_summary = {}
    emotion_summary = {}
    for face in face_infos:
        gender = face['gender']
        age_group = face['age_group']
        emotion = face['emotion']
        # .get() avoids a KeyError if an unexpected gender label is returned
        gender_counts[gender] = gender_counts.get(gender, 0) + 1
        age_summary[age_group] = age_summary.get(age_group, 0) + 1
        emotion_summary[emotion] = emotion_summary.get(emotion, 0) + 1
    # Clothing info from caption
    colors, patterns, items = extract_clothing(caption)

    # Generate 15 sentences
    sentences = []
    sentences.append(f"According to the BLIP model, the scene can be described as: \"{caption}\".")
    sentences.append(f"The image contains {num_faces} visible face(s) detected using DeepFace (RetinaFace backend).")

    gender_desc = []
    if gender_counts["Man"] > 0:
        gender_desc.append(f"{gender_counts['Man']} male(s)")
    if gender_counts["Woman"] > 0:
        gender_desc.append(f"{gender_counts['Woman']} female(s)")
    if gender_desc:
        sentences.append("Gender distribution shows " + " and ".join(gender_desc) + ".")
    else:
        sentences.append("Gender analysis was inconclusive.")

    if age_summary:
        age_list = [f"{count} {group}(s)" for group, count in age_summary.items()]
        sentences.append("Age groups represented include " + ", ".join(age_list) + ".")
    else:
        sentences.append("No conclusive age groupings found.")

    if emotion_summary:
        emo_list = [f"{count} showing {emo}" for emo, count in emotion_summary.items()]
        sentences.append("Facial expressions include " + ", ".join(emo_list) + ".")
    else:
        sentences.append("Emotion detection yielded limited results.")

    if colors or patterns or items:
        cloth_parts = []
        if colors:
            cloth_parts.append(f"colors like {', '.join(colors)}")
        if patterns:
            cloth_parts.append(f"patterns such as {', '.join(patterns)}")
        if items:
            cloth_parts.append(f"items like {', '.join(items)}")
        sentences.append("The clothing observed includes " + " and ".join(cloth_parts) + ".")
    else:
        sentences.append("Clothing details were not clearly identified.")

    if num_faces > 0:
        sentences.append("Faces are distributed naturally across the image.")
        sentences.append("Differences in face size suggest variation in distance from the camera.")
        sentences.append("Hairstyles appear diverse, from short to tied-back styles.")
        sentences.append("Lighting emphasizes certain facial features and expressions.")
        sentences.append("Some individuals face the camera while others look away.")
        sentences.append("Mood diversity is reflected in the variety of facial expressions.")
        sentences.append("The clothing style appears casual or semi-formal.")
    else:
        sentences.append("No visible faces were found to analyze further visual characteristics.")

    sentences.append("Overall, the image integrates facial, emotional, and clothing features into a cohesive scene.")

    return "\n".join([f"{i+1}. {s}" for i, s in enumerate(sentences)])
# Gradio Interface
demo = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="📝 15-Sentence Detailed Description"),
    title="🖼️ Image Analysis with BLIP + DeepFace",
    description="Upload an image to get a detailed 15-sentence description of facial features, age, gender, clothing, and more."
)

demo.launch()