Spaces:

Futuretop
/

CaricatureGenerator-4.0

Runtime error

App Files Files Community

CaricatureGenerator-4.0 / app.py

Futuretop

Update app.py

6affcd7 verified 14 days ago

raw

history blame

4.92 kB

	from PIL import Image
	from transformers import BlipProcessor, BlipForConditionalGeneration
	import torch
	import cv2
	import numpy as np
	from deepface import DeepFace
	import re

	# BLIP captioning model: the processor and the generation model are both
	# loaded from the same pretrained checkpoint.
	BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
	processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
	model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

	# Input image, kept both as a PIL image (fed to BLIP) and as an RGB NumPy
	# array (fed to the OpenCV / DeepFace steps further down).
	image_path = "your_image.jpg"  # Replace with your image path
	image_pil = Image.open(image_path).convert('RGB')
	image_np = np.array(image_pil)

	# Generate a caption for the image and decode the first returned sequence
	# to plain text, dropping special tokens.
	inputs = processor(image_pil, return_tensors="pt")
	out = model.generate(**inputs)
	caption = processor.decode(out[0], skip_special_tokens=True)

	# Face detection with OpenCV's bundled frontal-face Haar cascade.
	# The detector runs on a grayscale copy of the RGB array.
	cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
	face_cascade = cv2.CascadeClassifier(cascade_file)
	gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
	# Each entry of `faces` is unpacked as an (x, y, w, h) box by the loop below.
	faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)

	# Analyze each detected face with DeepFace and record its gender and age group.
	# Faces whose analysis fails are skipped (best effort), so face_infos may hold
	# fewer entries than `faces`.
	face_infos = []
	for (x, y, w, h) in faces:
	    # image_np is RGB (it comes from PIL), but DeepFace documents NumPy input
	    # as BGR, so convert the crop before handing it over.
	    face_crop = cv2.cvtColor(image_np[y:y+h, x:x+w], cv2.COLOR_RGB2BGR)
	    try:
	        analysis = DeepFace.analyze(face_crop, actions=['age', 'gender'], enforce_detection=False)
	        # Recent DeepFace releases return a list with one dict per face;
	        # older ones return the dict directly. Accept both.
	        result = analysis[0] if isinstance(analysis, list) else analysis
	        age = result['age']
	        # In releases that expose 'dominant_gender', 'gender' is a dict of
	        # per-class scores; using it directly would put a dict repr into the
	        # summary text. Prefer the label and fall back to the top score.
	        gender = result.get('dominant_gender', result['gender'])
	        if isinstance(gender, dict):
	            gender = max(gender, key=gender.get)
	    except Exception:
	        # Deliberate best effort: one unreadable face must not abort the
	        # whole description, so it is simply left out of the summary.
	        continue

	    # Map age to range
	    if age < 13:
	        age_group = "child"
	    elif age < 20:
	        age_group = "teen"
	    elif age < 60:
	        age_group = "adult"
	    else:
	        age_group = "senior"
	    face_infos.append({
	        "age_group": age_group,
	        "gender": gender,
	    })

	# Number of faces and a per-group tally.
	# age_summary maps a "<gender> <age_group>" label to how many analyzed
	# faces fall into that group.
	num_faces = len(face_infos)
	group_labels = [f"{face['gender']} {face['age_group']}" for face in face_infos]
	age_summary = {}
	for label in group_labels:
	    age_summary[label] = 1 + age_summary.get(label, 0)

	# Extract clothing details
	def extract_clothing(text):
	    """Find known clothing colors, patterns and items mentioned in *text*.

	    Matching is case-insensitive and works on whole words, so "that" is not
	    reported as "hat", "covered" is not reported as "red", and "t-shirt" is
	    not additionally reported as "shirt". Clothing items also match their
	    regular plural form ("hats", "dresses").

	    Args:
	        text: Caption to scan. ``None`` or an empty string yields no matches.

	    Returns:
	        A ``(found_colors, found_patterns, found_items)`` tuple of lists,
	        each ordered like the vocabulary lists below.
	    """
	    colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'brown', 'gray', 'pink', 'orange']
	    patterns = ['striped', 'checkered', 'plaid', 'polka-dot', 'solid', 'patterned', 'floral']
	    items = ['jacket', 'coat', 'dress', 'shirt', 't-shirt', 'jeans', 'pants', 'shorts',
	             'suit', 'sneakers', 'hat', 'scarf', 'uniform']

	    # Lower-case once instead of once per vocabulary entry.
	    lowered = (text or "").lower()

	    def mentioned(term, allow_plural=False):
	        """Return True if *term* occurs in the caption as a standalone word."""
	        suffix = ""
	        if allow_plural:
	            suffix = "(?:es)?" if term.endswith(("s", "sh", "ch", "x")) else "s?"
	        # Hyphens are treated as part of a word so that compound terms such
	        # as "t-shirt" or "polka-dot" are matched only as a whole.
	        pattern = rf"(?<![\w-]){re.escape(term)}{suffix}(?![\w-])"
	        return re.search(pattern, lowered) is not None

	    found_colors = [c for c in colors if mentioned(c)]
	    found_patterns = [p for p in patterns if mentioned(p)]
	    found_items = [i for i in items if mentioned(i, allow_plural=True)]

	    return found_colors, found_patterns, found_items

	# Scan the BLIP caption once; clothing_sentence() below reads these three
	# lists as module-level globals.
	colors, patterns, items = extract_clothing(caption)

	def clothing_sentence():
	    """Build one sentence describing the clothing found in the caption.

	    Reads the module-level ``colors``, ``patterns`` and ``items`` lists.
	    Each non-empty list contributes one fragment; the fragments are joined
	    with " with ". When all three lists are empty, a fixed fallback
	    sentence is returned instead.
	    """
	    candidates = (
	        (colors, f"colors such as {', '.join(colors)}"),
	        (patterns, f"patterns like {', '.join(patterns)}"),
	        (items, f"clothing items such as {', '.join(items)}"),
	    )
	    fragments = [phrase for values, phrase in candidates if values]
	    if not fragments:
	        return "Clothing is present but not clearly distinguishable."
	    return "The clothing observed includes " + " with ".join(fragments) + "."

	# Generate final 15-sentence description
	def generate_15_sentences():
	    """Return the 15 sentences of the image description as a list.

	    Four sentences are data-driven: the caption, the face count, the
	    age/gender breakdown, and the clothing sentence. They read the
	    module-level ``caption``, ``num_faces`` and ``age_summary`` and call
	    ``clothing_sentence()``. The remaining sentences are fixed text.
	    """
	    if age_summary:
	        group_counts = [f"{count} {group}(s)" for group, count in age_summary.items()]
	        demographics = "The crowd includes " + ", ".join(group_counts) + "."
	    else:
	        demographics = "No specific age or gender details were identified."

	    return [
	        f"The image presents the scene: {caption}.",
	        "The visual tone combines human presence with context-rich elements.",
	        f"A total of {num_faces} people with visible faces were detected.",
	        demographics,
	        clothing_sentence(),
	        "Facial expressions range from neutral to slightly expressive, adding emotional context.",
	        "Some individuals appear to be interacting with the environment or each other.",
	        "Although specific facial shapes are not automatically classified here, a mix of face sizes and angles is present.",
	        "Hairstyles vary, including short hair, longer cuts, and tied-back styles depending on individual orientation.",
	        "The photo captures diversity not only in people but also in visual textures and tones.",
	        "Clothing styles vary, suggesting informal or casual settings rather than formal events.",
	        "The spatial arrangement of individuals indicates natural movement or candid posture.",
	        "Background elements such as buildings or trees provide additional narrative depth.",
	        "The lighting helps highlight human features and adds dimensionality to the scene.",
	        "Overall, the image blends appearance, age, fashion, and emotion into a coherent story.",
	    ]

	# Output result: a header followed by the sentences, numbered from 1.
	final_description = generate_15_sentences()
	print("\n📝 Full 15-Sentence Detailed Description:\n")
	for number, sentence in enumerate(final_description, start=1):
	    print(f"{number}. {sentence}")