Spaces:

Futuretop
/

CaricatureGenerator-4.0

Runtime error

App Files Files Community

Futuretop committed on May 18

Commit

2a2fa4c

verified ·

1 Parent(s): f9e7316

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -71

app.py CHANGED Viewed

@@ -4,59 +4,14 @@ import torch
 import cv2
 import numpy as np
 from deepface import DeepFace
-import re
-# Load BLIP model
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-# Load image
-image_path = "your_image.jpg"  # Replace with your image path
-image_pil = Image.open(image_path).convert('RGB')
-image_np = np.array(image_pil)
-# BLIP caption
-inputs = processor(image_pil, return_tensors="pt")
-out = model.generate(**inputs)
-caption = processor.decode(out[0], skip_special_tokens=True)
-# OpenCV for face detection
 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
-gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
-faces = face_cascade.detectMultiScale(gray, 1.1, 4)
-# Analyze each face with DeepFace
-face_infos = []
-for (x, y, w, h) in faces:
-    face_crop = image_np[y:y+h, x:x+w]
-    try:
-        analysis = DeepFace.analyze(face_crop, actions=['age', 'gender'], enforce_detection=False)
-        age = analysis[0]['age']
-        gender = analysis[0]['gender']
-        # Map age to range
-        if age < 13:
-            age_group = "child"
-        elif age < 20:
-            age_group = "teen"
-        elif age < 60:
-            age_group = "adult"
-        else:
-            age_group = "senior"
-        face_infos.append({
-            "age_group": age_group,
-            "gender": gender,
-        })
-    except Exception as e:
-        continue
-# 얼굴 수, 연령대 요약
-num_faces = len(face_infos)
-age_summary = {}
-for face in face_infos:
-    key = f"{face['gender']} {face['age_group']}"
-    age_summary[key] = age_summary.get(key, 0) + 1
-# Extract clothing details
 def extract_clothing(text):
     colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'brown', 'gray', 'pink', 'orange']
     patterns = ['striped', 'checkered', 'plaid', 'polka-dot', 'solid', 'patterned', 'floral']
@@ -65,24 +20,12 @@ def extract_clothing(text):
     found_colors = [c for c in colors if c in text.lower()]
     found_patterns = [p for p in patterns if p in text.lower()]
-    found_items = [i for i in items if i in text.lower()]
     return found_colors, found_patterns, found_items
-colors, patterns, items = extract_clothing(caption)
-def clothing_sentence():
-    parts = []
-    if colors:
-        parts.append(f"colors such as {', '.join(colors)}")
-    if patterns:
-        parts.append(f"patterns like {', '.join(patterns)}")
-    if items:
-        parts.append(f"clothing items such as {', '.join(items)}")
-    return "The clothing observed includes " + " with ".join(parts) + "." if parts else "Clothing is present but not clearly distinguishable."
-# Generate final 15-sentence description
-def generate_15_sentences():
     sentences = []
     sentences.append(f"The image presents the scene: {caption}.")
     sentences.append("The visual tone combines human presence with context-rich elements.")
@@ -94,7 +37,7 @@ def generate_15_sentences():
     else:
         sentences.append("No specific age or gender details were identified.")
-    sentences.append(clothing_sentence())
     sentences.append("Facial expressions range from neutral to slightly expressive, adding emotional context.")
     sentences.append("Some individuals appear to be interacting with the environment or each other.")
     sentences.append("Although specific facial shapes are not automatically classified here, a mix of face sizes and angles is present.")
@@ -105,13 +48,76 @@ def generate_15_sentences():
     sentences.append("Background elements such as buildings or trees provide additional narrative depth.")
     sentences.append("The lighting helps highlight human features and adds dimensionality to the scene.")
     sentences.append("Overall, the image blends appearance, age, fashion, and emotion into a coherent story.")
     return sentences
-# Output result
-final_description = generate_15_sentences()
-print("\n📝 Full 15-Sentence Detailed Description:\n")
-for i, s in enumerate(final_description, 1):
-    print(f"{i}. {s}")
-demo.launch()

 import cv2
 import numpy as np
 from deepface import DeepFace
+import gradio as gr
# ====== Model loading ======
# The BLIP processor and the captioning model are loaded from the same
# checkpoint id; the face detector is OpenCV's bundled frontal-face Haar cascade.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
_HAAR_FRONTAL_FACE_XML = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"

processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
face_cascade = cv2.CascadeClassifier(_HAAR_FRONTAL_FACE_XML)
+# ====== 옷 정보 추출 함수 ======
 def extract_clothing(text):
     colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'brown', 'gray', 'pink', 'orange']
     patterns = ['striped', 'checkered', 'plaid', 'polka-dot', 'solid', 'patterned', 'floral']
     found_colors = [c for c in colors if c in text.lower()]
     found_patterns = [p for p in patterns if p in text.lower()]
+    found_items = [i for i in text.lower().split() if i in items]
     return found_colors, found_patterns, found_items
+# ====== 최종 설명 생성 함수 ======
+def generate_15_sentences(caption, num_faces, age_summary, clothing_sentence):
     sentences = []
     sentences.append(f"The image presents the scene: {caption}.")
     sentences.append("The visual tone combines human presence with context-rich elements.")
     else:
         sentences.append("No specific age or gender details were identified.")
+    sentences.append(clothing_sentence)
     sentences.append("Facial expressions range from neutral to slightly expressive, adding emotional context.")
     sentences.append("Some individuals appear to be interacting with the environment or each other.")
     sentences.append("Although specific facial shapes are not automatically classified here, a mix of face sizes and angles is present.")
     sentences.append("Background elements such as buildings or trees provide additional narrative depth.")
     sentences.append("The lighting helps highlight human features and adds dimensionality to the scene.")
     sentences.append("Overall, the image blends appearance, age, fashion, and emotion into a coherent story.")
     return sentences
# ====== Main analysis function ======
def _age_group(age):
    """Map a numeric age estimate to a coarse age-group label."""
    if age < 13:
        return "child"
    if age < 20:
        return "teen"
    if age < 60:
        return "adult"
    return "senior"


def _clothing_sentence(colors, patterns, items):
    """Build the clothing sentence from the keywords found in the caption."""
    parts = []
    if colors:
        parts.append(f"colors such as {', '.join(colors)}")
    if patterns:
        parts.append(f"patterns like {', '.join(patterns)}")
    if items:
        parts.append(f"clothing items such as {', '.join(items)}")
    if not parts:
        return "Clothing is present but not clearly distinguishable."
    return "The clothing observed includes " + " with ".join(parts) + "."


def _describe_face(face_crop_rgb):
    """Return ``{"age_group", "gender"}`` for one face crop, or ``None``.

    Analysis is best-effort: any failure on a single crop yields ``None`` so
    that one bad face does not abort the whole request.
    """
    try:
        # DeepFace documents numpy input as BGR, while the crop comes from a
        # PIL image (RGB), so the channel order is swapped first.
        face_crop_bgr = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2BGR)
        analysis = DeepFace.analyze(
            face_crop_bgr,
            actions=['age', 'gender'],
            enforce_detection=False,
        )
        # Releases that return a list give one result per face; older ones
        # return a single dict. Accept both.
        result = analysis[0] if isinstance(analysis, list) else analysis
        # In list-returning releases 'gender' is a per-class score dict and
        # the label lives in 'dominant_gender'; fall back to 'gender' for
        # releases where it is already the label string.
        if 'dominant_gender' in result:
            gender = result['dominant_gender']
        else:
            gender = result['gender']
        return {
            "age_group": _age_group(result['age']),
            "gender": gender,
        }
    except Exception:
        # Deliberately broad (but no longer bare, so KeyboardInterrupt and
        # SystemExit propagate): skip faces that cannot be analysed.
        return None


def analyze_uploaded_image(image_pil):
    """Describe an uploaded image as a numbered list of sentences.

    Steps: BLIP caption, Haar-cascade face detection, DeepFace age/gender
    analysis per detected face, keyword-based clothing extraction from the
    caption, then sentence assembly via ``generate_15_sentences``.

    Args:
        image_pil: PIL image from the Gradio image input, or ``None`` when
            the request was submitted without an image.

    Returns:
        str: the sentences joined by newlines, each prefixed with "N. ".
        For ``None`` input, a short prompt asking for an image.
    """
    if image_pil is None:
        # "Please upload an image" - same wording as the input label.
        return "이미지를 업로드하세요"

    image_pil = image_pil.convert("RGB")
    image_np = np.array(image_pil)

    # 1. Caption generation (BLIP)
    inputs = processor(image_pil, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # 2. Face detection (OpenCV)
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # 3. Age / gender analysis with DeepFace; only faces that were analysed
    #    successfully are counted.
    face_infos = []
    for (x, y, w, h) in faces:
        info = _describe_face(image_np[y:y + h, x:x + w])
        if info is not None:
            face_infos.append(info)
    num_faces = len(face_infos)

    # 4. Summary: count of faces per "<gender> <age_group>" key
    age_summary = {}
    for face in face_infos:
        key = f"{face['gender']} {face['age_group']}"
        age_summary[key] = age_summary.get(key, 0) + 1

    # 5. Clothing details extracted from the caption text
    colors, patterns, items = extract_clothing(caption)
    clothing_sentence = _clothing_sentence(colors, patterns, items)

    # 6. Final description
    final_description = generate_15_sentences(caption, num_faces, age_summary, clothing_sentence)
    return "\n".join(f"{i}. {s}" for i, s in enumerate(final_description, 1))
# ====== Gradio interface setup ======
# Input widget: the uploaded picture is handed to the callback as a PIL image.
image_input = gr.Image(type="pil", label="이미지를 업로드하세요")
# Output widget: the numbered description shown as plain text.
description_output = gr.Textbox(label="15문장 이미지 설명")

interface = gr.Interface(
    fn=analyze_uploaded_image,
    inputs=image_input,
    outputs=description_output,
    title="🧠 이미지 인식 설명기 (BLIP + DeepFace)",
    description="이미지를 업로드하면 사람 수, 성별, 연령대, 옷, 분위기 등을 15개의 문장으로 설명합니다.",
)

# ====== Launch the app ======
interface.launch()