Futuretop committed
Commit 2a2fa4c · verified · Parent: f9e7316

Update app.py

Files changed (1):
  1. app.py +77 -71
app.py CHANGED
@@ -4,59 +4,14 @@ import torch
 import cv2
 import numpy as np
 from deepface import DeepFace
-import re
+import gradio as gr
 
-# Load BLIP model
+# ====== Model loading ======
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-
-# Load image
-image_path = "your_image.jpg"  # Replace with your image path
-image_pil = Image.open(image_path).convert('RGB')
-image_np = np.array(image_pil)
-
-# BLIP caption
-inputs = processor(image_pil, return_tensors="pt")
-out = model.generate(**inputs)
-caption = processor.decode(out[0], skip_special_tokens=True)
-
-# OpenCV for face detection
 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
-gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
-faces = face_cascade.detectMultiScale(gray, 1.1, 4)
-
-# Analyze each face with DeepFace
-face_infos = []
-for (x, y, w, h) in faces:
-    face_crop = image_np[y:y+h, x:x+w]
-    try:
-        analysis = DeepFace.analyze(face_crop, actions=['age', 'gender'], enforce_detection=False)
-        age = analysis[0]['age']
-        gender = analysis[0]['gender']
-        # Map age to range
-        if age < 13:
-            age_group = "child"
-        elif age < 20:
-            age_group = "teen"
-        elif age < 60:
-            age_group = "adult"
-        else:
-            age_group = "senior"
-        face_infos.append({
-            "age_group": age_group,
-            "gender": gender,
-        })
-    except Exception as e:
-        continue
 
-# Face count and age-group summary
-num_faces = len(face_infos)
-age_summary = {}
-for face in face_infos:
-    key = f"{face['gender']} {face['age_group']}"
-    age_summary[key] = age_summary.get(key, 0) + 1
-
-# Extract clothing details
+# ====== Clothing extraction helper ======
 def extract_clothing(text):
     colors = ['red', 'blue', 'green', 'black', 'white', 'yellow', 'brown', 'gray', 'pink', 'orange']
     patterns = ['striped', 'checkered', 'plaid', 'polka-dot', 'solid', 'patterned', 'floral']
@@ -65,24 +20,12 @@ def extract_clothing(text):
 
     found_colors = [c for c in colors if c in text.lower()]
     found_patterns = [p for p in patterns if p in text.lower()]
-    found_items = [i for i in items if i in text.lower()]
+    found_items = [i for i in text.lower().split() if i in items]
 
     return found_colors, found_patterns, found_items
 
-colors, patterns, items = extract_clothing(caption)
-
-def clothing_sentence():
-    parts = []
-    if colors:
-        parts.append(f"colors such as {', '.join(colors)}")
-    if patterns:
-        parts.append(f"patterns like {', '.join(patterns)}")
-    if items:
-        parts.append(f"clothing items such as {', '.join(items)}")
-    return "The clothing observed includes " + " with ".join(parts) + "." if parts else "Clothing is present but not clearly distinguishable."
-
-# Generate final 15-sentence description
-def generate_15_sentences():
+# ====== Final description generator ======
+def generate_15_sentences(caption, num_faces, age_summary, clothing_sentence):
     sentences = []
     sentences.append(f"The image presents the scene: {caption}.")
     sentences.append("The visual tone combines human presence with context-rich elements.")
@@ -94,7 +37,7 @@ def generate_15_sentences():
     else:
         sentences.append("No specific age or gender details were identified.")
 
-    sentences.append(clothing_sentence())
+    sentences.append(clothing_sentence)
     sentences.append("Facial expressions range from neutral to slightly expressive, adding emotional context.")
     sentences.append("Some individuals appear to be interacting with the environment or each other.")
    sentences.append("Although specific facial shapes are not automatically classified here, a mix of face sizes and angles is present.")
@@ -105,13 +48,76 @@ def generate_15_sentences():
     sentences.append("Background elements such as buildings or trees provide additional narrative depth.")
     sentences.append("The lighting helps highlight human features and adds dimensionality to the scene.")
     sentences.append("Overall, the image blends appearance, age, fashion, and emotion into a coherent story.")
-
     return sentences
 
-# Output result
-final_description = generate_15_sentences()
-print("\nFull 15-Sentence Detailed Description:\n")
-for i, s in enumerate(final_description, 1):
-    print(f"{i}. {s}")
+# ====== Main analysis function ======
+def analyze_uploaded_image(image_pil):
+    image_pil = image_pil.convert("RGB")
+    image_np = np.array(image_pil)
+
+    # 1. Caption generation (BLIP)
+    inputs = processor(image_pil, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    # 2. Face detection (OpenCV)
+    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
+    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
+
+    # 3. Age/gender analysis with DeepFace
+    face_infos = []
+    for (x, y, w, h) in faces:
+        face_crop = image_np[y:y+h, x:x+w]
+        try:
+            analysis = DeepFace.analyze(face_crop, actions=['age', 'gender'], enforce_detection=False)
+            age = analysis[0]['age']
+            gender = analysis[0]['gender']
+            if age < 13:
+                age_group = "child"
+            elif age < 20:
+                age_group = "teen"
+            elif age < 60:
+                age_group = "adult"
+            else:
+                age_group = "senior"
+            face_infos.append({
+                "age_group": age_group,
+                "gender": gender,
+            })
+        except:
+            continue
+
+    num_faces = len(face_infos)
+
+    # 4. Age-group summary
+    age_summary = {}
+    for face in face_infos:
+        key = f"{face['gender']} {face['age_group']}"
+        age_summary[key] = age_summary.get(key, 0) + 1
+
+    # 5. Clothing extraction
+    colors, patterns, items = extract_clothing(caption)
+    parts = []
+    if colors:
+        parts.append(f"colors such as {', '.join(colors)}")
+    if patterns:
+        parts.append(f"patterns like {', '.join(patterns)}")
+    if items:
+        parts.append(f"clothing items such as {', '.join(items)}")
+    clothing_sentence = "The clothing observed includes " + " with ".join(parts) + "." if parts else "Clothing is present but not clearly distinguishable."
+
+    # 6. Build the final description
+    final_description = generate_15_sentences(caption, num_faces, age_summary, clothing_sentence)
+    return "\n".join([f"{i+1}. {s}" for i, s in enumerate(final_description)])
+
+# ====== Gradio interface ======
+interface = gr.Interface(
+    fn=analyze_uploaded_image,
+    inputs=gr.Image(type="pil", label="Upload an image"),
+    outputs=gr.Textbox(label="15-sentence image description"),
+    title="🧠 Image Recognition Describer (BLIP + DeepFace)",
+    description="Upload an image and it is described in 15 sentences covering the number of people, their gender and age group, clothing, and the overall mood."
)
 
-demo.launch()
+# ====== Run the app ======
+interface.launch()
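Note on the extract_clothing change above: found_items switches from substring matching against the whole caption to whole-token matching against the caption's words, so an item only counts when it appears as a standalone word. A minimal sketch of the difference, using a made-up caption and an assumed items list (the real list sits in context lines not shown in this diff):

caption = "a man in a red t-shirt and jeans"      # illustrative caption, not real BLIP output
items = ['shirt', 't-shirt', 'jeans', 'dress']    # assumed stand-in for the elided items list

old_style = [i for i in items if i in caption.lower()]          # substring match (pre-commit)
new_style = [i for i in caption.lower().split() if i in items]  # whole-token match (post-commit)

print(old_style)  # ['shirt', 't-shirt', 'jeans'] -- 'shirt' also matches inside 't-shirt'
print(new_style)  # ['t-shirt', 'jeans'] -- only standalone tokens count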
 
 
 
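Because interface.launch() runs at module level, simply importing the new app.py starts the web UI. For a quick check of the pipeline itself, analyze_uploaded_image can be called directly in an interactive session once the definitions above have been executed (before the launch call). A minimal sketch, assuming a local test image at the placeholder path "test.jpg":

from PIL import Image

# "test.jpg" is a placeholder; substitute any local image file.
# The function returns the numbered 15-sentence description as one string.
sample = Image.open("test.jpg")
print(analyze_uploaded_image(sample))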