added photo upload
app.py CHANGED
@@ -16,12 +16,10 @@ from mtcnn import MTCNN
 from PIL import Image, ImageDraw
 from transformers import pipeline
 
-
 # Initialize the Hugging Face pipeline for facial emotion detection
 emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")
 
-img_container = {"webcam": None,
-                 "analyzed": None}
+img_container = {"webcam": None, "analyzed": None, "uploaded": None}
 
 # Initialize MTCNN for face detection
 mtcnn = MTCNN()
@@ -37,21 +35,13 @@ class Detection(NamedTuple):
     score: float
     box: np.ndarray
 
-# NOTE: The callback will be called in another thread,
-#       so use a queue here for thread-safety to pass the data
-#       from inside to outside the callback.
-# TODO: A general-purpose shared state object may be more useful.
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
 # Function to analyze sentiment
 def analyze_sentiment(face):
-    # Convert face to RGB
     rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-    # Convert the face to a PIL image
     pil_image = Image.fromarray(rgb_face)
-    # Analyze sentiment using the Hugging Face pipeline
     results = emotion_pipeline(pil_image)
-    # Get the dominant emotion
     dominant_emotion = max(results, key=lambda x: x['score'])['label']
     return dominant_emotion
 
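Note on analyze_sentiment: the Hugging Face "image-classification" pipeline returns a list of {'label': ..., 'score': ...} dicts for the face crop, so the dominant emotion is simply the highest-scoring entry. A minimal illustration of that step (labels and scores below are made up, not real model output):

    results = [{'label': 'happy', 'score': 0.91}, {'label': 'neutral', 'score': 0.06}]
    dominant_emotion = max(results, key=lambda x: x['score'])['label']  # -> 'happy'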
@@ -60,28 +50,19 @@ LINE_SIZE = 2
 
 # Function to detect faces, analyze sentiment, and draw a red box around them
 def detect_and_draw_faces(frame):
-    # Detect faces using MTCNN
     results = mtcnn.detect_faces(frame)
-
-    # Draw on the frame
     for result in results:
         x, y, w, h = result['box']
         face = frame[y:y+h, x:x+w]
         sentiment = analyze_sentiment(face)
-        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE)
-
-        # Calculate position for the text background and the text itself
+        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE)
         text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
         text_x = x
         text_y = y - 10
         background_tl = (text_x, text_y - text_size[1])
         background_br = (text_x + text_size[0], text_y + 5)
-
-        # Draw black rectangle as background
         cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
-        # Draw white text on top
         cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, (255, 255, 255), 2)
-
     result_queue.put(results)
     return frame
 
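Note on detect_and_draw_faces: cv2.getTextSize returns ((text_width, text_height), baseline), and the [0] index keeps just the (width, height) pair used to size the black label background. The raw MTCNN detections are also pushed into result_queue so the Streamlit script thread can read them outside the video callback; a rough consumer sketch (assuming the queue defined above):

    detections = result_queue.get()  # blocks until the callback thread pushes the next frame's results
    for d in detections:
        print(d['box'], d['confidence'])  # mtcnn detections are dicts with 'box', 'confidence', 'keypoints'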
@@ -90,9 +71,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     img_container["webcam"] = img
     frame_with_boxes = detect_and_draw_faces(img.copy())
     img_container["analyzed"] = frame_with_boxes
-
     return frame
-    # return av.VideoFrame.from_ndarray(frame_with_boxes, format="bgr24")
 
 ice_servers = get_ice_servers()
 
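Note on video_frame_callback: the callback stores the raw and annotated frames in img_container and returns the untouched frame, so the live WebRTC stream stays unmodified and the annotated copy is shown through the placeholders instead. If the stream itself should carry the boxes, the callback could instead return the annotated frame, roughly as the removed comment suggested:

    return av.VideoFrame.from_ndarray(frame_with_boxes, format="bgr24")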
@@ -135,6 +114,8 @@ st.markdown(
 st.title("Computer Vision Test Lab")
 st.subheader("Facial Sentiment Analysis")
 
+show_labels = st.checkbox("Show the detected labels", value=True)
+
 # Columns for input and output streams
 col1, col2 = st.columns(2)
 
@@ -150,31 +131,50 @@ with col1:
         async_processing=True,
     )
 
+    st.subheader("Upload an Image")
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
 with col2:
     st.header("Analysis")
-    st.
+    input_subheader_placeholder = st.empty()
     input_placeholder = st.empty()
-
+
+    output_subheader_placeholder = st.empty()
     output_placeholder = st.empty()
 
 if webrtc_ctx.state.playing:
-
-
-
-
-
-
-
-    while True:
-        result = result_queue.get()
+    labels_placeholder = st.empty()
+    input_subheader_placeholder.subheader("Input Frame")
+    output_subheader_placeholder.subheader("Output Frame")
+
+    while True:
+        result = result_queue.get()
+        if show_labels:
             labels_placeholder.table(result)
 
-
-
+        img = img_container["webcam"]
+        frame_with_boxes = img_container["analyzed"]
+
+        if img is None:
+            continue
 
-
-
+        input_placeholder.image(img, channels="BGR")
+        output_placeholder.image(frame_with_boxes, channels="BGR")
 
-
-
+if uploaded_file is not None:
+    input_subheader_placeholder.subheader("Input Frame")
+    output_subheader_placeholder.subheader("Output Frame")
+
+    image = Image.open(uploaded_file)
+    img = np.array(image.convert("RGB"))  # Ensure image is in RGB format
+    img_container["uploaded"] = img
+    analyzed_img = detect_and_draw_faces(img.copy())
+    input_placeholder.image(img)
+    output_placeholder.image(analyzed_img)
+
+    result = result_queue.get()
+    if show_labels:
+        labels_placeholder = st.empty()
+        labels_placeholder.table(result)
 
+
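Note on the new upload path: st.file_uploader returns a file-like UploadedFile (or None), which PIL's Image.open can read directly; converting the PIL image to a NumPy array gives detect_and_draw_faces the H x W x 3 uint8 frame it expects. A minimal standalone sketch of that conversion (the file name is just an example):

    from PIL import Image
    import numpy as np

    image = Image.open("portrait.jpg")      # any path or file-like object
    img = np.array(image.convert("RGB"))    # H x W x 3 uint8 array
    analyzed = detect_and_draw_faces(img.copy())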