eusholli committed on
Commit 9dc0fd2
1 Parent(s): 56a64f9

added photo upload

Files changed (1)
  1. app.py +40 -40
app.py CHANGED
@@ -16,12 +16,10 @@ from mtcnn import MTCNN
 from PIL import Image, ImageDraw
 from transformers import pipeline
 
-
 # Initialize the Hugging Face pipeline for facial emotion detection
 emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")
 
-img_container = {"webcam": None,
-                 "analyzed": None}
+img_container = {"webcam": None, "analyzed": None, "uploaded": None}
 
 # Initialize MTCNN for face detection
 mtcnn = MTCNN()
@@ -37,21 +35,13 @@ class Detection(NamedTuple):
     score: float
     box: np.ndarray
 
-# NOTE: The callback will be called in another thread,
-# so use a queue here for thread-safety to pass the data
-# from inside to outside the callback.
-# TODO: A general-purpose shared state object may be more useful.
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
 # Function to analyze sentiment
 def analyze_sentiment(face):
-    # Convert face to RGB
     rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-    # Convert the face to a PIL image
     pil_image = Image.fromarray(rgb_face)
-    # Analyze sentiment using the Hugging Face pipeline
     results = emotion_pipeline(pil_image)
-    # Get the dominant emotion
     dominant_emotion = max(results, key=lambda x: x['score'])['label']
     return dominant_emotion
 
@@ -60,28 +50,19 @@ LINE_SIZE = 2
 
 # Function to detect faces, analyze sentiment, and draw a red box around them
 def detect_and_draw_faces(frame):
-    # Detect faces using MTCNN
     results = mtcnn.detect_faces(frame)
-
-    # Draw on the frame
     for result in results:
         x, y, w, h = result['box']
         face = frame[y:y+h, x:x+w]
         sentiment = analyze_sentiment(face)
-        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE) # Thicker red box
-
-        # Calculate position for the text background and the text itself
+        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE)
         text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
         text_x = x
         text_y = y - 10
         background_tl = (text_x, text_y - text_size[1])
         background_br = (text_x + text_size[0], text_y + 5)
-
-        # Draw black rectangle as background
         cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
-        # Draw white text on top
         cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, (255, 255, 255), 2)
-
     result_queue.put(results)
     return frame
 
@@ -90,9 +71,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     img_container["webcam"] = img
     frame_with_boxes = detect_and_draw_faces(img.copy())
     img_container["analyzed"] = frame_with_boxes
-
     return frame
-    # return av.VideoFrame.from_ndarray(frame_with_boxes, format="bgr24")
 
 ice_servers = get_ice_servers()
 
@@ -135,6 +114,8 @@ st.markdown(
 st.title("Computer Vision Test Lab")
 st.subheader("Facial Sentiment Analysis")
 
+show_labels = st.checkbox("Show the detected labels", value=True)
+
 # Columns for input and output streams
 col1, col2 = st.columns(2)
 
@@ -150,31 +131,50 @@ with col1:
         async_processing=True,
     )
 
+    st.subheader("Upload an Image")
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
 with col2:
     st.header("Analysis")
-    st.subheader("Input Frame")
+    input_subheader_placeholder = st.empty()
     input_placeholder = st.empty()
-    st.subheader("Output Frame")
+
+    output_subheader_placeholder = st.empty()
     output_placeholder = st.empty()
 
     if webrtc_ctx.state.playing:
-        if st.checkbox("Show the detected labels", value=True):
-            labels_placeholder = st.empty()
-            # NOTE: The video transformation with object detection and
-            # this loop displaying the result labels are running
-            # in different threads asynchronously.
-            # Then the rendered video frames and the labels displayed here
-            # are not strictly synchronized.
-            while True:
-                result = result_queue.get()
+        labels_placeholder = st.empty()
+        input_subheader_placeholder.subheader("Input Frame")
+        output_subheader_placeholder.subheader("Output Frame")
+
+        while True:
+            result = result_queue.get()
+            if show_labels:
                 labels_placeholder.table(result)
 
-                img = img_container["webcam"]
-                frame_with_boxes = img_container["analyzed"]
+            img = img_container["webcam"]
+            frame_with_boxes = img_container["analyzed"]
+
+            if img is None:
+                continue
 
-                if img is None:
-                    continue
+            input_placeholder.image(img, channels="BGR")
+            output_placeholder.image(frame_with_boxes, channels="BGR")
 
-                input_placeholder.image(img, channels="BGR")
-                output_placeholder.image(frame_with_boxes, channels="BGR")
+    if uploaded_file is not None:
+        input_subheader_placeholder.subheader("Input Frame")
+        output_subheader_placeholder.subheader("Output Frame")
+
+        image = Image.open(uploaded_file)
+        img = np.array(image.convert("RGB"))  # Ensure image is in RGB format
+        img_container["uploaded"] = img
+        analyzed_img = detect_and_draw_faces(img.copy())
+        input_placeholder.image(img)
+        output_placeholder.image(analyzed_img)
+
+        result = result_queue.get()
+        if show_labels:
+            labels_placeholder = st.empty()
+            labels_placeholder.table(result)
 
+
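For context, the photo-upload path introduced by this commit boils down to the sketch below. It is a simplified, hypothetical condensation of the added lines (it assumes Streamlit, Pillow, and NumPy are installed and reuses the detect_and_draw_faces helper defined earlier in app.py); it is not a drop-in replacement for the diff above.

    import numpy as np
    import streamlit as st
    from PIL import Image

    # Condensed version of the upload flow added in this commit (illustrative only).
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        # Decode the upload to an RGB numpy array, as the diff does.
        img = np.array(Image.open(uploaded_file).convert("RGB"))
        # Run the same face-detection + sentiment helper used for webcam frames.
        analyzed = detect_and_draw_faces(img.copy())
        st.image(img)       # original upload
        st.image(analyzed)  # with boxes and emotion labels drawn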