Ankan Ghosh committed
Commit a42db59 · verified · 1 Parent(s): 9acb682

Upload 4 files

Files changed (5)
  1. .gitattributes +1 -0
  2. app.py +305 -0
  3. click.wav +0 -0
  4. input-video.mp4 +3 -0
  5. requirements.txt +4 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ input-video.mp4 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,305 @@
+ import cv2
+ import numpy as np
+ import time
+ import os
+ import matplotlib.pyplot as plt
+ import gradio as gr
+
+ try:
+     from pygame import mixer
+
+     mixer_init = True
+ except ModuleNotFoundError:
+     mixer = None
+     mixer_init = False
+
+ # ------------------------------------------------------------------------------
+ # 1. Initializations.
+ # ------------------------------------------------------------------------------
+
+ # Initialize counter for the number of blinks detected.
+ BLINK = 0
+
+ # Model file paths.
+ MODEL_PATH = "./model/res10_300x300_ssd_iter_140000.caffemodel"
+ CONFIG_PATH = "./model/deploy.prototxt"
+ LBF_MODEL = "./model/lbfmodel.yaml"
+
+ # Create a face detector network instance.
+ net = cv2.dnn.readNetFromCaffe(CONFIG_PATH, MODEL_PATH)
+
+ # Create the landmark detector instance.
+ landmarkDetector = cv2.face.createFacemarkLBF()
+ landmarkDetector.loadModel(LBF_MODEL)
+
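+ # Note: the res10 SSD face detector and the LBF landmark model are loaded from
+ # a local ./model/ directory, which is not among the files added in this
+ # commit; they must already be present for the app to start.
+ # cv2.face.createFacemarkLBF comes from OpenCV's contrib modules, hence
+ # opencv-contrib-python in requirements.txt.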
+ # ------------------------------------------------------------------------------
+ # 2. Function definitions.
+ # ------------------------------------------------------------------------------
+
+
+ def detect_faces(image, detection_threshold=0.70):
+     blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123])
+     net.setInput(blob)
+     detections = net.forward()
+
+     faces = []
+     img_h = image.shape[0]
+     img_w = image.shape[1]
+
+     for detection in detections[0][0]:
+         if detection[2] >= detection_threshold:
+             left = detection[3] * img_w
+             top = detection[4] * img_h
+             right = detection[5] * img_w
+             bottom = detection[6] * img_h
+
+             face_w = right - left
+             face_h = bottom - top
+
+             face_roi = (left, top, face_w, face_h)
+             faces.append(face_roi)
+
+     return np.array(faces).astype(int)
+
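+ # detect_faces returns each detection as an (x, y, width, height) box in integer
+ # pixel coordinates; the SSD reports box corners as fractions of the input size,
+ # which is why they are scaled by the frame width and height above.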
+
+ def get_primary_face(faces, frame_h, frame_w):
+     primary_face_index = None
+     face_height_max = 0
+     for idx in range(len(faces)):
+         face = faces[idx]
+         x1 = face[0]
+         y1 = face[1]
+         x2 = x1 + face[2]
+         y2 = y1 + face[3]
+         if x1 > frame_w or y1 > frame_h or x2 > frame_w or y2 > frame_h:
+             continue
+         if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
+             continue
+
+         # Prioritize the face with the maximum height.
+         if face[3] > face_height_max:
+             primary_face_index = idx
+             face_height_max = face[3]
+
+     if primary_face_index is not None:
+         primary_face = faces[primary_face_index]
+     else:
+         primary_face = None
+
+     return primary_face
+
+
+ def visualize_eyes(landmarks, frame):
+     for i in range(36, 48):
+         cv2.circle(frame, tuple(landmarks[i].astype("int")), 2, (0, 255, 0), -1)
+
+
+ def get_eye_aspect_ratio(landmarks):
+     vert_dist_1right = calculate_distance(landmarks[37], landmarks[41])
+     vert_dist_2right = calculate_distance(landmarks[38], landmarks[40])
+     vert_dist_1left = calculate_distance(landmarks[43], landmarks[47])
+     vert_dist_2left = calculate_distance(landmarks[44], landmarks[46])
+     horz_dist_right = calculate_distance(landmarks[36], landmarks[39])
+     horz_dist_left = calculate_distance(landmarks[42], landmarks[45])
+     EAR_left = (vert_dist_1left + vert_dist_2left) / (2.0 * horz_dist_left)
+     EAR_right = (vert_dist_1right + vert_dist_2right) / (2.0 * horz_dist_right)
+     ear = (EAR_left + EAR_right) / 2
+     return ear
+
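+ # The landmark indices follow the standard 68-point facial landmark layout used
+ # by the LBF model: points 36-41 outline one eye and 42-47 the other. For each
+ # eye, EAR = (vertical_dist_1 + vertical_dist_2) / (2 * horizontal_dist), so the
+ # ratio drops sharply when the eyelids close and recovers when they reopen.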
+
+ def calculate_distance(A, B):
+     distance = ((A[0] - B[0]) ** 2 + (A[1] - B[1]) ** 2) ** 0.5
+     return distance
+
+
+ def play(file):
+     if mixer_init:
+         mixer.init()
+         sound = mixer.Sound(file)
+         sound.play()
+
+
+ # ------------------------------------------------------------------------------
+ # 3. Processing function (to be used in Gradio).
+ # ------------------------------------------------------------------------------
+
+
+ def process_video(input_video):
+
+     # Generate unique filenames for the outputs
+     out_video_filename = "processed_video.mp4"
+     out_plot_filename = "ear_plot.png"
+
+     cap = cv2.VideoCapture(input_video)
+     ret, frame = cap.read()
+     if not ret:
+         print("Cannot read the input video.")
+         return None, None
+
+     frame_h = frame.shape[0]
+     frame_w = frame.shape[1]
+
+     # Initialize writer for processed video
+     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+     fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 30
+     out_writer = cv2.VideoWriter(out_video_filename, fourcc, fps, (frame_w, frame_h))
+
+     # Calibration
+     frame_count = 0
+     frame_calib = 30  # Number of frames to use for threshold calibration.
+     sum_ear = 0
+
+     BLINK = 0
+     state_prev = state_curr = "open"
+
+     ear_values = []
+
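+     # Threshold calibration: the first `frame_calib` frames with a detected face
+     # establish a per-subject baseline EAR. HIGHER_TH is set to 90% of that
+     # average and LOWER_TH to 80% of HIGHER_TH; the gap between them acts as
+     # hysteresis, so the eye must fall below LOWER_TH to register as closed and
+     # rise back above HIGHER_TH to register as open. Each closed -> open
+     # transition counts as one blink.
+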
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # Detect Face.
+         faces = detect_faces(frame, detection_threshold=0.90)
+
+         if len(faces) > 0:
+             # Use primary face
+             primary_face = get_primary_face(faces, frame_h, frame_w)
+
+             if primary_face is not None:
+                 cv2.rectangle(
+                     frame,
+                     (primary_face[0], primary_face[1]),
+                     (primary_face[0] + primary_face[2], primary_face[1] + primary_face[3]),
+                     (0, 255, 0),
+                     3,
+                 )
+
+                 # Detect Landmarks
+                 retval, landmarksList = landmarkDetector.fit(frame, np.expand_dims(primary_face, 0))
+
+                 if retval:
+                     landmarks = landmarksList[0][0]
+
+                     # Display detections.
+                     visualize_eyes(landmarks, frame)
+
+                     # Get EAR
+                     ear = get_eye_aspect_ratio(landmarks)
+                     ear_values.append(ear)
+
+                     if frame_count < frame_calib:
+                         frame_count += 1
+                         sum_ear += ear
+                     elif frame_count == frame_calib:
+                         frame_count += 1
+                         avg_ear = sum_ear / frame_count
+                         HIGHER_TH = 0.90 * avg_ear
+                         LOWER_TH = 0.80 * HIGHER_TH
+                         print("SET EAR HIGH: ", HIGHER_TH)
+                         print("SET EAR LOW: ", LOWER_TH)
+                     else:
+                         if ear < LOWER_TH:
+                             state_curr = "closed"
+                         elif ear > HIGHER_TH:
+                             state_curr = "open"
+
+                         if state_prev == "closed" and state_curr == "open":
+                             BLINK += 1
+                             if mixer_init:
+                                 play("./click.wav")
+
+                         state_prev = state_curr
+
+                     cv2.putText(
+                         frame,
+                         f"Blink Counter: {BLINK}",
+                         (10, 80),
+                         cv2.FONT_HERSHEY_SIMPLEX,
+                         1.5,
+                         (0, 0, 255),
+                         4,
+                         cv2.LINE_AA,
+                     )
+             else:
+                 # No valid face detected
+                 pass
+         else:
+             # No faces
+             pass
+         frame_out_final = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         out_writer.write(frame)
+
+         yield frame_out_final, None, None
+
+     cap.release()
+     out_writer.release()
+
+     # Plot EAR values if collected
+     if ear_values:
+         plt.figure(figsize=(10, 5.625))
+         plt.plot(ear_values, label="EAR")
+         plt.title("Eye Aspect Ratio (EAR) over time")
+         plt.xlabel("Frame Index")
+         plt.ylabel("EAR")
+         plt.legend()
+         plt.grid(True)
+         plt.savefig(out_plot_filename)
+         plt.close()
+     else:
+         out_plot_filename = None
+
+     yield None, out_video_filename, out_plot_filename
+
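+ # process_video is a generator: it yields (rgb_frame, None, None) for each
+ # processed frame and ends with a final (None, output_video_path, ear_plot_path),
+ # which the Gradio handler below uses to stream frames and then publish the
+ # processed video and EAR plot.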
+
+ # ------------------------------------------------------------------------------
+ # 4. Gradio UI
+ # ------------------------------------------------------------------------------
+
+
+ def process_gradio(video_file):
+     if video_file is None:
+         return None, None, None
+
+     video_path = video_file
+     output_frames = None
+     processed_video = None
+     plot_img = None
+
+     # Process video using generator
+     for frame_out, processed_video_path, plot_path in process_video(video_path):
+         if frame_out is not None:
+             output_frames = frame_out  # Update frames dynamically
+             yield output_frames, None, None  # Gradio updates frames step-by-step
+         else:
+             processed_video = processed_video_path
+             plot_img = plot_path
+
+     # Final yield with processed video and EAR plot
+     yield None, processed_video, plot_img
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Blink Detection with OpenCV")
+     gr.Markdown("Upload a video to detect blinks and view the EAR plot after processing.")
+     video_input = gr.Video(label="Input Video")
+     process_btn = gr.Button("Process")
+     output_frames = gr.Image(label="Output Frames")
+     with gr.Row():
+         processed_video = gr.Video(label="Processed Video")
+         ear_plot = gr.Image(label="EAR Plot")
+     process_btn.click(process_gradio, inputs=video_input, outputs=[output_frames, processed_video, ear_plot])
+
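+     # Because process_gradio is a generator, Gradio streams each yielded value
+     # to the outputs: "Output Frames" updates live while the video is processed,
+     # and the processed video and EAR plot are filled in by the final yield.
+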
+     examples = [
+         ["./input-video.mp4"],
+     ]
+
+     with gr.Row():
+         gr.Examples(
+             examples=examples,
+             inputs=[video_input],
+             label="Load Example Video",
+         )
+
+ if __name__ == "__main__":
+     demo.launch()
click.wav ADDED
Binary file (195 kB).
 
input-video.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c1bdb3d8302bbb63bc5fb8137e2b532182bb3126261bebd5f1d6cd48d52dfab
+ size 38229628
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ opencv-contrib-python
+ gradio
+ matplotlib
+ pygame