text_to_speech_sync_video

Running

App Files Files Community

TDN-M committed on Jan 20

Commit

939a8b2

verified ·

1 Parent(s): cb87636

Update avatar.py

Browse files

Files changed (1) hide show

avatar.py +54 -37

avatar.py CHANGED Viewed

@@ -182,45 +182,62 @@ class Avatar:
   def create_face_detection_results(self, full_frames, save_result=True):
-    """Run batched face detection over ``full_frames`` and return padded boxes.
-
-    Returns a list holding ``[x1, y1, x2, y2]`` (clamped to the frame bounds)
-    or ``None`` for each (prediction, frame) pair. ``save_result`` is not
-    used in this body. Raises ``RuntimeError`` if detection still fails
-    once the batch size has been reduced to 1.
-    """
-    detector = FaceAlignment(LandmarksType.TWO_D, flip_input=False, device=self.device)
-    images = full_frames
-    # Retry the whole pass with a halved batch size after any RuntimeError
-    # (treated as out-of-memory); predictions is reset on every attempt.
-    while 1:
-        predictions = []
-        try:
-            for i in tqdm(range(0, len(images), self.face_detect_batch_size)):
-                batch_images = np.array(images[i:i + self.face_detect_batch_size])
-                # NOTE(review): permute(0, 3, 1, 2) presumably converts NHWC to NCHW - confirm.
-                batch_images = torch.from_numpy(batch_images).permute(0, 3, 1, 2).float().to(self.device)
-                predictions.extend(detector.face_detector.detect_from_batch(batch_images))
-        except RuntimeError:
-            if self.face_detect_batch_size == 1:
-                raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument')
-            self.face_detect_batch_size //= 2
-            print('Recovering from OOM error; New batch size: {}'.format(self.face_detect_batch_size))
-            continue
-        break
-    face_detect_results = []
-    # Padding applied to the box edges; only the bottom edge (y2) is extended.
-    pady1, pady2, padx1, padx2 = [0, 10, 0, 0]
-    for rect, image in zip(predictions, images):
-        # Check whether rect is None or has too few elements
-        if rect is None or len(rect) < 4:
-            # Skip this frame (recording None) and continue with the next one
-            face_detect_results.append(None)
-            continue
-        try:
-            y1 = max(0, rect[1] - pady1)
-            y2 = min(image.shape[0], rect[3] + pady2)
-            x1 = max(0, rect[0] - padx1)
-            x2 = min(image.shape[1], rect[2] + padx2)
-            face_detect_results.append([x1, y1, x2, y2])
-        except (IndexError, TypeError) as e:
-            print(f"Error processing face detection result: {e}")
-            face_detect_results.append(None)
-            continue
-    return face_detect_results
     # print("\n")
     # print("face_detect_results length = " + str(len(face_detect_results)))
     # print("face_detect_results[2]="+str(face_detect_results[2]))

   def create_face_detection_results(self, full_frames, save_result=True):
+    """Detect a face box for each frame and return padded, clamped boxes.
+
+    Args:
+        full_frames: Iterable of frames. Each batch is stacked with
+            ``np.array`` and permuted with ``(0, 3, 1, 2)`` before detection
+            (presumably NHWC -> NCHW; confirm against the caller).
+        save_result: Not used in this method; kept so existing callers
+            keep working.
+
+    Returns:
+        A list with one entry per (prediction, frame) pair: either
+        ``[x1, y1, x2, y2]`` clamped to the frame bounds, or ``None`` when
+        the prediction is missing or cannot be processed.
+
+    Raises:
+        RuntimeError: If detection still fails with a batch size of 1.
+        Exception: Any other error is printed and re-raised unchanged.
+    """
+    try:
+        from face_alignment import FaceAlignment, LandmarksType
+        # Fall back to CUDA when available (else CPU) if no device was configured.
+        if not hasattr(self, 'device') or self.device is None:
+            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        # Build the detector, explicitly selecting the SFD face detector.
+        detector = FaceAlignment(
+            LandmarksType.TWO_D,
+            device=self.device,
+            face_detector='sfd'
+        )
+        images = list(full_frames)
+        # Batched detection: a RuntimeError is treated as out-of-memory, so the
+        # batch size is halved and the whole pass is retried.
+        while True:
+            # Reset on every attempt so a failed pass cannot leave partial
+            # results behind that would misalign predictions with images.
+            predictions = []
+            try:
+                for i in range(0, len(images), self.face_detect_batch_size):
+                    batch_images = np.array(images[i:i + self.face_detect_batch_size])
+                    batch_images = torch.from_numpy(batch_images).permute(0, 3, 1, 2).float().to(self.device)
+                    predictions.extend(detector.face_detector.detect_from_batch(batch_images))
+                break
+            except RuntimeError as err:
+                if self.face_detect_batch_size == 1:
+                    raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument') from err
+                self.face_detect_batch_size //= 2
+                print(f'Reducing batch size to {self.face_detect_batch_size} due to OOM error')
+        # Post-process the detections into padded boxes clamped to each frame.
+        # NOTE(review): assumes each prediction is one flat [x1, y1, x2, y2, ...]
+        # box - confirm against the return shape of detect_from_batch.
+        face_detect_results = []
+        # Padding applied to the box edges; only the bottom edge (y2) is extended.
+        pady1, pady2, padx1, padx2 = [0, 10, 0, 0]
+        for rect, image in zip(predictions, images):
+            if rect is None or len(rect) < 4:
+                face_detect_results.append(None)
+                continue
+            try:
+                y1 = max(0, rect[1] - pady1)
+                y2 = min(image.shape[0], rect[3] + pady2)
+                x1 = max(0, rect[0] - padx1)
+                x2 = min(image.shape[1], rect[2] + padx2)
+                face_detect_results.append([x1, y1, x2, y2])
+            except (IndexError, TypeError) as e:
+                print(f"Error processing face detection result: {e}")
+                face_detect_results.append(None)
+                continue
+        return face_detect_results
+    except Exception as e:
+        print(f"Error in create_face_detection_results: {str(e)}")
+        raise
     # print("\n")
     # print("face_detect_results length = " + str(len(face_detect_results)))
     # print("face_detect_results[2]="+str(face_detect_results[2]))