text_to_speech_sync_video

Running

App Files Files Community

TDN-M committed on Jan 20

Commit

74076e9

verified ·

1 Parent(s): 4766c2d

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -13

app.py CHANGED Viewed

@@ -5,11 +5,38 @@ import torch
 import pickle
 import numpy
 import librosa
 from avatar import Avatar
 # Cấu hình ban đầu
 options = ['Aude', 'Kyla', 'Liv', 'MC6']
 images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png', 'ref_videos/MC6.png']
 # Thêm đường dẫn đến thư mục Wav2Lip
 wav2lip_path = os.path.join(os.path.dirname(__file__), "Wav2Lip")
 if wav2lip_path not in sys.path:
@@ -27,18 +54,20 @@ init_progress_bar = st.progress(0)
 # Khởi tạo session state
 if 'is_initialized' not in st.session_state:
     # Khởi tạo Avatar
     st.session_state.avatar = Avatar()
     st.session_state.avatar.export_video = False
     # Load model
-    current_status_placeholder.write("load model")
     st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
-    current_status_placeholder.write("load model finished")
     # Cấu hình thiết bị
     st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    print(st.session_state.avatar.device)
     # Cấu hình đường dẫn
     st.session_state.avatar.output_audio_path = "audio/"
@@ -56,15 +85,6 @@ if 'is_initialized' not in st.session_state:
     st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
     st.session_state.avatar.face_detect_batch_size = 16
-    # Tạo và lưu face detection results
-    face_det_results = st.session_state.avatar.create_face_detection_results(
-        st.session_state.avatar.video_full_frames,
-        save_result=False
-    )
-    output_path = f"ref_videos/{st.session_state.selected_option}_face_det_result.pkl"
-    with open(output_path, 'wb') as f:
-        pickle.dump(face_det_results, f)
     # Load face detection results cho tất cả options
     st.session_state.face_det_results_dict = {}
     for option in options:
@@ -77,7 +97,7 @@ if 'is_initialized' not in st.session_state:
     # Xử lý text to speech
     input_text = "Hi How are you?"
     st.session_state.avatar.text_to_lip_video(input_text, init_progress_bar)
-    current_status_placeholder.write("load face detection result done")
     st.session_state['is_initialized'] = True

 import pickle
 import numpy
 import librosa
+import subprocess
 from avatar import Avatar
+def run_pickleface():
+    """Run ``pickleface.py`` in a child process and report whether it succeeded.
+
+    The script is launched with the interpreter that is running this app
+    (``sys.executable``) rather than whatever ``python`` happens to resolve
+    to on PATH, so the child runs under the same Python installation.
+    The child's stdout is printed on success; its stderr is printed on a
+    non-zero exit.
+
+    Returns:
+        bool: True if the script exited with status 0, False if it exited
+        with a non-zero status or could not be started at all.
+    """
+    # sys.executable may be empty in unusual embeddings; fall back to PATH lookup.
+    interpreter = sys.executable or 'python'
+    try:
+        result = subprocess.run(
+            [interpreter, 'pickleface.py'],
+            check=True,
+            capture_output=True,
+            text=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error running pickleface.py: {e.stderr}")
+        return False
+    except OSError as e:
+        # The interpreter itself could not be launched (missing, not executable).
+        print(f"Error running pickleface.py: {e}")
+        return False
+    print(result.stdout)
+    return True
+def initialize_face_detection_results():
+    """Make sure a face-detection pickle exists for every avatar option.
+
+    Looks for ``ref_videos/<option>_face_det_result.pkl`` for each entry in
+    ``options``. If any file is absent, ``run_pickleface()`` is invoked; when
+    that fails, an error is shown through Streamlit and the script run is
+    stopped with ``st.stop()``.
+    """
+    # Check whether all pkl files already exist
+    all_present = all(
+        os.path.exists(f'ref_videos/{name}_face_det_result.pkl')
+        for name in options
+    )
+    if all_present:
+        return
+    current_status_placeholder.write("Creating face detection results...")
+    created = run_pickleface()
+    if not created:
+        st.error("Failed to create face detection results")
+        st.stop()
+    current_status_placeholder.write("Face detection results created successfully!")
 # Cấu hình ban đầu
 options = ['Aude', 'Kyla', 'Liv', 'MC6']
 images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png', 'ref_videos/MC6.png']
 # Thêm đường dẫn đến thư mục Wav2Lip
 wav2lip_path = os.path.join(os.path.dirname(__file__), "Wav2Lip")
 if wav2lip_path not in sys.path:
 # Khởi tạo session state
 if 'is_initialized' not in st.session_state:
+    initialize_face_detection_results()
     # Khởi tạo Avatar
     st.session_state.avatar = Avatar()
     st.session_state.avatar.export_video = False
     # Load model
+    current_status_placeholder.write("Loading model...")
     st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
+    current_status_placeholder.write("Model loaded successfully")
     # Cấu hình thiết bị
     st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    print(f"Using device: {st.session_state.avatar.device}")
     # Cấu hình đường dẫn
     st.session_state.avatar.output_audio_path = "audio/"
     st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
     st.session_state.avatar.face_detect_batch_size = 16
     # Load face detection results cho tất cả options
     st.session_state.face_det_results_dict = {}
     for option in options:
     # Xử lý text to speech
     input_text = "Hi How are you?"
     st.session_state.avatar.text_to_lip_video(input_text, init_progress_bar)
+    current_status_placeholder.write("Face detection results loaded")
     st.session_state['is_initialized'] = True