text_to_speech_sync_video

Running

App Files Community

zmbfeng committed on Jun 4, 2024

Commit

a8c9931

1 Parent(s): 782884c

multiple ref video selection done

Browse files

Files changed (2) hide show

app.py +33 -5
ref_videos/.gitattributes +1 -0

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import streamlit as st
 import os
 import sys
 import torch
 import numpy
 print(numpy.__version__)
 import librosa
@@ -14,10 +14,12 @@ if path_to_add not in sys.path:
     sys.path.insert(0, path_to_add)
 from avatar import Avatar
 if 'is_initialized' not in st.session_state:
     st.session_state.avatar = Avatar()
     st.session_state.avatar.export_video = False
     st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
     print("load model finished")
     st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -28,19 +30,45 @@ if 'is_initialized' not in st.session_state:
     st.session_state.avatar.temp_lip_video_no_voice_filename = "result.avi"
     st.session_state.avatar.output_video_path = "results/"
     st.session_state.avatar.output_video_name = "result_voice.mp4"
-    st.session_state.avatar.ref_video_path_and_filename = "ref_videos/Liv.mp4"
-    st.session_state.avatar.face_det_results_path_and_name = 'ref_videos/Liv_face_det_result.pkl'
     st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
     st.session_state.avatar.face_detect_batch_size = 16
     # avatar.create_face_detection_results(avatar.video_full_frames,True)
     print("load face detection result")
-    st.session_state.avatar.load_face_detection_results()
     input_text = "Hi How are you?"
     st.session_state.avatar.text_to_lip_video(input_text)
     print("load face detection result done")
     st.session_state['is_initialized'] = True
 from avatar import Avatar
 # Create a text input box and store the input in a variable
 user_input = st.text_input("Enter your text:")

 import os
 import sys
 import torch
+import pickle
 import numpy
 print(numpy.__version__)
 import librosa
     sys.path.insert(0, path_to_add)
 from avatar import Avatar
+options = ['Aude', 'Kyla', 'Liv']
+images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png']
 if 'is_initialized' not in st.session_state:
     st.session_state.avatar = Avatar()
     st.session_state.avatar.export_video = False
     st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
     print("load model finished")
     st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
     st.session_state.avatar.temp_lip_video_no_voice_filename = "result.avi"
     st.session_state.avatar.output_video_path = "results/"
     st.session_state.avatar.output_video_name = "result_voice.mp4"
+    st.session_state.selected_option = "Liv"
+    st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
     st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
     st.session_state.avatar.face_detect_batch_size = 16
     # avatar.create_face_detection_results(avatar.video_full_frames,True)
     print("load face detection result")
+    st.session_state.face_det_results_dict={}
+    for option in options:
+        with open(f'ref_videos/{option}_face_det_result.pkl', 'rb') as file:
+            st.session_state.face_det_results_dict[option] = pickle.load(file)
+    st.session_state.avatar.face_detect_img_results =st.session_state.face_det_results_dict[st.session_state.selected_option]
+    #st.session_state.avatar.face_det_results_path_and_name = 'ref_videos/Liv_face_det_result.pkl'
+    #st.session_state.avatar.load_face_detection_results()
+    #   def load_face_detection_results(self):
+    #     with open(self.face_det_results_path_and_name, 'rb') as file:
+    #       self.face_detect_img_results = pickle.load(file)
     input_text = "Hi How are you?"
     st.session_state.avatar.text_to_lip_video(input_text)
     print("load face detection result done")
     st.session_state['is_initialized'] = True
+# Create the radio button group
+selected_option = st.radio("Choose an option:", options, index=options.index(st.session_state.selected_option))
+st.image(images[options.index(selected_option)])
+if st.session_state.selected_option != selected_option:
+    print("The selected option has changed!")
+    st.session_state.selected_option = selected_option
+    st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
+    st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
+    st.session_state.avatar.face_detect_img_results =st.session_state.face_det_results_dict[st.session_state.selected_option]
 from avatar import Avatar
 # Create a text input box and store the input in a variable
 user_input = st.text_input("Enter your text:")

ref_videos/.gitattributes CHANGED Viewed

@@ -1,2 +1,3 @@
 *.mp4 filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text

 *.mp4 filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text