import os
import sys
import pickle

import numpy
import librosa
import torch
import streamlit as st

# Log library versions once at startup; handy when debugging dependency issues.
print(numpy.__version__)
print(librosa.__version__)
path_to_add = os.path.join(os.path.dirname(__file__), "Wav2Lip")
if path_to_add not in sys.path:
    sys.path.insert(0, path_to_add)
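# The Wav2Lip checkout is assumed to sit next to this script; putting it on
# sys.path lets the import below and Wav2Lip's internal modules resolve.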
from avatar import Avatar

options = ['Aude', 'Kyla', 'Liv', 'MC6']
images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png', 'ref_videos/MC6.png']

big_text = """
<div style='text-align: center;'>
    <h1 style='font-size: 30px;'>Text to Speech Synchronized Video</h1>
</div>
"""
# Display the styled text
st.markdown(big_text, unsafe_allow_html=True)

current_status_placeholder = st.empty()
init_progress_bar = st.progress(0)
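# Streamlit re-runs this script from the top on every widget interaction, so
# the heavy one-time setup below is guarded by a session_state flag.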
if 'is_initialized' not in st.session_state:
    st.session_state.avatar = Avatar()
    st.session_state.avatar.export_video = False
    current_status_placeholder.write("load model")
    st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
    current_status_placeholder.write("load model finished")
    st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(st.session_state.avatar.device)
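    # Pipeline outputs: the TTS audio lands in audio/result.wav, the silent
    # lip-synced frames in temp/result.avi, and the final muxed video in
    # results/result_voice.mp4.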
    st.session_state.avatar.output_audio_path = "audio/"
    st.session_state.avatar.output_audio_filename = "result.wav"
    st.session_state.avatar.temp_lip_video_no_voice_path = "temp/"
    st.session_state.avatar.temp_lip_video_no_voice_filename = "result.avi"
    st.session_state.avatar.output_video_path = "results/"
    st.session_state.avatar.output_video_name = "result_voice.mp4"
    st.session_state.selected_option = "Liv"
    st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
    st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
    st.session_state.avatar.face_detect_batch_size = 16
    st.session_state.avatar.create_face_detection_results(st.session_state.avatar.video_full_frames, True)
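    # The per-avatar pickles loaded below are assumed to have been produced
    # offline with something like this sketch (attribute names mirror the
    # Avatar usage above):
    #
    #     for option in options:
    #         avatar.get_video_full_frames(f"ref_videos/{option}.mp4")
    #         avatar.create_face_detection_results(avatar.video_full_frames, True)
    #         with open(f"ref_videos/{option}_face_det_result.pkl", "wb") as f:
    #             pickle.dump(avatar.face_detect_img_results, f)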
current_status_placeholder.write("load face detection result") | |
st.session_state.face_det_results_dict={} | |
for option in options: | |
with open(f'ref_videos/{option}_face_det_result.pkl', 'rb') as file: | |
st.session_state.face_det_results_dict[option] = pickle.load(file) | |
st.session_state.avatar.face_detect_img_results =st.session_state.face_det_results_dict[st.session_state.selected_option] | |
st.session_state.avatar.face_det_results_path_and_name = 'ref_videos/Liv_face_det_result.pkl' | |
st.session_state.avatar.load_face_detection_results() | |
    # For reference, Avatar.load_face_detection_results just unpickles that file:
    #     def load_face_detection_results(self):
    #         with open(self.face_det_results_path_and_name, 'rb') as file:
    #             self.face_detect_img_results = pickle.load(file)
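    # Warm-up: exercise the full TTS -> Wav2Lip pipeline once on a canned
    # phrase (export_video is still False), presumably so the first real
    # request avoids cold-start latency.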
    input_text = "Hi How are you?"
    st.session_state.avatar.text_to_lip_video(input_text, init_progress_bar)
    current_status_placeholder.write("load face detection result done")
    st.session_state['is_initialized'] = True
# Create the radio button group
selected_option = st.radio("Choose an option:", options, index=options.index(st.session_state.selected_option))
img_col1, img_col2 = st.columns([1, 1])
with img_col1:
    st.image(images[options.index(selected_option)])
if st.session_state.selected_option != selected_option:
    print("The selected option has changed!")
    st.session_state.selected_option = selected_option
    st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
    st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
    # Reuse the pre-loaded face detection results; no re-detection on switch.
    st.session_state.avatar.face_detect_img_results = st.session_state.face_det_results_dict[st.session_state.selected_option]
# Create a text input box and store the input in a variable
user_input = st.text_input("Enter your text:")
inference_progress_bar = st.progress(0)
if user_input:
    st.session_state.avatar.dir_clean_up()
    # Display the entered text
    st.write("You entered:", user_input)
    st.session_state.avatar.export_video = True
    st.session_state.avatar.text_to_lip_video(user_input, inference_progress_bar)
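    # Center the rendered video by sandwiching it between two spacer columns.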
    col1, col2, col3 = st.columns([1, 4, 1])
    # with col1:
    #     st.write("Column 1 content")
    with col2:
        st.video(st.session_state.avatar.output_video_path + st.session_state.avatar.output_video_name)
    # with col3:
    #     st.write("Column 3 content")