Spaces:
Running
Running
multiple ref video selection done
Browse files
- app.py +33 -5
- ref_videos/.gitattributes +1 -0
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
import os
|
3 |
import sys
|
4 |
import torch
|
5 |
-
|
6 |
import numpy
|
7 |
print(numpy.__version__)
|
8 |
import librosa
|
@@ -14,10 +14,12 @@ if path_to_add not in sys.path:
|
|
14 |
sys.path.insert(0, path_to_add)
|
15 |
from avatar import Avatar
|
16 |
|
17 |
-
|
|
|
18 |
if 'is_initialized' not in st.session_state:
|
19 |
st.session_state.avatar = Avatar()
|
20 |
st.session_state.avatar.export_video = False
|
|
|
21 |
st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
|
22 |
print("load model finished")
|
23 |
st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
@@ -28,19 +30,45 @@ if 'is_initialized' not in st.session_state:
|
|
28 |
st.session_state.avatar.temp_lip_video_no_voice_filename = "result.avi"
|
29 |
st.session_state.avatar.output_video_path = "results/"
|
30 |
st.session_state.avatar.output_video_name = "result_voice.mp4"
|
31 |
-
st.session_state.
|
32 |
-
st.session_state.avatar.
|
|
|
33 |
st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
|
34 |
st.session_state.avatar.face_detect_batch_size = 16
|
35 |
# avatar.create_face_detection_results(avatar.video_full_frames,True)
|
36 |
print("load face detection result")
|
37 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
input_text = "Hi How are you?"
|
39 |
st.session_state.avatar.text_to_lip_video(input_text)
|
40 |
print("load face detection result done")
|
|
|
41 |
st.session_state['is_initialized'] = True
|
42 |
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
from avatar import Avatar
|
45 |
# Create a text input box and store the input in a variable
|
46 |
user_input = st.text_input("Enter your text:")
|
|
|
2 |
import os
|
3 |
import sys
|
4 |
import torch
|
5 |
+
import pickle
|
6 |
import numpy
|
7 |
print(numpy.__version__)
|
8 |
import librosa
|
|
|
14 |
sys.path.insert(0, path_to_add)
|
15 |
from avatar import Avatar
|
16 |
|
17 |
+
options = ['Aude', 'Kyla', 'Liv']
|
18 |
+
images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png']
|
19 |
if 'is_initialized' not in st.session_state:
|
20 |
st.session_state.avatar = Avatar()
|
21 |
st.session_state.avatar.export_video = False
|
22 |
+
|
23 |
st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
|
24 |
print("load model finished")
|
25 |
st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
30 |
st.session_state.avatar.temp_lip_video_no_voice_filename = "result.avi"
|
31 |
st.session_state.avatar.output_video_path = "results/"
|
32 |
st.session_state.avatar.output_video_name = "result_voice.mp4"
|
33 |
+
st.session_state.selected_option = "Liv"
|
34 |
+
st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
|
35 |
+
|
36 |
st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
|
37 |
st.session_state.avatar.face_detect_batch_size = 16
|
38 |
# avatar.create_face_detection_results(avatar.video_full_frames,True)
|
39 |
print("load face detection result")
|
40 |
+
st.session_state.face_det_results_dict={}
|
41 |
+
for option in options:
|
42 |
+
with open(f'ref_videos/{option}_face_det_result.pkl', 'rb') as file:
|
43 |
+
st.session_state.face_det_results_dict[option] = pickle.load(file)
|
44 |
+
st.session_state.avatar.face_detect_img_results =st.session_state.face_det_results_dict[st.session_state.selected_option]
|
45 |
+
#st.session_state.avatar.face_det_results_path_and_name = 'ref_videos/Liv_face_det_result.pkl'
|
46 |
+
|
47 |
+
#st.session_state.avatar.load_face_detection_results()
|
48 |
+
# def load_face_detection_results(self):
|
49 |
+
# with open(self.face_det_results_path_and_name, 'rb') as file:
|
50 |
+
# self.face_detect_img_results = pickle.load(file)
|
51 |
input_text = "Hi How are you?"
|
52 |
st.session_state.avatar.text_to_lip_video(input_text)
|
53 |
print("load face detection result done")
|
54 |
+
|
55 |
st.session_state['is_initialized'] = True
|
56 |
|
57 |
|
58 |
+
|
59 |
+
|
60 |
+
# Create the radio button group
|
61 |
+
selected_option = st.radio("Choose an option:", options, index=options.index(st.session_state.selected_option))
|
62 |
+
st.image(images[options.index(selected_option)])
|
63 |
+
|
64 |
+
if st.session_state.selected_option != selected_option:
|
65 |
+
print("The selected option has changed!")
|
66 |
+
st.session_state.selected_option = selected_option
|
67 |
+
st.session_state.avatar.ref_video_path_and_filename = f"ref_videos/{st.session_state.selected_option}.mp4"
|
68 |
+
|
69 |
+
st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
|
70 |
+
st.session_state.avatar.face_detect_img_results =st.session_state.face_det_results_dict[st.session_state.selected_option]
|
71 |
+
|
72 |
from avatar import Avatar
|
73 |
# Create a text input box and store the input in a variable
|
74 |
user_input = st.text_input("Enter your text:")
|
ref_videos/.gitattributes
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
2 |
*.pkl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
1 |
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
2 |
*.pkl filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|