zmbfeng committed
Commit 78953ab · Parent: 41d2d4a

status placeholder and progress bars added

Files changed (2)
  1. app.py +10 -6
  2. avatar.py +6 -4
app.py CHANGED

@@ -16,12 +16,14 @@ from avatar import Avatar
 
 options = ['Aude', 'Kyla', 'Liv']
 images = ['ref_videos/Aude.png', 'ref_videos/Kyla.png', 'ref_videos/Liv.png']
+current_status_placeholder = st.empty()
+init_progress_bar = st.progress(0)
 if 'is_initialized' not in st.session_state:
     st.session_state.avatar = Avatar()
     st.session_state.avatar.export_video = False
-
+    current_status_placeholder.write("load model")
     st.session_state.avatar.load_model("checkpoint/wav2lip_gan.pth")
-    print("load model finished")
+    current_status_placeholder.write("load model finished")
     st.session_state.avatar.device = 'cuda' if torch.cuda.is_available() else 'cpu'
     print(st.session_state.avatar.device)
     st.session_state.avatar.output_audio_path = "audio/"
@@ -36,7 +38,7 @@ if 'is_initialized' not in st.session_state:
     st.session_state.avatar.get_video_full_frames(st.session_state.avatar.ref_video_path_and_filename)
     st.session_state.avatar.face_detect_batch_size = 16
     # avatar.create_face_detection_results(avatar.video_full_frames,True)
-    print("load face detection result")
+    current_status_placeholder.write("load face detection result")
     st.session_state.face_det_results_dict={}
     for option in options:
         with open(f'ref_videos/{option}_face_det_result.pkl', 'rb') as file:
@@ -49,8 +51,8 @@ if 'is_initialized' not in st.session_state:
     # with open(self.face_det_results_path_and_name, 'rb') as file:
     #     self.face_detect_img_results = pickle.load(file)
     input_text = "Hi How are you?"
-    st.session_state.avatar.text_to_lip_video(input_text)
-    print("load face detection result done")
+    st.session_state.avatar.text_to_lip_video(input_text,init_progress_bar)
+    current_status_placeholder.write("load face detection result done")
 
     st.session_state['is_initialized'] = True
 
@@ -64,6 +66,7 @@ img_col1, img_col2 = st.columns([1,1])
 with img_col1:
     st.image(images[options.index(selected_option)])
 
+
 if st.session_state.selected_option != selected_option:
     print("The selected option has changed!")
     st.session_state.selected_option = selected_option
@@ -75,12 +78,13 @@ if st.session_state.selected_option != selected_option:
 from avatar import Avatar
 # Create a text input box and store the input in a variable
 user_input = st.text_input("Enter your text:")
+inference_progress_bar = st.progress(0)
 if user_input:
     st.session_state.avatar.dir_clean_up()
     # Display the entered text
     st.write("You entered:", user_input)
     st.session_state.avatar.export_video=True
-    st.session_state.avatar.text_to_lip_video(user_input)
+    st.session_state.avatar.text_to_lip_video(user_input,inference_progress_bar)
     col1, col2, col3 = st.columns([1, 4, 1])
 
     # with col1:
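The UI pattern this commit introduces in app.py is a single st.empty() placeholder that is overwritten at each initialization stage, plus st.progress() bars that are handed down into the long-running calls. A minimal sketch of that pattern in isolation, assuming a hypothetical slow_task stand-in for calls like text_to_lip_video:

import time
import streamlit as st

status = st.empty()   # one placeholder; each .write() replaces the previous text
bar = st.progress(0)  # st.progress accepts an int in [0, 100] or a float in [0.0, 1.0]

def slow_task(progress_bar, n_steps=50):
    # Hypothetical stand-in for a long-running call such as text_to_lip_video.
    for i in range(n_steps):
        time.sleep(0.05)                          # simulate work
        progress_bar.progress((i + 1) / n_steps)  # update the caller's bar in place

status.write("load model")
slow_task(bar)
status.write("load model finished")  # overwrites "load model" rather than appending

Note that init_progress_bar is created outside the 'is_initialized' guard, so an (empty) bar is re-rendered on every Streamlit rerun, while the expensive initialization itself still runs only once per session.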
avatar.py CHANGED

@@ -244,9 +244,10 @@ class Avatar:
         # print ("face_detect_img_results[1][1] shape = " +str(self.face_detect_img_results[1][0].shape)) #this is cropped image
 
 
-    def datagen(self, full_frames, mels, face_detect_results):
+    def datagen(self, full_frames, mels, face_detect_results,progress_bar):
         img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
         print(len(full_frames))
+        progress_bar.progress(0)
         for i, m in enumerate(mels):
             idx = i%len(full_frames)
             frame_to_save = full_frames[idx].copy()
@@ -271,7 +272,8 @@ class Avatar:
                 #print(f"len(img_batch)>{self.datagen_batch_size} now len(img_batch)=" + str(len(img_batch)))
                 yield img_batch, mel_batch, frame_batch, coords_batch
                 img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
-
+            progress_percentage = (i + 1) / len(mels)
+            progress_bar.progress(progress_percentage)
         if len(img_batch) > 0:
             img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
 
@@ -474,7 +476,7 @@ class Avatar:
         return out_frame_chunks_from_video,out_faces_from_detect_results,post_frame_chunks_from_video,post_faces_from_detect_results
 
 
-    def text_to_lip_video(self, input_text):
+    def text_to_lip_video(self, input_text,progress_bar):
 
         mel_chunks_from_audio, audio_segment =self.create_mel_from_audio(input_text)
         print(str(len(self.face_detect_img_results)))
@@ -482,7 +484,7 @@
 
         video_full_frames_copy,face_detect_results=self.video_audio_adjust(mel_chunks_from_audio,video_full_frames_copy)
 
-        gen=self.datagen(video_full_frames_copy, mel_chunks_from_audio,face_detect_results)
+        gen=self.datagen(video_full_frames_copy, mel_chunks_from_audio,face_detect_results,progress_bar)
 
         if self.export_video:
             video_write_handle = cv2.VideoWriter(self.temp_lip_video_no_voice_path+self.temp_lip_video_no_voice_filename,
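Because datagen is a generator, the new progress_bar.progress(...) calls only execute as the inference loop pulls batches, so the bar advances in step with the work instead of jumping at the end. A stripped-down sketch of the same idea, with illustrative names (batched_with_progress and PrintProgress are not from the repo); any object exposing a progress(fraction) method works, which also keeps the generator testable outside Streamlit:

def batched_with_progress(items, batch_size, progress_bar):
    # Yield fixed-size batches, reporting the fraction of items consumed so far.
    batch = []
    progress_bar.progress(0)
    for i, item in enumerate(items):
        batch.append(item)
        if len(batch) >= batch_size:
            yield batch  # control returns to the consumer here
            batch = []
        progress_bar.progress((i + 1) / len(items))  # runs only once iteration resumes
    if batch:  # flush the final partial batch
        yield batch

# Duck-typed progress sink for use outside Streamlit, e.g. in tests:
class PrintProgress:
    def progress(self, fraction):
        print(f"{fraction:.0%}")

for chunk in batched_with_progress(range(10), batch_size=4, progress_bar=PrintProgress()):
    pass  # consume; progress updates interleave with batch processing

Passing the Streamlit bar down through text_to_lip_video into datagen, rather than importing streamlit in avatar.py, keeps the Avatar class free of any direct UI dependency.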