Spaces:
Runtime error
Runtime error
File size: 12,477 Bytes
c83dd81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 |
import sys
from src.utils.img_utils import pil_to_cv2, cv2_to_pil, center_crop_cv2, pils_from_video, save_videos_from_pils, save_video_from_cv2_list
from PIL import Image
import cv2
from IPython import embed
import numpy as np
import copy
from src.utils.motion_utils import motion_sync
import pathlib
import torch
import pickle
from glob import glob
import os
from src.models.dwpose.dwpose_detector import dwpose_detector as dwprocessor
from src.models.dwpose.util import draw_pose
import decord
from tqdm import tqdm
from moviepy.editor import AudioFileClip, VideoFileClip
from multiprocessing.pool import ThreadPool
##################################
# Batch-processing configuration.
# NOTE(review): base_dir is concatenated directly with the task name
# (base_dir + sub_task) with no '/' inserted — confirm "root" is the
# intended prefix, e.g. "root/" vs "root".
base_dir = "root"
tasks = ["emtd"]  # dataset sub-directories (under base_dir) to process
process_num = 800  # number of videos handled in this run (was 1266)
start = 0  # index of the first video in the directory listing
end = process_num + start  # exclusive end of the listing slice
#################################
# Side length (pixels) of the square canvas used for cropped frames and poses.
MAX_SIZE = 768
def convert_fps(src_path, tgt_path, tgt_fps=24, tgt_sr=16000):
    """Re-encode *src_path* to *tgt_path* at ``tgt_fps`` frames per second,
    resampling the audio track to ``tgt_sr`` Hz.

    Parameters
    ----------
    src_path : str
        Input video file.
    tgt_path : str
        Output video file (written as H.264 video / AAC audio).
    tgt_fps : int
        Target frame rate applied to the video stream.
    tgt_sr : int or None
        Target audio sample rate; audio is left untouched when ``None``.
    """
    clip = VideoFileClip(src_path)
    try:
        new_clip = clip.set_fps(tgt_fps)
        # BUG FIX: the original guarded the *audio* resampling on ``tgt_fps``;
        # the branch depends on ``tgt_sr``, so test that instead.  Also skip
        # silent videos, which the original crashed on (audio is None).
        if tgt_sr is not None and new_clip.audio is not None:
            audio = new_clip.audio.set_fps(tgt_sr)
            new_clip = new_clip.set_audio(audio)
        new_clip.write_videofile(tgt_path, codec='libx264', audio_codec='aac')
    finally:
        clip.close()  # release the underlying ffmpeg reader (original leaked it)
def get_video_pose(video_path: str, sample_stride: int = 1, max_frame=None):
    """Decode *video_path* and run the DWPose detector on sampled frames.

    The stride is scaled by ``avg_fps / 24`` so that sampling is effectively
    relative to a 24 fps timeline regardless of the source frame rate.

    Returns ``(detected_poses, height, width, frames)`` where *frames* is the
    decoded frame array (N, H, W, C) and *detected_poses* is one DWPose
    result per frame.
    """
    reader = decord.VideoReader(video_path, ctx=decord.cpu(0))
    stride = sample_stride * max(1, int(reader.get_avg_fps() / 24))
    indices = list(range(0, len(reader), stride))
    frames = reader.get_batch(indices).asnumpy()
    if max_frame is not None:
        frames = frames[:max_frame]
    height, width = frames[0].shape[0], frames[0].shape[1]
    detected_poses = [dwprocessor(frame) for frame in frames]
    dwprocessor.release_memory()
    return detected_poses, height, width, frames
def resize_and_pad(img, max_size):
    """Resize *img* so its longer side equals *max_size* and letterbox it,
    centered along the short axis, onto a black square canvas.

    Returns a ``(max_size, max_size, 3)`` uint8 array.
    """
    canvas = np.zeros((max_size, max_size, 3), dtype=np.uint8)
    imh, imw = img.shape[0], img.shape[1]
    half = max_size // 2
    if imh > imw:
        # Portrait: full height, width scaled and centered horizontally.
        new_h = max_size
        new_w = int(round(imw / imh * new_h))
        row_beg, row_end = 0, max_size
        col_beg = half - new_w // 2
        col_end = col_beg + new_w
    else:
        # Landscape / square: full width, height scaled and centered vertically.
        new_w = max_size
        new_h = int(round(imh / imw * new_w))
        col_beg, col_end = 0, max_size
        row_beg = half - new_h // 2
        row_end = row_beg + new_h
    canvas[row_beg:row_end, col_beg:col_end, :] = cv2.resize(img, (new_w, new_h))
    return canvas
def resize_and_pad_param(imh, imw, max_size):
    """Compute, without touching pixels, the letterbox geometry that
    :func:`resize_and_pad` applies to an ``imh`` x ``imw`` image.

    Parameters
    ----------
    imh, imw : int
        Source image height and width in pixels.
    max_size : int
        Side length of the square canvas.

    Returns
    -------
    tuple ``(imh_new, imw_new, rb, re, cb, ce)`` — the resized height/width
    and the row/column slice ``[rb:re, cb:ce]`` of the canvas that the image
    occupies.
    """
    half = max_size // 2
    if imh > imw:
        # Portrait: full height, width scaled and centered horizontally.
        imh_new = max_size
        imw_new = int(round(imw / imh * imh_new))
        rb, re = 0, max_size
        cb = half - imw_new // 2
        ce = cb + imw_new
    else:
        # Landscape / square: full width, height scaled and centered vertically.
        imw_new = max_size
        # BUG FIX: the original unconditionally reset ``imh_new = max_size``
        # right after this line, which disagreed with what resize_and_pad()
        # actually does for landscape inputs.  (The only in-file caller passes
        # a square crop, for which both versions coincide.)
        imh_new = int(round(imh / imw * imw_new))
        cb, ce = 0, max_size
        rb = half - imh_new // 2
        re = rb + imh_new
    return imh_new, imw_new, rb, re, cb, ce
def get_pose_params(detected_poses, max_size, height=None, width=None):
    """Derive a square, head-centred crop box from per-frame DWPose results.

    Mutates each entry of *detected_poses* in place: records its index under
    ``'num'`` and, when several faces were detected, keeps only the first one
    in ``'faces'`` / ``'faces_score'``.

    Parameters
    ----------
    detected_poses : list of DWPose result dicts, one per frame.
    max_size : int
        Side of the square canvas used downstream.
    height, width : int, optional
        Source-frame size in pixels.  BUG FIX: the original read bare
        ``height``/``width`` names that only exist because the main loop
        leaks them as module globals; they are now explicit parameters.
        When omitted, the legacy global-lookup behavior is preserved.

    Returns
    -------
    dict with keys ``'draw_pose_params'`` (letterbox geometry),
    ``'pose_params'`` (normalized crop box ``[w_min, w_max, h_min, h_max]``)
    and ``'video_params'`` (pixel crop box).
    """
    print('get_pose_params...')
    if height is None:
        height = globals()['height']  # legacy fallback; set by the main loop
    if width is None:
        width = globals()['width']
    h_min_all, h_max_all, mid_all = [], [], []
    for num, detected_pose in enumerate(detected_poses):
        detected_poses[num]['num'] = num
        candidate_body = detected_pose['bodies']['candidate']
        score_body = detected_pose['bodies']['score']
        candidate_face = detected_pose['faces']
        score_face = detected_pose['faces_score']
        # Keep a single face per frame.  NOTE(review): despite the original
        # comment claiming "highest-confidence face", the code simply keeps
        # face 0; that behavior is preserved here.
        if candidate_face.shape[0] > 1:
            index = 0
            candidate_face = candidate_face[index]
            score_face = score_face[index]
            detected_poses[num]['faces'] = candidate_face.reshape(
                1, candidate_face.shape[0], candidate_face.shape[1])
            detected_poses[num]['faces_score'] = score_face.reshape(
                1, score_face.shape[0])
        else:
            candidate_face = candidate_face[0]
            score_face = score_face[0]
        # Keep the body with the highest mean keypoint score (bodies are
        # stacked 18 keypoints each).  Note the selection only affects the
        # crop statistics below; 'bodies'/'hands' in the dict are untouched,
        # exactly as in the original.  (The original also sliced the hand
        # arrays here and filtered an ``all_pose`` array, but both results
        # were never used — dead code, removed.)
        if score_body.shape[0] > 1:
            index = np.argmax([score_body[k].mean()
                               for k in range(score_body.shape[0])])
            candidate_body = candidate_body[index * 18:(index + 1) * 18, :]
        # Vertical extent: topmost face point down to the lowest of the first
        # 7 body keypoints (head/shoulders); horizontal centre is body
        # keypoint 1 (neck).
        mid_all.append(candidate_body[1, 0])
        h_min_all.append(np.min(candidate_face[:, 1]))
        h_max_all.append(np.max(candidate_body[:7, 1]))
    h_min = np.min(h_min_all)
    h_max = np.max(h_max_all)
    mid = np.mean(mid_all)
    print(mid)
    # Pad the box vertically: 65% of the margin above, 50% below, clamped to
    # the normalized frame [0, 1].
    margin_ratio = 0.25
    h_margin = (h_max - h_min) * margin_ratio
    h_min = max(h_min - h_margin * 0.65, 0)
    h_max = min(h_max + h_margin * 0.5, 1)
    h_min_real = int(h_min * height)
    h_max_real = int(h_max * height)
    mid_real = int(mid * width)
    # Square pixel crop centred horizontally on the mean mid-point.
    height_new = h_max_real - h_min_real + 1
    width_new = height_new
    w_min_real = mid_real - height_new // 2
    w_max_real = w_min_real + width_new
    w_min = w_min_real / width
    w_max = w_max_real / width
    print(width_new, height_new)
    imh_new, imw_new, rb, re, cb, ce = resize_and_pad_param(
        height_new, width_new, max_size)
    return {
        'draw_pose_params': [imh_new, imw_new, rb, re, cb, ce],
        'pose_params': [w_min, w_max, h_min, h_max],
        'video_params': [h_min_real, h_max_real, w_min_real, w_max_real],
    }
def save_pose_params_item(input_items):
    """Normalize one frame's pose into crop-box coordinates and save it.

    *input_items* is ``[detected_pose, pose_params, draw_pose_params,
    save_dir]``, packed into a single argument so the function can be used
    with ``Pool.map``.  Coordinates are rescaled in place from full-frame
    units into the crop box given by ``pose_params``, and the augmented dict
    is written to ``<save_dir>/<num>.npy``.
    """
    detected_pose, pose_params, draw_pose_params, save_dir = input_items
    w_min, w_max, h_min, h_max = pose_params
    frame_idx = detected_pose['num']
    body = detected_pose['bodies']['candidate']
    face = detected_pose['faces'][0]
    hands = detected_pose['hands']
    w_span = w_max - w_min
    h_span = h_max - h_min
    # Rescale x (column 0) and y (column 1) of every keypoint set.
    body[:, 0] = (body[:, 0] - w_min) / w_span
    body[:, 1] = (body[:, 1] - h_min) / h_span
    face[:, 0] = (face[:, 0] - w_min) / w_span
    face[:, 1] = (face[:, 1] - h_min) / h_span
    hands[:, :, 0] = (hands[:, :, 0] - w_min) / w_span
    hands[:, :, 1] = (hands[:, :, 1] - h_min) / h_span
    detected_pose['bodies']['candidate'] = body
    detected_pose['faces'] = face.reshape(1, face.shape[0], face.shape[1])
    detected_pose['hands'] = hands
    detected_pose['draw_pose_params'] = draw_pose_params
    np.save(save_dir + '/' + str(frame_idx) + '.npy', detected_pose)
def save_pose_params(detected_poses, pose_params, draw_pose_params, ori_video_path):
    """Normalize every frame's pose via :func:`save_pose_params_item` and
    write one ``<num>.npy`` per frame under
    ``.../image_audio_features/pose/<video name>``.

    Parameters
    ----------
    detected_poses : list of per-frame pose dicts (each carrying ``'num'``).
    pose_params : normalized crop box ``[w_min, w_max, h_min, h_max]``.
    draw_pose_params : letterbox geometry stored alongside each frame.
    ori_video_path : original video path; the save directory is derived from
        it by substituting ``original_videos``.
    """
    save_dir = ori_video_path.replace('original_videos', 'image_audio_features/pose/')
    # exist_ok avoids the check-then-create race of the original version.
    os.makedirs(save_dir, exist_ok=True)
    jobs = [[detected_pose, pose_params, draw_pose_params, save_dir]
            for detected_pose in detected_poses]
    # Context manager tears the worker threads down even if a job raises
    # (the original leaked the pool on error).
    with ThreadPool(8) as pool:
        pool.map(save_pose_params_item, jobs)
def save_processed_video(ori_frames, video_params, ori_video_path, max_size):
    """Crop every frame to the pixel box in *video_params*, letterbox it to a
    ``max_size`` square via :func:`resize_and_pad`, and save the result as a
    24 fps video under ``.../processed/video/``.

    Returns the list of cropped-and-padded frames (also used by the caller
    for visualization).
    """
    save_path = ori_video_path.replace('original_videos', 'processed/video/')
    # exist_ok avoids the check-then-create race of the original version.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    h_min_real, h_max_real, w_min_real, w_max_real = video_params
    video_frame_crop = [
        resize_and_pad(frame[h_min_real:h_max_real, w_min_real:w_max_real, :],
                       max_size=max_size)
        for frame in ori_frames
    ]
    save_video_from_cv2_list(video_frame_crop, save_path, fps=24.0, rgb2bgr=True)
    return video_frame_crop
def save_audio(ori_video_path, sub_task):
    """Extract the audio track of the 24 fps copy of *ori_video_path* and
    write it as a ``.wav`` under ``.../processed/audio/``.

    NOTE(review): the '.wav' suffix is appended to the full video filename,
    producing e.g. ``clip.mp4.wav`` — presumably intentional; kept as-is.
    """
    save_path = ori_video_path.replace('original_videos', 'processed/audio/') + '.wav'
    # exist_ok avoids the check-then-create race of the original version.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # Read the audio from the frame-rate-converted copy, not the original.
    audio_src = ori_video_path.replace(sub_task, sub_task + '_24fps')
    audio_clip = AudioFileClip(audio_src)
    try:
        audio_clip.write_audiofile(save_path)
    finally:
        audio_clip.close()  # the original leaked the ffmpeg reader
def draw_pose_video(pose_params_path, save_path, max_size=None, ori_frames=None):
    """Render the per-frame pose ``.npy`` files as a 24 fps video.

    Parameters
    ----------
    pose_params_path : directory containing ``0.npy``, ``1.npy``, ... as
        written by :func:`save_pose_params`.
    save_path : output video path.
    max_size : int, optional
        Canvas side; defaults to the module-level ``MAX_SIZE``.  BUG FIX:
        the original required it positionally, and the in-file caller
        omitted it, crashing inside a blanket ``except``.
    ori_frames : optional list of frames blended under the skeleton (60/40).
    """
    if max_size is None:
        max_size = MAX_SIZE
    pose_files = os.listdir(pose_params_path)
    rendered = []
    # Frames were saved as 0.npy, 1.npy, ... — iterate by index to keep order.
    for i in range(len(pose_files)):
        frame_path = pose_params_path + '/' + str(i) + '.npy'
        detected_pose = np.load(frame_path, allow_pickle=True).tolist()
        imh_new, imw_new, rb, re, cb, ce = detected_pose['draw_pose_params']
        im = draw_pose(detected_pose, imh_new, imw_new, ref_w=800)
        im = np.transpose(np.array(im), (1, 2, 0))  # CHW -> HWC
        canvas = np.zeros((max_size, max_size, 3), dtype=np.uint8)
        canvas[rb:re, cb:ce, :] = im
        if ori_frames is not None:
            canvas = (canvas * 0.6 + ori_frames[i] * 0.4).astype('uint8')
        rendered.append(canvas)
    save_video_from_cv2_list(np.stack(rendered), save_path, fps=24.0, rgb2bgr=True)
    print('save to ' + save_path)
visualization = False
# Main pipeline: for every video in each task directory, convert to 24 fps,
# run pose detection, and save pose parameters, cropped video and audio.
for sub_task in tasks:
    mp4_list = os.listdir(base_dir + sub_task)[start:end]
    fps_dir = base_dir + sub_task + '_24fps'
    os.makedirs(fps_dir, exist_ok=True)
    index = 1
    for mp4_file in mp4_list:
        ori_video_path = base_dir + sub_task + '/' + mp4_file
        if not ori_video_path.endswith(('mp4', 'MOV')):
            continue
        try:
            # 1. Re-encode to 24 fps (audio resampled to 16 kHz).
            ori_video_path_new = ori_video_path.replace(sub_task, sub_task + '_24fps')
            if '.MOV' in ori_video_path_new:
                # BUG FIX: the original discarded the result of str.replace()
                # (strings are immutable), so .MOV outputs were never renamed.
                ori_video_path_new = ori_video_path_new.replace('.MOV', '.mp4')
            convert_fps(ori_video_path, ori_video_path_new)
            print([index + start, ori_video_path, start, end])
            # 2. Run DWPose on the converted video.
            detected_poses, height, width, ori_frames = get_video_pose(
                ori_video_path_new, max_frame=None)
            print(height, width)
            # 3. Derive the crop geometry.  NOTE: get_pose_params reads the
            # module-level ``height``/``width`` assigned just above.
            res_params = get_pose_params(detected_poses, MAX_SIZE)
            # 4. Save per-frame pose parameters.
            save_pose_params(detected_poses, res_params['pose_params'],
                             res_params['draw_pose_params'], ori_video_path)
            # 5. Save the cropped video.
            video_frame_crop = save_processed_video(
                ori_frames, res_params['video_params'], ori_video_path, MAX_SIZE)
            # 6. Save the audio track.
            save_audio(ori_video_path, sub_task)
            index += 1
            if visualization:
                # Render the pose skeletons over the cropped frames.
                pose_params_path = ori_video_path.replace(
                    'original_videos', 'image_audio_features/pose')
                save_path = "./vis_pose_results/" + os.path.basename(ori_video_path)
                # BUG FIX: the original omitted the required max_size argument,
                # so this call always raised (and was silently swallowed).
                draw_pose_video(pose_params_path, save_path, MAX_SIZE,
                                ori_frames=video_frame_crop)
        except Exception as exc:
            # BUG FIX: the original bare ``except:`` also swallowed
            # KeyboardInterrupt/SystemExit and hid the error; log it and
            # continue with the next video (deliberate best-effort batch).
            print(["extract crash!", index + start, ori_video_path, str(exc), start, end])
            continue
print(["All Finished", sub_task, start, end])
|