MultiTalk-Code / MultiTalk_dataset /download_and_process.py
ameerazam08's picture
Upload folder using huggingface_hub
6931c7b verified
raw
history blame
5.13 kB
import os
import json
import cv2
import argparse
from pytube import Playlist, YouTube
from pytube.exceptions import VideoUnavailable
import os
import shutil
import subprocess
def downloadYouTube(yt, videourl, path):
    """Download the best video and audio streams of a clip and merge them.

    The highest-resolution non-progressive mp4 video stream and the
    highest-bitrate audio-only stream are downloaded to temporary files in
    ``path`` and muxed with ffmpeg into ``<video_id>.mp4``.

    Args:
        yt: a pytube ``YouTube`` object (anything exposing ``.streams``).
        videourl: watch URL; the text after the last ``=`` is used as the id.
        path: directory that receives the merged ``<video_id>.mp4``.

    Returns:
        True when the merged file was produced. False when no suitable
        stream exists, the video stream is below 25 fps, or ffmpeg failed.
    """
    video_stream = (
        yt.streams
        .filter(progressive=False, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    audio_stream = (
        yt.streams
        .filter(only_audio=True)
        .order_by('abr')
        .desc()
        .first()
    )

    # `.first()` yields None when the filter matches nothing; report that as
    # a normal failure instead of crashing on attribute access below.
    if video_stream is None or audio_stream is None:
        print("No suitable video/audio stream found for " + videourl)
        return False

    if video_stream.fps < 25:
        return False

    video_id = videourl.split('=')[-1]
    video_path = os.path.join(path, f"{video_id}_video.mp4")
    audio_path = os.path.join(path, f"{video_id}_audio.mp4")
    final_path = os.path.join(path, f"{video_id}.mp4")

    try:
        print("Downloading video...")
        video_stream.download(filename=video_path)
        print("Downloading audio...")
        audio_stream.download(filename=audio_path)
        print("Merging video and audio...")
        merge = subprocess.run([
            'ffmpeg', '-i', video_path, '-i', audio_path, '-r', '25',
            '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
            final_path, '-y'
        ])
    finally:
        # Always drop the intermediate files, even if a download raised.
        for tmp_path in (video_path, audio_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    if merge.returncode != 0:
        # A partial output would make later runs believe the download is
        # complete, so remove it before reporting the failure.
        if os.path.exists(final_path):
            os.remove(final_path)
        return False
    return True
def process_ffmpeg(raw_vid_path, save_folder, save_vid_name,
                   bbox, time):
    """Cut a time segment out of a raw video and crop it to a 512x512 clip.

    The bounding box is padded by 0.02 (normalized units) on each side,
    converted to pixels using the frame size reported by OpenCV, reduced to
    a square around its center, and handed to ffmpeg's crop filter. The
    result is resampled to 25 fps and scaled to 512x512.

    Args:
        raw_vid_path: path of the downloaded source video.
        save_folder: directory the processed clip is written to.
        save_vid_name: file name of the processed clip.
        bbox: format: top, bottom, left, right. the values are normalized
            to 0~1
        time: begin_sec, end_sec
    """

    def expand(bbox, ratio):
        # Pad the box on every side, clamped to the normalized frame.
        top, bottom = max(bbox[0] - ratio, 0), min(bbox[1] + ratio, 1)
        left, right = max(bbox[2] - ratio, 0), min(bbox[3] + ratio, 1)
        return top, bottom, left, right

    def to_square(bbox):
        # Keep the center and use half of the shorter side as the half-size.
        top, bottom, left, right = bbox
        h = bottom - top
        w = right - left
        c = min(h, w) // 2
        c_h = (top + bottom) / 2
        c_w = (left + right) / 2
        top, bottom = c_h - c, c_h + c
        left, right = c_w - c, c_w + c
        return top, bottom, left, right

    def denorm(bbox, height, width):
        # Normalized coordinates -> integer pixel coordinates.
        top, bottom, left, right = \
            round(bbox[0] * height), \
            round(bbox[1] * height), \
            round(bbox[2] * width), \
            round(bbox[3] * width)
        return top, bottom, left, right

    out_path = os.path.join(save_folder, save_vid_name)

    cap = cv2.VideoCapture(raw_vid_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # The capture is only needed for the frame size; free it right away.
        cap.release()

    if width <= 0 or height <= 0:
        # OpenCV could not read the file; a 0x0 crop would be meaningless.
        print("Could not read frame size of " + str(raw_vid_path) + ", skipping")
        return

    top, bottom, left, right = to_square(
        denorm(expand(bbox, 0.02), height, width))
    start_sec, end_sec = time

    crop_filter = (
        f"crop=w={right-left}:h={bottom-top}:x={left}:y={top},scale=512:512"
    )
    # Argument list (no shell) so paths with spaces or shell metacharacters
    # are passed through verbatim. `-y` overwrites an existing output rather
    # than stopping at ffmpeg's interactive prompt.
    cmd = [
        'ffmpeg', '-y', '-i', raw_vid_path, '-r', '25',
        '-vf', crop_filter,
        '-ss', str(start_sec), '-to', str(end_sec),
        '-loglevel', 'error', out_path,
    ]
    subprocess.run(cmd)
def load_data(file_path):
    """Iterate over the clip annotations stored in a JSON file.

    The file maps a clip name to a record with the keys ``youtube_id``,
    ``duration`` (``start_sec``/``end_sec``), ``bbox``
    (``top``/``bottom``/``left``/``right``) and ``language``.

    Args:
        file_path: path of the annotation JSON file.

    Yields:
        Tuples ``(youtube_id, save_name, (start_sec, end_sec),
        [top, bottom, left, right], language)`` where ``save_name`` is the
        clip name with ``.mp4`` appended.
    """
    # Read explicitly as UTF-8 so parsing does not depend on the platform's
    # default encoding.
    with open(file_path, encoding="utf-8") as f:
        data_dict = json.load(f)

    # The file is fully parsed and closed before any item is yielded.
    for key, val in data_dict.items():
        save_name = key + ".mp4"
        ytb_id = val['youtube_id']
        time = val['duration']['start_sec'], val['duration']['end_sec']
        bbox = [val['bbox']['top'], val['bbox']['bottom'],
                val['bbox']['left'], val['bbox']['right']]
        language = val['language']
        yield ytb_id, save_name, time, bbox, language
if __name__ == '__main__':
    # Entry point: for every clip listed in ./annotations/<language>.json,
    # download the raw YouTube video (unless already present) and cut the
    # annotated, cropped segment out of it.
    parser = argparse.ArgumentParser()
    parser.add_argument('--language', type=str, default="dutch", help='Language')
    parser.add_argument('--max_retries', type=int, default=5,
                        help='Maximum download attempts per video')
    args = parser.parse_args()

    # you can change the root folder
    root = './'
    processed_vid_root = os.path.join(root, 'multitalk_dataset')  # processed video path
    raw_vid_root = os.path.join(root, 'raw_video')  # downloaded raw video path
    os.makedirs(processed_vid_root, exist_ok=True)
    os.makedirs(raw_vid_root, exist_ok=True)

    json_path = os.path.join('./annotations', f'{args.language}.json')  # json file path

    for vid_id, save_vid_name, clip_time, bbox, language in load_data(json_path):
        processed_vid_dir = os.path.join(processed_vid_root, language)
        raw_vid_dir = os.path.join(raw_vid_root, language)
        raw_vid_path = os.path.join(raw_vid_dir, vid_id + ".mp4")
        os.makedirs(processed_vid_dir, exist_ok=True)
        os.makedirs(raw_vid_dir, exist_ok=True)

        url = 'https://www.youtube.com/watch?v=' + vid_id
        success = True
        if not os.path.isfile(raw_vid_path):
            success = False
            # Bounded retries: transient failures get another attempt, but a
            # video that can never be fetched no longer stalls the whole run.
            for attempt in range(1, args.max_retries + 1):
                try:
                    yt = YouTube(url, use_oauth=True)
                    success = downloadYouTube(yt, url, raw_vid_dir)
                    break
                except VideoUnavailable as err:
                    # Retrying cannot help for an unavailable video.
                    print(f"Skipping {vid_id}: video unavailable ({err})")
                    break
                except Exception as err:
                    # `Exception` (not a bare except) keeps Ctrl-C working.
                    print(f"Download attempt {attempt}/{args.max_retries} "
                          f"for {vid_id} failed: {err}")

        if success:
            process_ffmpeg(raw_vid_path, processed_vid_dir, save_vid_name, bbox, clip_time)

    # you can remove this directory after downloading
    # shutil.rmtree(raw_vid_root)