MultiTalk-Code / MultiTalk_dataset /download_and_process.py
ameerazam08's picture
Upload folder using huggingface_hub
6931c7b verified
import os
import json
import cv2
import argparse
from pytube import Playlist, YouTube
from pytube.exceptions import VideoUnavailable
import os
import shutil
import subprocess
def downloadYouTube(yt, videourl, path):
    """Download a video's best video and audio streams and merge them.

    The highest-resolution non-progressive MP4 stream and the highest-bitrate
    audio-only stream are downloaded separately into ``path`` and muxed with
    ffmpeg into ``<video_id>.mp4``. The two intermediate files are removed
    afterwards, whether or not the download/merge succeeded.

    Args:
        yt: A ``pytube.YouTube``-like object exposing ``streams``.
        videourl: Watch URL; the text after the last ``=`` is used as the id.
        path: Directory that receives the merged file.

    Returns:
        True if the merged file was produced. False if the video was skipped
        (no matching stream, or frame rate below 25 fps) or if ffmpeg exited
        with a non-zero status.
    """
    video_stream = (
        yt.streams.filter(progressive=False, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    audio_stream = (
        yt.streams.filter(only_audio=True)
        .order_by('abr')
        .desc()
        .first()
    )

    # first() returns None when the query matched nothing; treat that as
    # "cannot download" instead of crashing on attribute access.
    if video_stream is None or audio_stream is None:
        return False
    if video_stream.fps < 25:
        return False

    video_id = videourl.split('=')[-1]
    video_path = os.path.join(path, f"{video_id}_video.mp4")
    audio_path = os.path.join(path, f"{video_id}_audio.mp4")
    final_path = os.path.join(path, f"{video_id}.mp4")

    try:
        print("Downloading video...")
        video_stream.download(filename=video_path)
        print("Downloading audio...")
        audio_stream.download(filename=audio_path)
        print("Merging video and audio...")
        # NOTE(review): '-r 25' together with '-c:v copy' does not re-time the
        # frames; it is kept unchanged to preserve the original output.
        merge = subprocess.run([
            'ffmpeg', '-y', '-i', video_path, '-i', audio_path, '-r', '25',
            '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
            final_path,
        ])
    finally:
        # Always drop the intermediate streams, even if a step above raised.
        for tmp_path in (video_path, audio_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    if merge.returncode != 0:
        # A partial output would otherwise pass a later "file exists" check
        # and be mistaken for a finished download.
        if os.path.exists(final_path):
            os.remove(final_path)
        return False
    return True
def process_ffmpeg(raw_vid_path, save_folder, save_vid_name,
                   bbox, time):
    """Cut a clip out of a raw video and crop it to a 512x512, 25 fps video.

    The normalized bounding box is expanded by 0.02 on every side (clamped to
    the frame), converted to pixels, made square around its centre, and then
    passed to ffmpeg's ``crop`` filter followed by ``scale=512:512``.

    Args:
        raw_vid_path: Path of the source video.
        save_folder: Directory the clip is written to.
        save_vid_name: File name of the clip inside ``save_folder``.
        bbox: (top, bottom, left, right); the values are normalized to 0~1.
        time: (begin_sec, end_sec) of the clip within the source video.

    Returns:
        True if ffmpeg exited with status 0, False if the source video could
        not be read or ffmpeg failed.
    """
    def expand(bbox, ratio):
        # Grow the box by `ratio` on each side, clamped to the [0, 1] frame.
        top, bottom = max(bbox[0] - ratio, 0), min(bbox[1] + ratio, 1)
        left, right = max(bbox[2] - ratio, 0), min(bbox[3] + ratio, 1)
        return top, bottom, left, right

    def to_square(bbox):
        # Shrink the longer side so the box is square around its centre.
        top, bottom, left, right = bbox
        h = bottom - top
        w = right - left
        c = min(h, w) // 2
        c_h = (top + bottom) / 2
        c_w = (left + right) / 2
        top, bottom = c_h - c, c_h + c
        left, right = c_w - c, c_w + c
        return top, bottom, left, right

    def denorm(bbox, height, width):
        # Convert normalized coordinates to pixel coordinates.
        return (
            round(bbox[0] * height),
            round(bbox[1] * height),
            round(bbox[2] * width),
            round(bbox[3] * width),
        )

    out_path = os.path.join(save_folder, save_vid_name)

    cap = cv2.VideoCapture(raw_vid_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # The capture is only needed for the frame size; free it right away.
        cap.release()

    if width <= 0 or height <= 0:
        # Without a frame size the crop would be empty, so skip ffmpeg.
        print(f"Could not read frame size of {raw_vid_path}; skipping.")
        return False

    top, bottom, left, right = to_square(
        denorm(expand(bbox, 0.02), height, width))
    start_sec, end_sec = time

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from breaking or altering the command.
    crop_filter = (
        f"crop=w={right - left}:h={bottom - top}:x={left}:y={top},"
        "scale=512:512"
    )
    cmd = [
        'ffmpeg', '-i', raw_vid_path, '-r', '25',
        '-vf', crop_filter,
        '-ss', str(start_sec), '-to', str(end_sec),
        '-loglevel', 'error', out_path,
    ]
    return subprocess.run(cmd).returncode == 0
def load_data(file_path):
    """Yield one record per clip described in a JSON annotation file.

    The file must contain a JSON object mapping a clip name to an entry with
    the keys ``youtube_id``, ``duration`` (``start_sec``/``end_sec``),
    ``bbox`` (``top``/``bottom``/``left``/``right``) and ``language``.

    Args:
        file_path: Path of the JSON annotation file.

    Yields:
        Tuples ``(youtube_id, save_name, (start_sec, end_sec),
        [top, bottom, left, right], language)`` where ``save_name`` is the
        clip name with ``.mp4`` appended.
    """
    with open(file_path) as handle:
        annotations = json.load(handle)

    for clip_name in annotations:
        entry = annotations[clip_name]
        video_id = entry['youtube_id']
        span = entry['duration']
        clip_span = (span['start_sec'], span['end_sec'])
        box = entry['bbox']
        corners = [box[side] for side in ('top', 'bottom', 'left', 'right')]
        yield video_id, f"{clip_name}.mp4", clip_span, corners, entry['language']
if __name__ == '__main__':
    # Download every video referenced by the chosen language's annotation
    # file and cut/crop each annotated clip from it.
    parser = argparse.ArgumentParser()
    parser.add_argument('--language', type=str, default="dutch", help='Language')
    args = parser.parse_args()

    # you can change the root folder
    root = './'
    processed_vid_root = os.path.join(root, 'multitalk_dataset')  # processed video path
    raw_vid_root = os.path.join(root, 'raw_video')  # downloaded raw video path
    os.makedirs(processed_vid_root, exist_ok=True)
    os.makedirs(raw_vid_root, exist_ok=True)
    json_path = os.path.join('./annotations', f'{args.language}.json')  # json file path

    # Bound the retries so a permanently failing video cannot stall the run.
    max_download_attempts = 3
    # Videos that could not be fetched; later clips of the same video are
    # skipped without trying the download again.
    skipped_ids = set()

    for vid_id, save_vid_name, clip_time, bbox, language in load_data(json_path):
        if vid_id in skipped_ids:
            continue

        processed_vid_dir = os.path.join(processed_vid_root, language)
        raw_vid_dir = os.path.join(raw_vid_root, language)
        raw_vid_path = os.path.join(raw_vid_dir, vid_id + ".mp4")
        os.makedirs(processed_vid_dir, exist_ok=True)
        os.makedirs(raw_vid_dir, exist_ok=True)

        if not os.path.isfile(raw_vid_path):
            url = 'https://www.youtube.com/watch?v=' + vid_id
            success = False
            for attempt in range(1, max_download_attempts + 1):
                try:
                    yt = YouTube(url, use_oauth=True)
                    success = downloadYouTube(yt, url, raw_vid_dir)
                    break
                except VideoUnavailable as err:
                    # Retrying cannot help for an unavailable video.
                    print(f"Video {vid_id} is unavailable: {err}")
                    break
                except Exception as err:
                    # Catch Exception (not a bare except) so Ctrl-C still
                    # stops the script; other errors are retried.
                    print(f"Download attempt {attempt}/{max_download_attempts} "
                          f"for {vid_id} failed: {err}")
            if not success:
                skipped_ids.add(vid_id)
                continue

        process_ffmpeg(raw_vid_path, processed_vid_dir, save_vid_name, bbox, clip_time)

    # you can remove this directory after downloading
    # shutil.rmtree(raw_vid_root)