|
import argparse |
|
import os |
|
import random |
|
|
|
|
|
def selenium(base_url, video_url):
    """Open a headless Chrome session, search the site for a video and open the first hit.

    The session optionally signs in to Google first, then loads ``base_url``,
    types ``video_url`` into the page's ``search_query`` field, and clicks the
    first ``a#video-title`` result.

    Args:
        base_url: Page to load that exposes a ``search_query`` input.
        video_url: Text typed into the search field; also returned unchanged.

    Returns:
        A ``(video_url, driver)`` tuple. The driver is still open, and the
        caller is responsible for calling ``driver.quit()``.

    Raises:
        Any exception raised by Selenium while driving the page (for example
        when an expected element is missing). The browser is shut down before
        the exception propagates, so a failed run does not leave a stray
        Chrome process behind.

    Environment:
        GOOGLE_USERNAME / GOOGLE_PASSWORD: when both are set, a Google sign-in
        is attempted before visiting ``base_url``.
    """
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    # Headless Chrome configured to look less like an automated browser.
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    driver = webdriver.Chrome(options=options)
    try:
        # Mask navigator.webdriver on the currently loaded document.
        # NOTE(review): this runs before any navigation; presumably later page
        # loads get a fresh navigator object -- confirm whether it still helps.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        google_username = os.getenv('GOOGLE_USERNAME')
        google_password = os.getenv('GOOGLE_PASSWORD')
        if google_username and google_password:
            driver.get("https://accounts.google.com/signin")

            # Step 1: submit the account identifier.
            email_field = driver.find_element(By.ID, "identifierId")
            email_field.send_keys(google_username)
            email_field.send_keys(Keys.RETURN)
            # Randomised pause before looking for the next form.
            time.sleep(random.uniform(2, 5))

            # Step 2: submit the password.
            password_field = driver.find_element(By.CSS_SELECTOR, "input[type='password']")
            password_field.send_keys(google_password)
            password_field.send_keys(Keys.RETURN)
            time.sleep(random.uniform(2, 5))

        driver.get(base_url)

        # Search the site for the requested video.
        search_bar = driver.find_element(By.NAME, "search_query")
        search_bar.send_keys(video_url)
        search_bar.send_keys(Keys.RETURN)

        time.sleep(random.uniform(3, 6))

        # Scroll a third of the way down, then open the first result.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
        first_video = driver.find_element(By.CSS_SELECTOR, "a#video-title")
        first_video.click()

        # Stay on the video page for a random 5-15 seconds.
        time.sleep(random.randint(5, 15))

        video_url_new = driver.current_url
        print(f"Video URL: {video_url_new}")
    except BaseException:
        # The caller never receives the driver on failure, so release the
        # browser here before propagating (also covers Ctrl-C during a sleep).
        driver.quit()
        raise

    # NOTE(review): the URL passed in is returned, not the post-click
    # ``video_url_new`` -- confirm this is intended.
    return video_url, driver
|
|
|
|
|
def download_web_video(video_url, base_url="https://www.youtube.com", output_dir='.'):
    """Browse to a video with Selenium, then download it with yt-dlp.

    Args:
        video_url: URL of the video to download; it is also used as the search
            text during the browser session.
        base_url: Site whose search page is driven before downloading.
        output_dir: Directory that receives the downloaded file; created if it
            does not exist.

    Environment:
        OAUTH_REFRESH_TOKEN: when set to a non-empty value, it is passed to
        yt-dlp as the password with username ``oauth``.

    Raises:
        Whatever the browser session, directory creation or yt-dlp raise. The
        browser is always shut down once the session has been created.
    """
    video_url, driver = selenium(base_url, video_url)

    # Everything after the browser session runs under try/finally so the
    # Chrome process is released even when the download fails.
    try:
        os.makedirs(output_dir, exist_ok=True)

        ydl_opts = {
            'format': 'mp4',
            'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
            'restrictfilenames': True,
        }
        oauth_refresh_token = os.getenv('OAUTH_REFRESH_TOKEN', '')
        if oauth_refresh_token:
            ydl_opts.update({
                'username': 'oauth',
                'password': oauth_refresh_token,
            })

        # Imported lazily so the module can be imported without yt-dlp installed.
        import yt_dlp
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    finally:
        driver.quit()
|
|
|
|
|
def main():
    """Command-line entry point: parse the options and run the download."""
    cli = argparse.ArgumentParser(
        description="Download a video from a given URL, e.g. https://www.youtube.com/watch?v=2Njmx-UuU3M",
    )

    # Declared as (flag, settings) pairs and registered in this order so the
    # generated --help output lists them the same way.
    option_specs = (
        ("--video_url", {
            "type": str,
            "required": True,
            "help": "The URL of the actual video to download",
        }),
        ("--base_url", {
            "type": str,
            "required": False,
            "default": "https://www.youtube.com",
            "help": "The base website URL that has the video to download, e.g. https://www.youtube.com",
        }),
        ("--output_dir", {
            "type": str,
            "default": ".",
            "help": "The directory to save the downloaded video",
        }),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    parsed = cli.parse_args()
    download_web_video(
        video_url=parsed.video_url,
        base_url=parsed.base_url,
        output_dir=parsed.output_dir,
    )
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
|