File size: 4,036 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import os
import random


def selenium(base_url, video_url):
    """Drive a headless Chrome session that searches a site and opens the first result.

    The browser optionally signs in to Google first (only when both the
    ``GOOGLE_USERNAME`` and ``GOOGLE_PASSWORD`` environment variables are set),
    then loads ``base_url``, types ``video_url`` into the element named
    ``search_query``, and clicks the first ``a#video-title`` link. Randomized
    sleeps are inserted between steps.

    Args:
        base_url: Page to open; it must expose a ``search_query`` input.
        video_url: Text typed into the search box.

    Returns:
        A ``(video_url, driver)`` tuple: the ``video_url`` argument unchanged
        and the still-open WebDriver. The caller owns the driver and must call
        ``driver.quit()`` on it.

    Raises:
        Any exception raised by Selenium during the session. The browser is
        shut down before the exception propagates, so a failed run does not
        leave a Chrome process behind.
    """
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    import time

    # Set up Selenium browser (Chrome in this case)
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    driver = webdriver.Chrome(options=options)
    try:
        # Redefine navigator.webdriver on the initial document so it reads as undefined.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        google_username = os.getenv('GOOGLE_USERNAME')
        google_password = os.getenv('GOOGLE_PASSWORD')
        if google_username and google_password:
            # Go to Google login page
            driver.get("https://accounts.google.com/signin")

            # Enter email
            email_field = driver.find_element(By.ID, "identifierId")
            email_field.send_keys(google_username)
            email_field.send_keys(Keys.RETURN)
            time.sleep(random.uniform(2, 5))

            # Enter password
            password_field = driver.find_element(By.CSS_SELECTOR, "input[type='password']")
            password_field.send_keys(google_password)
            password_field.send_keys(Keys.RETURN)
            time.sleep(random.uniform(2, 5))

        # Visit site
        driver.get(base_url)

        # Simulate a human-like search
        search_bar = driver.find_element(By.NAME, "search_query")
        search_bar.send_keys(video_url)
        search_bar.send_keys(Keys.RETURN)

        # Wait for the page to load
        time.sleep(random.uniform(3, 6))

        # Click on the first video result
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
        first_video = driver.find_element(By.CSS_SELECTOR, "a#video-title")
        first_video.click()

        # Let the video play for a few seconds (mimic human behavior)
        time.sleep(random.randint(5, 15))

        # Get video URL
        video_url_new = driver.current_url
        print(f"Video URL: {video_url_new}")
    except BaseException:
        # The caller never receives the driver on failure, so it has to be
        # released here; BaseException also covers Ctrl-C during the sleeps.
        driver.quit()
        raise

    # NOTE(review): the resolved URL is only printed; the original video_url is
    # what gets returned. Confirm this is intended before changing it.
    return video_url, driver


def download_web_video(video_url, base_url="https://www.youtube.com", output_dir='.'):
    """Open the video in a browser session, then download it with yt-dlp.

    Args:
        video_url: URL of the video to download; it is also passed to
            ``selenium`` as the search text.
        base_url: Site that ``selenium`` opens before searching.
        output_dir: Directory that receives the file; created if missing.

    The download requests the ``mp4`` format and names the file
    ``<title>.<ext>`` with restricted filenames. When the
    ``OAUTH_REFRESH_TOKEN`` environment variable is non-empty it is passed to
    yt-dlp as the password for the ``oauth`` username.

    Raises:
        Whatever ``selenium``, ``os.makedirs`` or yt-dlp raise. The browser is
        closed in every case once ``selenium`` has returned.
    """
    video_url, driver = selenium(base_url, video_url)

    try:
        # Ensure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        ydl_opts = {
            'format': 'mp4',
            'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
            'restrictfilenames': True,
        }
        oauth_refresh_token = os.getenv('OAUTH_REFRESH_TOKEN', '')
        if oauth_refresh_token:
            ydl_opts.update({'username': 'oauth',
                             'password': oauth_refresh_token,
                             })

        import yt_dlp
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    finally:
        # Close the browser even when the download (or the yt_dlp import) fails,
        # otherwise the headless Chrome process would be left running.
        driver.quit()


def main():
    """Parse the command-line options and hand them to ``download_web_video``."""
    cli = argparse.ArgumentParser(
        description="Download a video from a given URL, e.g. https://www.youtube.com/watch?v=2Njmx-UuU3M")

    # Each entry is (flag, keyword arguments for add_argument), registered in order.
    option_specs = (
        ("--video_url", dict(type=str, required=True, help="The URL of the actual video to download")),
        ("--base_url", dict(type=str, required=False, default="https://www.youtube.com",
                            help="The base website URL that has the video to download, e.g. https://www.youtube.com")),
        ("--output_dir", dict(type=str, default=".", help="The directory to save the downloaded video")),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    opts = cli.parse_args()
    download_web_video(
        video_url=opts.video_url,
        base_url=opts.base_url,
        output_dir=opts.output_dir,
    )


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()