# aiben/openai_server/agent_tools/download_web_video.py
# Uploaded by abugaber using huggingface_hub (commit 3943768, verified); file size 4.04 kB.
import argparse
import os
import random
def selenium(base_url, video_url):
    """Open a headless Chrome session, search for a video and open the first hit.

    If both the ``GOOGLE_USERNAME`` and ``GOOGLE_PASSWORD`` environment
    variables are set, the browser first signs in through the Google login
    page.  It then loads ``base_url``, types ``video_url`` into the element
    named ``search_query``, clicks the first ``a#video-title`` result and
    waits a random number of seconds before printing the resulting URL.

    Args:
        base_url: Address of the site to visit and search on.
        video_url: Text typed into the site's search box.

    Returns:
        A ``(video_url, driver)`` tuple: the ``video_url`` argument unchanged
        and the still-open Chrome driver.  The caller is responsible for
        calling ``driver.quit()``.

    Raises:
        Any exception raised while driving the browser is re-raised after the
        browser has been closed, so a failed run does not leave a Chrome
        process behind.
    """
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    # Set up Selenium browser (Chrome in this case)
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    # Optional user-agent override, left disabled:
    # options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

    driver = webdriver.Chrome(options=options)
    try:
        # Hide the navigator.webdriver flag from page scripts.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        google_username = os.getenv('GOOGLE_USERNAME')
        google_password = os.getenv('GOOGLE_PASSWORD')
        if google_username and google_password:
            # Go to Google login page
            driver.get("https://accounts.google.com/signin")

            # Enter email
            email_field = driver.find_element(By.ID, "identifierId")
            email_field.send_keys(google_username)
            email_field.send_keys(Keys.RETURN)
            time.sleep(random.uniform(2, 5))

            # Enter password
            password_field = driver.find_element(By.CSS_SELECTOR, "input[type='password']")
            password_field.send_keys(google_password)
            password_field.send_keys(Keys.RETURN)
            time.sleep(random.uniform(2, 5))

        # Visit site
        driver.get(base_url)

        # Simulate a human-like search
        search_bar = driver.find_element(By.NAME, "search_query")
        search_bar.send_keys(video_url)
        search_bar.send_keys(Keys.RETURN)

        # Wait for the page to load
        time.sleep(random.uniform(3, 6))

        # Click on the first video result
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
        first_video = driver.find_element(By.CSS_SELECTOR, "a#video-title")
        first_video.click()

        # Let the video play for a few seconds (mimic human behavior)
        time.sleep(random.randint(5, 15))

        # Report the URL the browser ended up on.
        video_url_new = driver.current_url
        print(f"Video URL: {video_url_new}")
    except BaseException:
        # The caller never receives the driver on failure, so close it here
        # to avoid leaking the browser process, then let the error propagate.
        driver.quit()
        raise

    # NOTE(review): the originally requested URL is returned, not
    # video_url_new (the page that was navigated to) -- confirm this is intended.
    return video_url, driver
def download_web_video(video_url, base_url="https://www.youtube.com", output_dir='.'):
    """Download a video as MP4 into ``output_dir`` using yt-dlp.

    A browser session is first run through ``selenium(base_url, video_url)``;
    the URL it returns is then handed to ``yt_dlp.YoutubeDL.download``.  If
    the ``OAUTH_REFRESH_TOKEN`` environment variable is non-empty, it is
    passed to yt-dlp as the password with the username ``'oauth'``.

    Args:
        video_url: URL of the video to download.
        base_url: Site that the browser session visits before downloading.
        output_dir: Directory for the downloaded file; created if missing.

    Returns:
        None.  The file is written as ``<title>.<ext>`` inside ``output_dir``.
    """
    # Import first so a missing yt_dlp fails before any browser is launched.
    import yt_dlp

    video_url, driver = selenium(base_url, video_url)
    try:
        # Ensure the output directory exists
        os.makedirs(output_dir, exist_ok=True)

        ydl_opts = {
            'format': 'mp4',
            'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
            'restrictfilenames': True,
        }
        oauth_refresh_token = os.getenv('OAUTH_REFRESH_TOKEN', '')
        if oauth_refresh_token:
            ydl_opts.update({
                'username': 'oauth',
                'password': oauth_refresh_token,
            })

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    finally:
        # Close the browser even when directory creation or the download fails.
        driver.quit()
def main():
    """Parse the command-line options and start the video download.

    Accepts ``--video_url`` (required), ``--base_url`` and ``--output_dir``,
    and forwards them as keyword arguments to ``download_web_video``.
    """
    cli = argparse.ArgumentParser(
        description="Download a video from a given URL, e.g. https://www.youtube.com/watch?v=2Njmx-UuU3M",
    )
    cli.add_argument(
        "--video_url",
        type=str,
        required=True,
        help="The URL of the actual video to download",
    )
    cli.add_argument(
        "--base_url",
        type=str,
        required=False,
        default="https://www.youtube.com",
        help="The base website URL that has the video to download, e.g. https://www.youtube.com",
    )
    cli.add_argument(
        "--output_dir",
        type=str,
        default=".",
        help="The directory to save the downloaded video",
    )

    # The parsed namespace holds exactly video_url, base_url and output_dir,
    # which match the keyword parameters of download_web_video.
    options = vars(cli.parse_args())
    download_web_video(**options)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()