ryuzaki-api

Running

App Files Files Community

ryuzaki-api / driver.py

randydev

Update driver.py

633c037 verified 11 months ago

raw

history blame contribute delete

13.1 kB

	import datetime
	import json
	import os
	import random
	import re
	import time
	import urllib.parse
	from urllib.parse import quote_plus

	import httpx
	import requests
	from pytz import country_names, country_timezones, timezone
	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.chrome.service import Service
	from selenium.webdriver.common.by import By
	from selenium.webdriver.support.expected_conditions import presence_of_element_located
	from selenium.webdriver.support.wait import WebDriverWait

	# Filesystem locations used by the Selenium-based helpers in this module.
	# chromedriver executable handed to selenium's Service.
	CHROME_DRIVER: str = "/usr/bin/chromedriver"
	# Browser binary assigned to Options.binary_location.
	CHROME_BIN: str = "/usr/bin/google-chrome-stable"
	# Directory passed to Chrome as the download path when exporting carbon images.
	DWL_DIR: str = "./downloads/"
	# Scratch directory; not referenced by the code visible in this file.
	TEMP_DIR: str = "./temp/"

	class YoutubeDriver:
	    """Search YouTube by scraping the HTML results page.

	    Instantiating the class performs the search immediately and stores the
	    parsed results in ``self.videos``. The class also offers a URL validator
	    and two option presets whose keys follow the youtube-dl / yt-dlp option
	    naming (presumably consumed by a downloader elsewhere - confirm at the
	    call site).
	    """

	    # Groups: 1 scheme, 2 "www.", 3 host name, 4 TLD, 5 optional path prefix,
	    # 6 the 11-character video id.
	    _URL_PATTERN = re.compile(
	        r"(https?://)?(www\.)?"
	        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
	        r'(video/|embed/|shorts/|watch\?v=|v/|e/|u/\w+/|\w+/)?([^"&?\s]{11})'
	    )

	    def __init__(self, search_terms: str, max_results: int = 5):
	        """Run a search for *search_terms*, keeping at most *max_results* hits.

	        Passing ``max_results=None`` keeps every parsed result.
	        """
	        self.base_url = "https://youtube.com/results?search_query={0}"
	        self.search_terms = search_terms
	        self.max_results = max_results
	        self.videos = self._search()

	    def _search(self, max_attempts: int = 5, timeout: float = 15.0):
	        """Fetch the results page and return the parsed video list.

	        The page is re-requested until it contains the ``ytInitialData``
	        marker, but only up to *max_attempts* times so a blocked or changed
	        page cannot spin forever. An empty list is returned when no attempt
	        yields the marker. Network errors from ``requests`` propagate.
	        """
	        url = self.base_url.format(urllib.parse.quote_plus(self.search_terms))

	        for _ in range(max_attempts):
	            response = requests.get(url, timeout=timeout).text
	            if "ytInitialData" in response:
	                break
	        else:
	            return []

	        results = self._parse_html(response)
	        if self.max_results is not None and len(results) > self.max_results:
	            return results[: self.max_results]
	        return results

	    @staticmethod
	    def _first_run_text(node: dict):
	        """Return ``node["runs"][0]["text"]`` or ``None`` when any part is missing."""
	        runs = node.get("runs") or [{}]
	        return runs[0].get("text", None)

	    def _parse_html(self, response: str):
	        """Extract video entries from the ``ytInitialData`` JSON embedded in *response*.

	        Returns a list of dicts with the keys ``id``, ``thumbnail``, ``title``,
	        ``channel``, ``duration``, ``views``, ``publish_time`` and
	        ``url_suffix``. Raises ``ValueError`` when the marker or a valid JSON
	        object after it cannot be found.
	        """
	        marker = response.index("ytInitialData")
	        json_start = response.index("{", marker)
	        # raw_decode stops at the end of the first complete JSON value, so a
	        # "};" sequence inside a string cannot truncate the payload.
	        data, _ = json.JSONDecoder().raw_decode(response, json_start)

	        sections = (
	            data.get("contents", {})
	            .get("twoColumnSearchResultsRenderer", {})
	            .get("primaryContents", {})
	            .get("sectionListRenderer", {})
	            .get("contents", [])
	        )
	        # Use the first section that actually carries an item list instead of
	        # assuming it is always at index 0.
	        videos = next(
	            (
	                section["itemSectionRenderer"].get("contents", [])
	                for section in sections
	                if "itemSectionRenderer" in section
	            ),
	            [],
	        )

	        results = []
	        for video in videos:
	            if "videoRenderer" not in video:
	                continue
	            video_data = video["videoRenderer"] or {}
	            _id = video_data.get("videoId", None)

	            results.append(
	                {
	                    "id": _id,
	                    "thumbnail": f"https://i.ytimg.com/vi/{_id}/hqdefault.jpg",
	                    "title": self._first_run_text(video_data.get("title", {})),
	                    "channel": self._first_run_text(
	                        video_data.get("longBylineText", {})
	                    ),
	                    "duration": video_data.get("lengthText", {}).get(
	                        "simpleText", 0
	                    ),
	                    "views": video_data.get("viewCountText", {}).get(
	                        "simpleText", "Unknown"
	                    ),
	                    "publish_time": video_data.get("publishedTimeText", {}).get(
	                        "simpleText", "Unknown"
	                    ),
	                    "url_suffix": (
	                        video_data.get("navigationEndpoint", {})
	                        .get("commandMetadata", {})
	                        .get("webCommandMetadata", {})
	                        .get("url", None)
	                    ),
	                }
	            )
	        return results

	    def to_dict(self, clear_cache=True) -> list[dict]:
	        """Return the stored results; by default also reset ``self.videos`` to ``[]``."""
	        result = self.videos
	        if clear_cache:
	            self.videos = []
	        return result

	    @staticmethod
	    def check_url(url: str) -> tuple[bool, str]:
	        """Validate a YouTube URL.

	        Everything from the first ``&`` or from ``?si=`` onwards is dropped
	        before matching. Returns ``(True, video_id)`` on success and
	        ``(False, "Invalid YouTube URL!")`` otherwise.
	        """
	        if "&" in url:
	            url = url[: url.index("&")]

	        if "?si=" in url:
	            url = url[: url.index("?si=")]

	        match = YoutubeDriver._URL_PATTERN.match(url)
	        if match:
	            return True, match.group(6)
	        return False, "Invalid YouTube URL!"

	    @staticmethod
	    def song_options() -> dict:
	        """Return the option preset for audio: best audio, extracted to mp3."""
	        return {
	            "format": "bestaudio",
	            "addmetadata": True,
	            "key": "FFmpegMetadata",
	            "prefer_ffmpeg": True,
	            "geo_bypass": True,
	            "nocheckcertificate": True,
	            "postprocessors": [
	                {
	                    "key": "FFmpegExtractAudio",
	                    "preferredcodec": "mp3",
	                    "preferredquality": "480",
	                }
	            ],
	            "cookiefile": "cookies.txt",
	            "outtmpl": "%(id)s",
	            "quiet": True,
	            "logtostderr": False,
	        }

	    @staticmethod
	    def video_options() -> dict:
	        """Return the option preset for video: best format, converted to mp4."""
	        return {
	            "format": "best",
	            "addmetadata": True,
	            "key": "FFmpegMetadata",
	            "prefer_ffmpeg": True,
	            "geo_bypass": True,
	            "nocheckcertificate": True,
	            "postprocessors": [
	                {
	                    "key": "FFmpegVideoConvertor",
	                    "preferedformat": "mp4",
	                }
	            ],
	            "cookiefile": "cookies.txt",
	            "outtmpl": "%(id)s.mp4",
	            "quiet": True,
	            "logtostderr": False,
	        }

	class ChromeDriver:
	    """Create headless Chrome WebDriver sessions and render code via carbon.now.sh."""

	    def __init__(self) -> None:
	        """Store the carbon.now.sh theme names that random carbon URLs pick from."""
	        self.carbon_theme = [
	            "3024-night",
	            "a11y-dark",
	            "blackboard",
	            "base16-dark",
	            "base16-light",
	            "cobalt",
	            "duotone-dark",
	            "hopscotch",
	            "lucario",
	            "material",
	            "monokai",
	            "night-owl",
	            "nord",
	            "oceanic-next",
	            "one-light",
	            "one-dark",
	            "panda-syntax",
	            "paraiso-dark",
	            "seti",
	            "shades-of-purple",
	            "solarized+dark",
	            "solarized+light",
	            "synthwave-84",
	            "twilight",
	            "verminal",
	            "vscode",
	            "yeti",
	            "zenburn",
	        ]

	    def get(self):
	        """Start a headless Chrome session.

	        Returns ``(driver, None)`` on success and ``(None, error_message)``
	        on failure; errors are reported in-band rather than raised.
	        """
	        if not CHROME_BIN:
	            return (
	                None,
	                "ChromeBinaryErr: No binary path found! Install Chromium or Google Chrome.",
	            )

	        try:
	            options = Options()
	            options.binary_location = CHROME_BIN
	            options.add_argument("--disable-dev-shm-usage")
	            options.add_argument("--ignore-certificate-errors")
	            options.add_argument("--disable-gpu")
	            options.add_argument("--headless=new")
	            options.add_argument("--test-type")
	            options.add_argument("--no-sandbox")
	            # Chrome expects "width,height"; the "x" separator is not parsed.
	            options.add_argument("--window-size=1920,1080")
	            options.add_argument("--enable-logging")
	            options.add_argument("--v=1")
	            # NOTE(review): a fixed debugging port may clash when several
	            # sessions run at once - confirm whether it is needed.
	            options.add_argument("--remote-debugging-port=9222")
	            options.add_experimental_option(
	                "prefs", {"download.default_directory": "./"}
	            )
	            service = Service(CHROME_DRIVER)
	            # Keyword arguments keep this independent of the positional
	            # parameter order of webdriver.Chrome.
	            driver = webdriver.Chrome(options=options, service=service)
	            return driver, None
	        except Exception as e:
	            return None, f"ChromeDriverErr: {e}"

	    def close(self, driver: "webdriver.Chrome"):
	        """Close the current window and always end the session.

	        ``quit()`` runs even when ``close()`` raises, so the driver process
	        is not left behind; the ``close()`` error still propagates.
	        """
	        try:
	            driver.close()
	        finally:
	            driver.quit()

	    @property
	    def get_random_carbon(self) -> str:
	        """Build a carbon.now.sh URL prefix with a random theme and background.

	        The returned string ends in ``&code=`` so the encoded source can be
	        appended directly.
	        """
	        theme = random.choice(self.carbon_theme)
	        red, green, blue = (random.randint(1, 255) for _ in range(3))
	        return (
	            "https://carbon.now.sh/?l=auto"
	            f"&t={theme}"
	            f"&bg=rgba%28{red}%2C{green}%2C{blue}%2C1%29"
	            "&code="
	        )

	    async def generate_carbon(
	        self, driver: "webdriver.Chrome", code: str, is_random: bool = False
	    ) -> str:
	        """Open carbon.now.sh with *code* and trigger a PNG export.

	        Returns the path the exported file is expected at, inside ``DWL_DIR``.
	        The method returns right after clicking export and does not wait for
	        the download to finish. All Selenium calls here are blocking even
	        though the method is declared ``async``.
	        """
	        filename = f"{round(time.time())}"
	        base_url = (
	            self.get_random_carbon
	            if is_random
	            else "https://carbon.now.sh/?l=auto&code="
	        )

	        encoded = quote_plus(code)
	        # ``format_text`` is not defined or imported in the visible file;
	        # apply it only when the module actually provides it so the call
	        # cannot fail with NameError.
	        formatter = globals().get("format_text")
	        if callable(formatter):
	            encoded = formatter(encoded)

	        driver.get(base_url + encoded)
	        # Register the Chromium-specific endpoint used to send a DevTools command.
	        driver.command_executor._commands["send_command"] = (
	            "POST",
	            "/session/$sessionId/chromium/send_command",
	        )
	        os.makedirs(DWL_DIR, exist_ok=True)
	        params = {
	            "cmd": "Page.setDownloadBehavior",
	            "params": {"behavior": "allow", "downloadPath": DWL_DIR},
	        }
	        driver.execute("send_command", params)

	        driver.find_element(By.XPATH, "//button[@id='export-menu']").click()
	        driver.find_element(By.XPATH, "//input[@title='filename']").send_keys(filename)
	        driver.find_element(By.XPATH, "//button[@id='export-png']").click()

	        # os.path.join avoids the doubled slash produced by DWL_DIR's trailing "/".
	        return os.path.join(DWL_DIR, f"{filename}.png")

	class SCRAP_DATA:
	"""Class to get and handel scrapped data"""

	def __init__(self, urls: list[str] \| str) -> None:
	self.urls = urls
	self.path = "./scrapped/"
	if not os.path.isdir(self.path):
	os.makedirs("./scrapped/")

	def get_images(self) -> list:
	images = []
	if isinstance(self.urls, str):
	requested = requests.get(self.urls)
	try:
	name = self.path + f"img_{time.time()}.jpg"
	with open(name, "wb") as f:
	f.write(requested.content)
	images.append(name)
	except Exception as e:
	requested.close()
	else:
	for i in self.urls:
	if i:
	requested = requests.get(i)
	else:
	continue
	try:
	name = self.path + f"img_{time.time()}.jpg"
	with open(name, "wb") as f:
	f.write(requested.content)
	images.append(name)
	except Exception as e:

	requested.close()
	continue
	return images

	def get_videos(self) -> list:
	videos = []
	if isinstance(self.urls, str):
	if i:
	requested = requests.get(i)
	else:
	return []
	try:
	name = self.path + f"vid_{time.time()}.mp4"
	with open(name, "wb") as f:
	f.write(requested.content)
	videos.append(name)
	except Exception as e:
	requested.close()
	else:
	for i in self.urls:
	if i:
	requested = requests.get(i)
	else:
	continue
	try:
	name = self.path + f"vid_{time.time()}.mp4"
	with open(name, "wb") as f:
	f.write(requested.content)
	videos.append(name)
	except Exception as e:

	requested.close()
	continue
	return videos


	class INSTAGRAM(ChromeDriver):
	    """Collect image and video source URLs from an Instagram page with Selenium."""

	    def __init__(self, url: str) -> None:
	        """Store the page *url* and the selectors / class names used to locate media."""
	        self.url = url
	        self.article = "article._aa6a"
	        self.ul_class = "_acay"
	        self.image_class = "x5yr21d"
	        self.video_class = "x1lliihq"
	        self.next_button = "button._afxw"
	        self.return_dict = {"image": [], "video": []}
	        super().__init__()

	    @staticmethod
	    def _append_src(node, bucket: list) -> None:
	        """Append the node's ``src`` attribute to *bucket* when it has one."""
	        src = node.get_attribute("src")
	        if src:
	            bucket.append(src)

	    def _collect_paged(self, driver, wait, image_links: list, video_links: list) -> None:
	        """Gather media from the ``ul_class`` element, clicking "next" until it is gone."""
	        element = wait.until(
	            presence_of_element_located((By.CLASS_NAME, self.ul_class))
	        )
	        while True:
	            for node in element.find_elements(By.CLASS_NAME, self.image_class):
	                self._append_src(node, image_links)
	            for node in element.find_elements(By.CLASS_NAME, self.video_class):
	                self._append_src(node, video_links)

	            try:
	                driver.find_element(By.CSS_SELECTOR, self.next_button).click()
	            except Exception:
	                # No clickable next button: every page has been visited.
	                break

	    def _collect_single(self, wait, image_links: list, video_links: list) -> None:
	        """Gather one ``img`` (or, failing that, one ``video``) from the article element."""
	        element = wait.until(
	            presence_of_element_located((By.CSS_SELECTOR, self.article))
	        )
	        try:
	            self._append_src(element.find_element(By.TAG_NAME, "img"), image_links)
	        except Exception:
	            self._append_src(element.find_element(By.TAG_NAME, "video"), video_links)

	    def get_all(self):
	        """Scrape the page and return ``{"image": [...], "video": [...]}``.

	        Returns the error string from ``get()`` when no driver could be
	        started. The browser session is always closed, including when
	        scraping raises. Links are de-duplicated in first-seen order, and
	        URLs that were found as videos are left out of the image list.
	        """
	        driver, error = self.get()
	        if not driver:
	            return error

	        image_links = []
	        video_links = []
	        try:
	            driver.get(self.url)
	            wait = WebDriverWait(driver, 30)
	            try:
	                self._collect_paged(driver, wait, image_links, video_links)
	            except Exception:
	                # Fall back to the article lookup when the paged path fails.
	                self._collect_single(wait, image_links, video_links)
	        finally:
	            self.close(driver)

	        video_links = list(dict.fromkeys(video_links))
	        known_videos = set(video_links)
	        # Filtering instead of list.remove() avoids ValueError for video URLs
	        # that never appeared among the images.
	        image_links = [
	            link for link in dict.fromkeys(image_links) if link not in known_videos
	        ]

	        self.return_dict.get("image").extend(image_links)
	        self.return_dict.get("video").extend(video_links)
	        return self.return_dict


	# Shared module-level ChromeDriver instance, created at import time.
	# Constructing it only builds the carbon theme list; no browser is started
	# until its get() method is called.
	Driver = ChromeDriver()