ryuzaki-api / driver.py
randydev's picture
Update driver.py
633c037 verified
import datetime
import json
import os
import random
import re
import time
import urllib.parse
from urllib.parse import quote_plus
import httpx
import requests
from pytz import country_names, country_timezones, timezone
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support.wait import WebDriverWait
CHROME_DRIVER = "/usr/bin/chromedriver"
CHROME_BIN = "/usr/bin/google-chrome-stable"
DWL_DIR = "./downloads/"
TEMP_DIR = "./temp/"
class YoutubeDriver:
def __init__(self, search_terms: str, max_results: int = 5):
self.base_url = "https://youtube.com/results?search_query={0}"
self.search_terms = search_terms
self.max_results = max_results
self.videos = self._search()
def _search(self):
encoded_search = urllib.parse.quote_plus(self.search_terms)
response = requests.get(self.base_url.format(encoded_search)).text
while "ytInitialData" not in response:
response = requests.get(self.base_url.format(encoded_search)).text
results = self._parse_html(response)
if self.max_results is not None and len(results) > self.max_results:
return results[: self.max_results]
return results
def _parse_html(self, response: str):
results = []
start = response.index("ytInitialData") + len("ytInitialData") + 3
end = response.index("};", start) + 1
json_str = response[start:end]
data = json.loads(json_str)
videos = data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"][
"sectionListRenderer"
]["contents"][0]["itemSectionRenderer"]["contents"]
for video in videos:
res = {}
if "videoRenderer" in video.keys():
video_data = video.get("videoRenderer", {})
_id = video_data.get("videoId", None)
res["id"] = _id
res["thumbnail"] = f"https://i.ytimg.com/vi/{_id}/hqdefault.jpg"
res["title"] = (
video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None)
)
res["channel"] = (
video_data.get("longBylineText", {})
.get("runs", [[{}]])[0]
.get("text", None)
)
res["duration"] = video_data.get("lengthText", {}).get("simpleText", 0)
res["views"] = video_data.get("viewCountText", {}).get(
"simpleText", "Unknown"
)
res["publish_time"] = video_data.get("publishedTimeText", {}).get(
"simpleText", "Unknown"
)
res["url_suffix"] = (
video_data.get("navigationEndpoint", {})
.get("commandMetadata", {})
.get("webCommandMetadata", {})
.get("url", None)
)
results.append(res)
return results
def to_dict(self, clear_cache=True) -> list[dict]:
result = self.videos
if clear_cache:
self.videos = []
return result
@staticmethod
def check_url(url: str) -> tuple[bool, str]:
if "&" in url:
url = url[: url.index("&")]
if "?si=" in url:
url = url[: url.index("?si=")]
youtube_regex = (
r"(https?://)?(www\.)?"
r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
r'(video|embed|shorts/|watch\?v=|v/|e/|u/\\w+/|\\w+/)?([^"&?\\s]{11})'
)
match = re.match(youtube_regex, url)
if match:
return True, match.group(6)
else:
return False, "Invalid YouTube URL!"
@staticmethod
def song_options() -> dict:
return {
"format": "bestaudio",
"addmetadata": True,
"key": "FFmpegMetadata",
"prefer_ffmpeg": True,
"geo_bypass": True,
"nocheckcertificate": True,
"postprocessors": [
{
"key": "FFmpegExtractAudio",
"preferredcodec": "mp3",
"preferredquality": "480",
}
],
"cookiefile": "cookies.txt",
"outtmpl": "%(id)s",
"quiet": True,
"logtostderr": False,
}
@staticmethod
def video_options() -> dict:
return {
"format": "best",
"addmetadata": True,
"key": "FFmpegMetadata",
"prefer_ffmpeg": True,
"geo_bypass": True,
"nocheckcertificate": True,
"postprocessors": [
{
"key": "FFmpegVideoConvertor",
"preferedformat": "mp4",
}
],
"cookiefile": "cookies.txt",
"outtmpl": "%(id)s.mp4",
"quiet": True,
"logtostderr": False,
}
class ChromeDriver:
def __init__(self) -> None:
self.carbon_theme = [
"3024-night",
"a11y-dark",
"blackboard",
"base16-dark",
"base16-light",
"cobalt",
"duotone-dark",
"hopscotch",
"lucario",
"material",
"monokai",
"night-owl",
"nord",
"oceanic-next",
"one-light",
"one-dark",
"panda-syntax",
"paraiso-dark",
"seti",
"shades-of-purple",
"solarized+dark",
"solarized+light",
"synthwave-84",
"twilight",
"verminal",
"vscode",
"yeti",
"zenburn",
]
def get(self):
if not CHROME_BIN:
return (
None,
"ChromeBinaryErr: No binary path found! Install Chromium or Google Chrome.",
)
try:
options = Options()
options.binary_location = CHROME_BIN
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--ignore-certificate-errors")
options.add_argument("--disable-gpu")
options.add_argument("--headless=new")
options.add_argument("--test-type")
options.add_argument("--no-sandbox")
options.add_argument("--window-size=1920x1080")
options.add_argument("--enable-logging")
options.add_argument("--v=1")
options.add_argument("--remote-debugging-port=9222")
options.add_experimental_option(
"prefs", {"download.default_directory": "./"}
)
service = Service(CHROME_DRIVER)
driver = webdriver.Chrome(options, service)
return driver, None
except Exception as e:
return None, f"ChromeDriverErr: {e}"
def close(self, driver: webdriver.Chrome):
driver.close()
driver.quit()
@property
def get_random_carbon(self) -> str:
url = "https://carbon.now.sh/?l=auto"
url += f"&t={random.choice(self.carbon_theme)}"
url += f"&bg=rgba%28{random.randint(1, 255)}%2C{random.randint(1, 255)}%2C{random.randint(1, 255)}%2C1%29"
url += "&code="
return url
async def generate_carbon(
self, driver: webdriver.Chrome, code: str, is_random: bool = False
) -> str:
filename = f"{round(time.time())}"
BASE_URL = (
self.get_random_carbon
if is_random
else "https://carbon.now.sh/?l=auto&code="
)
driver.get(BASE_URL + format_text(quote_plus(code)))
driver.command_executor._commands["send_command"] = (
"POST",
"/session/$sessionId/chromium/send_command",
)
params = {
"cmd": "Page.setDownloadBehavior",
"params": {"behavior": "allow", "downloadPath": DWL_DIR},
}
driver.execute("send_command", params)
driver.find_element(By.XPATH, "//button[@id='export-menu']").click()
driver.find_element(By.XPATH, "//input[@title='filename']").send_keys(filename)
driver.find_element(By.XPATH, "//button[@id='export-png']").click()
return f"{DWL_DIR}/{filename}.png"
class SCRAP_DATA:
"""Class to get and handel scrapped data"""
def __init__(self, urls: list[str] | str) -> None:
self.urls = urls
self.path = "./scrapped/"
if not os.path.isdir(self.path):
os.makedirs("./scrapped/")
def get_images(self) -> list:
images = []
if isinstance(self.urls, str):
requested = requests.get(self.urls)
try:
name = self.path + f"img_{time.time()}.jpg"
with open(name, "wb") as f:
f.write(requested.content)
images.append(name)
except Exception as e:
requested.close()
else:
for i in self.urls:
if i:
requested = requests.get(i)
else:
continue
try:
name = self.path + f"img_{time.time()}.jpg"
with open(name, "wb") as f:
f.write(requested.content)
images.append(name)
except Exception as e:
requested.close()
continue
return images
def get_videos(self) -> list:
videos = []
if isinstance(self.urls, str):
if i:
requested = requests.get(i)
else:
return []
try:
name = self.path + f"vid_{time.time()}.mp4"
with open(name, "wb") as f:
f.write(requested.content)
videos.append(name)
except Exception as e:
requested.close()
else:
for i in self.urls:
if i:
requested = requests.get(i)
else:
continue
try:
name = self.path + f"vid_{time.time()}.mp4"
with open(name, "wb") as f:
f.write(requested.content)
videos.append(name)
except Exception as e:
requested.close()
continue
return videos
class INSTAGRAM(ChromeDriver):
"""Class to scrap data from instagram"""
def __init__(self, url: str) -> None:
self.url = url
self.article = "article._aa6a"
self.ul_class = "_acay"
self.image_class = "x5yr21d"
self.video_class = "x1lliihq"
self.next_button = "button._afxw"
self.return_dict = {"image": [], "video": []}
super().__init__()
def get_all(self):
driver, error = self.get()
if not driver:
return error
driver.get(self.url)
wait = WebDriverWait(driver, 30)
image_links = []
video_links = []
try:
element = wait.until(presence_of_element_located(
(By.CLASS_NAME, self.ul_class)))
while True:
sub_element = element.find_elements(
By.CLASS_NAME, self.image_class)
for i in sub_element:
url = i.get_attribute("src")
image_links.append(url)
sub_element = element.find_elements(
By.CLASS_NAME, self.video_class)
for i in sub_element:
url = i.get_attribute("src")
video_links.append(url)
try:
driver.find_element(
By.CSS_SELECTOR, self.next_button).click()
except:
break
except:
element = wait.until(presence_of_element_located((By.CSS_SELECTOR, self.article)))
try:
sub_element = element.find_element(By.TAG_NAME, "img")
url = sub_element.get_attribute("src")
image_links.append(url)
except:
sub_element = element.find_element(By.TAG_NAME, "video")
url = sub_element.get_attribute("src")
video_links.append(url)
self.close(driver)
if image_links:
image_links = list(set(image_links))
if video_links:
video_links = list(set(video_links))
for i in video_links:
image_links.remove(i)
self.return_dict.get("image").extend(image_links)
self.return_dict.get("video").extend(video_links)
return self.return_dict
Driver = ChromeDriver()