File size: 7,203 Bytes
6f8cb9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import asyncio
import re
import time
import aiohttp
from bs4 import BeautifulSoup
from helper.asyncioPoliciesFix import decorator_asyncio_fix
from helper.html_scraper import Scraper
from constants.base_url import YTS
from constants.headers import HEADER_AIO
class Yts:
    """Async scraper for the YTS movie-torrent site (search, trending, recent)."""

    def __init__(self):
        self.BASE_URL = YTS
        # Max entries to collect per listing page; set by the public entry
        # points (search/trending/recent) before parsing.
        self.LIMIT = None

    @decorator_asyncio_fix
    async def _individual_scrap(self, session, url, obj):
        """Fetch one movie detail page and enrich *obj* in place.

        Best-effort: any network or parse failure leaves *obj* with only the
        fields collected so far (at minimum its "url").
        """
        try:
            async with session.get(url, headers=HEADER_AIO) as res:
                html = await res.text(encoding="ISO-8859-1")
                soup = BeautifulSoup(html, "html.parser")
                try:
                    name = soup.select_one("div.hidden-xs h1").text
                    headings = soup.select("div.hidden-xs h2")
                    date = headings[0].text
                    genre = headings[1].text.split("/")
                    rating = soup.select_one("[itemprop=ratingValue]").text
                    poster_parts = (
                        soup.find("div", id="movie-poster")
                        .find("img")["src"]
                        .split("/")
                    )
                    # Request the large poster variant instead of the medium one.
                    poster_parts[-1] = poster_parts[-1].replace("medium", "large")
                    poster = "/".join(poster_parts)
                    description = soup.select("div#synopsis > p")[0].text.strip()
                    runtime = (
                        soup.select_one(".tech-spec-info")
                        .find_all("div", class_="row")[-1]
                        .find_all("div")[-3]
                        .text.strip()
                    )
                    screenshots = [
                        a["href"]
                        for a in soup.find_all("a", class_="screenshot-group")
                    ]
                    torrents = []
                    for modal in soup.find_all("div", class_="modal-torrent"):
                        quality = (
                            modal.find("div", class_="modal-quality")
                            .find("span")
                            .text
                        )
                        all_p = modal.find_all("p", class_="quality-size")
                        torrent_link = modal.find(
                            "a", class_="download-torrent"
                        )["href"]
                        magnet = modal.find("a", class_="magnet-download")["href"]
                        # Fixed regex: the old class [{a-f\d,A-F\d}] also
                        # matched literal "{", "}" and "," — restrict to the
                        # hex characters of a 32-40 char info-hash.  Renamed
                        # the local so it no longer shadows builtin hash().
                        info_hash = re.search(
                            r"\b[a-fA-F0-9]{32,40}\b", magnet
                        ).group(0)
                        torrents.append(
                            {
                                "quality": quality,
                                "type": all_p[0].text,
                                "size": all_p[1].text,
                                "torrent": torrent_link,
                                "magnet": magnet,
                                "hash": info_hash,
                            }
                        )
                    obj["name"] = name
                    obj["date"] = date
                    obj["genre"] = genre
                    obj["rating"] = rating
                    obj["poster"] = poster
                    obj["description"] = description
                    obj["runtime"] = runtime
                    obj["screenshot"] = screenshots
                    obj["torrents"] = torrents
                except Exception:
                    # Page layout changed or a field is missing — keep the
                    # partial object rather than failing the whole batch.
                    pass
        except Exception:
            return None

    async def _get_torrent(self, result, session, urls):
        """Concurrently fetch the detail page for every entry in *result*.

        *urls* is kept for interface compatibility (it was built in lockstep
        with result["data"] by _parser); each entry is scraped via its own
        "url" field.  Replaces the old O(n^2) match loop, which could also
        spawn duplicate tasks when two entries shared a URL.
        """
        wanted = set(urls)
        tasks = [
            asyncio.create_task(
                self._individual_scrap(session, obj["url"], obj)
            )
            for obj in result["data"]
            if obj["url"] in wanted
        ]
        await asyncio.gather(*tasks)
        return result

    def _parser(self, htmls):
        """Parse listing pages into ({"data": [...], ...}, detail_urls).

        Returns (None, None) on any failure.  Note: if more than one html is
        supplied, only the last one's results survive (pre-existing behavior,
        kept intentionally).
        """
        try:
            for html in htmls:
                soup = BeautifulSoup(html, "html.parser")
                list_of_urls = []
                my_dict = {"data": []}
                for div in soup.find_all("div", class_="browse-movie-wrap"):
                    url = div.find("a")["href"]
                    list_of_urls.append(url)
                    my_dict["data"].append({"url": url})
                    if len(my_dict["data"]) == self.LIMIT:
                        break
                try:
                    ul = soup.find("ul", class_="tsc_pagination")
                    current_page = ul.find("a", class_="current").text
                    my_dict["current_page"] = int(current_page)
                    if current_page:
                        total_results = soup.select_one(
                            "body > div.main-content > div.browse-content > div > h2 > b"
                        ).text
                        total = int(total_results.replace(",", ""))
                        # Fixed: ceil(total / 20).  The old code computed
                        # int(total)/20 (always a float in Py3) and then
                        # unconditionally added 1, over-counting by one page
                        # whenever total was an exact multiple of 20.
                        my_dict["total_pages"] = -(-total // 20)
                except Exception:
                    # Pagination block absent (e.g. a single page of results).
                    pass
            return my_dict, list_of_urls
        except Exception:
            return None, None

    async def search(self, query, page, limit):
        """Search movies matching *query*; returns the results dict or None."""
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            url = self.BASE_URL + "/browse-movies/{}/all/all/0/latest/0/all".format(
                query
            )
            if page != 1:
                url += "?page={}".format(page)
            return await self.parser_result(start_time, url, session)

    async def parser_result(self, start_time, url, session):
        """Download listing page(s) at *url*, parse, and enrich each entry.

        Adds elapsed "time" and entry-count "total" keys; returns None when
        parsing failed entirely.
        """
        htmls = await Scraper().get_all_results(session, url)
        result, urls = self._parser(htmls)
        if result is None:
            return None
        results = await self._get_torrent(result, session, urls)
        results["time"] = time.time() - start_time
        results["total"] = len(results["data"])
        return results

    async def trending(self, category, page, limit):
        """Trending movies.  *category* and *page* are accepted for interface
        parity with other providers but are not used in the YTS URL."""
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            url = self.BASE_URL + "/trending-movies"
            return await self.parser_result(start_time, url, session)

    async def recent(self, category, page, limit):
        """Recently featured movies.  *category* is accepted for interface
        parity but unused by YTS."""
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            url = self.BASE_URL + "/browse-movies/0/all/all/0/featured/0/all"
            if page != 1:
                url += "?page={}".format(page)
            return await self.parser_result(start_time, url, session)