import logging import platform import sys import time from datetime import timedelta from html import escape from urllib.parse import unquote, quote from func_timeout import FunctionTimedOut, func_timeout from selenium.common import TimeoutException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.common.by import By from selenium.webdriver.support.expected_conditions import ( presence_of_element_located, staleness_of, title_is) from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.support.wait import WebDriverWait import utils from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT, ChallengeResolutionT, HealthResponse, IndexResponse, V1RequestBase, V1ResponseBase) from sessions import SessionsStorage ACCESS_DENIED_TITLES = [ # Cloudflare 'Access denied', # Cloudflare http://bitturk.net/ Firefox 'Attention Required! | Cloudflare' ] ACCESS_DENIED_SELECTORS = [ # Cloudflare 'div.cf-error-title span.cf-code-label span', # Cloudflare http://bitturk.net/ Firefox '#cf-error-details div.cf-error-overview h1' ] CHALLENGE_TITLES = [ # Cloudflare 'Just a moment...', # DDoS-GUARD 'DDoS-Guard' ] CHALLENGE_SELECTORS = [ # Cloudflare '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring', # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands 'td.info #js_info', # Fairlane / pararius.com 'div.vc div.text-box h2' ] SHORT_TIMEOUT = 1 SESSIONS_STORAGE = SessionsStorage() def test_browser_installation(): logging.info("Testing web browser installation...") logging.info("Platform: " + platform.platform()) chrome_exe_path = utils.get_chrome_exe_path() if chrome_exe_path is None: logging.error("Chrome / Chromium web browser not installed!") sys.exit(1) else: logging.info("Chrome / Chromium path: " + chrome_exe_path) chrome_major_version = utils.get_chrome_major_version() if chrome_major_version == '': logging.error("Chrome / Chromium version not detected!") sys.exit(1) else: logging.info("Chrome / Chromium major version: " + chrome_major_version) logging.info("Launching web browser...") user_agent = utils.get_user_agent() logging.info("FlareSolverr User-Agent: " + user_agent) logging.info("Test successful!") def index_endpoint() -> IndexResponse: res = IndexResponse({}) res.msg = "FlareSolverr is ready!" res.version = utils.get_flaresolverr_version() res.userAgent = utils.get_user_agent() return res def health_endpoint() -> HealthResponse: res = HealthResponse({}) res.status = STATUS_OK return res def controller_v1_endpoint(req: V1RequestBase) -> V1ResponseBase: start_ts = int(time.time() * 1000) logging.info(f"Incoming request => POST /v1 body: {utils.object_to_dict(req)}") res: V1ResponseBase try: res = _controller_v1_handler(req) except Exception as e: res = V1ResponseBase({}) res.__error_500__ = True res.status = STATUS_ERROR res.message = "Error: " + str(e) logging.error(res.message) res.startTimestamp = start_ts res.endTimestamp = int(time.time() * 1000) res.version = utils.get_flaresolverr_version() logging.debug(f"Response => POST /v1 body: {utils.object_to_dict(res)}") logging.info(f"Response in {(res.endTimestamp - res.startTimestamp) / 1000} s") return res def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase: # do some validations if req.cmd is None: raise Exception("Request parameter 'cmd' is mandatory.") if req.headers is not None: logging.warning("Request parameter 'headers' was removed in FlareSolverr v2.") if req.userAgent is not None: logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.") # set default values if req.maxTimeout is None or int(req.maxTimeout) < 1: req.maxTimeout = 60000 # execute the command res: V1ResponseBase if req.cmd == 'sessions.create': res = _cmd_sessions_create(req) elif req.cmd == 'sessions.list': res = _cmd_sessions_list(req) elif req.cmd == 'sessions.destroy': res = _cmd_sessions_destroy(req) elif req.cmd == 'request.get': res = _cmd_request_get(req) elif req.cmd == 'request.post': res = _cmd_request_post(req) else: raise Exception(f"Request parameter 'cmd' = '{req.cmd}' is invalid.") return res def _cmd_request_get(req: V1RequestBase) -> V1ResponseBase: # do some validations if req.url is None: raise Exception("Request parameter 'url' is mandatory in 'request.get' command.") if req.postData is not None: raise Exception("Cannot use 'postBody' when sending a GET request.") if req.returnRawHtml is not None: logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") if req.download is not None: logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") challenge_res = _resolve_challenge(req, 'GET') res = V1ResponseBase({}) res.status = challenge_res.status res.message = challenge_res.message res.solution = challenge_res.result return res def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase: # do some validations if req.postData is None: raise Exception("Request parameter 'postData' is mandatory in 'request.post' command.") if req.returnRawHtml is not None: logging.warning("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.") if req.download is not None: logging.warning("Request parameter 'download' was removed in FlareSolverr v2.") challenge_res = _resolve_challenge(req, 'POST') res = V1ResponseBase({}) res.status = challenge_res.status res.message = challenge_res.message res.solution = challenge_res.result return res def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase: logging.debug("Creating new session...") session, fresh = SESSIONS_STORAGE.create(session_id=req.session, proxy=req.proxy) session_id = session.session_id if not fresh: return V1ResponseBase({ "status": STATUS_OK, "message": "Session already exists.", "session": session_id }) return V1ResponseBase({ "status": STATUS_OK, "message": "Session created successfully.", "session": session_id }) def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase: session_ids = SESSIONS_STORAGE.session_ids() return V1ResponseBase({ "status": STATUS_OK, "message": "", "sessions": session_ids }) def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase: session_id = req.session existed = SESSIONS_STORAGE.destroy(session_id) if not existed: raise Exception("The session doesn't exist.") return V1ResponseBase({ "status": STATUS_OK, "message": "The session has been removed." }) def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: timeout = int(req.maxTimeout) / 1000 driver = None try: if req.session: session_id = req.session ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None session, fresh = SESSIONS_STORAGE.get(session_id, ttl) if fresh: logging.debug(f"new session created to perform the request (session_id={session_id})") else: logging.debug(f"existing session is used to perform the request (session_id={session_id}, " f"lifetime={str(session.lifetime())}, ttl={str(ttl)})") driver = session.driver else: driver = utils.get_webdriver(req.proxy) logging.debug('New instance of webdriver has been created to perform the request') return func_timeout(timeout, _evil_logic, (req, driver, method)) except FunctionTimedOut: raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.') except Exception as e: raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n')) finally: if not req.session and driver is not None: if utils.PLATFORM_VERSION == "nt": driver.close() driver.quit() logging.debug('A used instance of webdriver has been destroyed') def click_verify(driver: WebDriver): try: logging.debug("Try to find the Cloudflare verify checkbox...") iframe = driver.find_element(By.XPATH, "//iframe[starts-with(@id, 'cf-chl-widget-')]") driver.switch_to.frame(iframe) checkbox = driver.find_element( by=By.XPATH, value='//*[@id="content"]/div/div/label/input', ) if checkbox: actions = ActionChains(driver) actions.move_to_element_with_offset(checkbox, 5, 7) actions.click(checkbox) actions.perform() logging.debug("Cloudflare verify checkbox found and clicked!") except Exception: logging.debug("Cloudflare verify checkbox not found on the page.") finally: driver.switch_to.default_content() try: logging.debug("Try to find the Cloudflare 'Verify you are human' button...") button = driver.find_element( by=By.XPATH, value="//input[@type='button' and @value='Verify you are human']", ) if button: actions = ActionChains(driver) actions.move_to_element_with_offset(button, 5, 7) actions.click(button) actions.perform() logging.debug("The Cloudflare 'Verify you are human' button found and clicked!") except Exception: logging.debug("The Cloudflare 'Verify you are human' button not found on the page.") time.sleep(2) def get_correct_window(driver: WebDriver) -> WebDriver: if len(driver.window_handles) > 1: for window_handle in driver.window_handles: driver.switch_to.window(window_handle) current_url = driver.current_url if not current_url.startswith("devtools://devtools"): return driver return driver def access_page(driver: WebDriver, url: str) -> None: driver.get(url) driver.start_session() driver.start_session() # required to bypass Cloudflare def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: res = ChallengeResolutionT({}) res.status = STATUS_OK res.message = "" # navigate to the page logging.debug(f'Navigating to... {req.url}') if method == 'POST': _post_request(req, driver) else: access_page(driver, req.url) driver = get_correct_window(driver) # set cookies if required if req.cookies is not None and len(req.cookies) > 0: logging.debug(f'Setting cookies...') for cookie in req.cookies: driver.delete_cookie(cookie['name']) driver.add_cookie(cookie) # reload the page if method == 'POST': _post_request(req, driver) else: access_page(driver, req.url) driver = get_correct_window(driver) # wait for the page if utils.get_config_log_html(): logging.debug(f"Response HTML:\n{driver.page_source}") html_element = driver.find_element(By.TAG_NAME, "html") page_title = driver.title # find access denied titles for title in ACCESS_DENIED_TITLES: if title == page_title: raise Exception('Cloudflare has blocked this request. ' 'Probably your IP is banned for this site, check in your web browser.') # find access denied selectors for selector in ACCESS_DENIED_SELECTORS: found_elements = driver.find_elements(By.CSS_SELECTOR, selector) if len(found_elements) > 0: raise Exception('Cloudflare has blocked this request. ' 'Probably your IP is banned for this site, check in your web browser.') # find challenge by title challenge_found = False for title in CHALLENGE_TITLES: if title.lower() == page_title.lower(): challenge_found = True logging.info("Challenge detected. Title found: " + page_title) break if not challenge_found: # find challenge by selectors for selector in CHALLENGE_SELECTORS: found_elements = driver.find_elements(By.CSS_SELECTOR, selector) if len(found_elements) > 0: challenge_found = True logging.info("Challenge detected. Selector found: " + selector) break attempt = 0 if challenge_found: while True: try: attempt = attempt + 1 # wait until the title changes for title in CHALLENGE_TITLES: logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title) WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title)) # then wait until all the selectors disappear for selector in CHALLENGE_SELECTORS: logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector) WebDriverWait(driver, SHORT_TIMEOUT).until_not( presence_of_element_located((By.CSS_SELECTOR, selector))) # all elements not found break except TimeoutException: logging.debug("Timeout waiting for selector") click_verify(driver) # update the html (cloudflare reloads the page every 5 s) html_element = driver.find_element(By.TAG_NAME, "html") # waits until cloudflare redirection ends logging.debug("Waiting for redirect") # noinspection PyBroadException try: WebDriverWait(driver, SHORT_TIMEOUT).until(staleness_of(html_element)) except Exception: logging.debug("Timeout waiting for redirect") logging.info("Challenge solved!") res.message = "Challenge solved!" else: logging.info("Challenge not detected!") res.message = "Challenge not detected!" challenge_res = ChallengeResolutionResultT({}) challenge_res.url = driver.current_url challenge_res.status = 200 # todo: fix, selenium not provides this info challenge_res.cookies = driver.get_cookies() challenge_res.userAgent = utils.get_user_agent(driver) if not req.returnOnlyCookies: challenge_res.headers = {} # todo: fix, selenium not provides this info challenge_res.response = driver.page_source res.result = challenge_res return res def _post_request(req: V1RequestBase, driver: WebDriver): post_form = f'
' query_string = req.postData if req.postData[0] != '?' else req.postData[1:] pairs = query_string.split('&') for pair in pairs: parts = pair.split('=') # noinspection PyBroadException try: name = unquote(parts[0]) except Exception: name = parts[0] if name == 'submit': continue # noinspection PyBroadException try: value = unquote(parts[1]) except Exception: value = parts[1] post_form += f'
' post_form += '
' html_content = f""" {post_form} """ driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content)) driver.start_session() driver.start_session() # required to bypass Cloudflare