Zaws / app /core /service /playwright /playwright_context.py
SilentWraith's picture
Update app/core/service/playwright/playwright_context.py
a9ef651 verified
raw
history blame
4.18 kB
from __future__ import annotations
from typing import TYPE_CHECKING, AsyncContextManager
from playwright.async_api import ( # noqa: F401
Browser,
BrowserContext,
Page,
TimeoutError,
async_playwright,
)
from .models import GetContentModel, PageModel, ScreenshotModel
if TYPE_CHECKING:
from types import TracebackType
class AsyncPlaywrightContext:
    """Async context manager that owns a Playwright Firefox browser.

    Entering the context starts Playwright, launches Firefox and creates one
    shared browser context (``default_context``). ``screenshot`` and
    ``get_content`` each open a page, load the requested URL, return the
    captured data and release the page again. Leaving the context (or calling
    ``close_instance``) closes the browser and stops Playwright.
    """

    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"  # noqa: E501

    def __init__(self) -> None:
        """Create an idle instance; nothing is started until ``__aenter__``."""
        self.playwright = None
        self.browser = None
        self.default_context = None
        # Alias of ``default_context`` kept for code that reads ``.context``.
        self.context = None

    async def __aenter__(self) -> AsyncPlaywrightContext:
        """Start Playwright and Firefox if not already running; return self."""
        if not self.playwright:
            try:
                self.playwright = await async_playwright().start()
                self.browser = await self.playwright.firefox.launch(
                    # Firefox preferences, including the user-agent override.
                    # NOTE(review): the webdriver/automation prefs presumably
                    # reduce automation fingerprinting - confirm.
                    firefox_user_prefs={
                        "extensions.enabledScopes": 1,
                        "extensions.autoDisableScopes": 1,
                        "dom.webdriver.enabled": False,
                        "useAutomationExtension": False,
                        "general.useragent.override": self.HEADERS,
                    },
                )
                # ``new_context_page`` reads ``default_context``, so the shared
                # context must be stored under that name.
                self.default_context = await self.browser.new_context()
                self.context = self.default_context
            except BaseException:
                # ``__aexit__`` is not called when ``__aenter__`` raises, so
                # release whatever was started before propagating.
                await self.close_instance()
                raise
        return self

    async def new_context_page(
        self,
        screenshot_model: GetContentModel,
        browser: Browser,
        page_model: PageModel,
    ) -> Page:
        """Open a page for a request.

        Args:
            screenshot_model: Request model; only ``new_browser`` is read here.
            browser: Browser used to create a dedicated context when
                ``new_browser`` is truthy.
            page_model: Context options (color scheme, JavaScript toggle,
                viewport, proxy) applied to the dedicated context.

        Returns:
            A page in the shared default context when ``new_browser`` is
            falsy, otherwise a page in a freshly created context.
        """
        if not screenshot_model.new_browser:
            return await self.default_context.new_page()

        new_context = await browser.new_context(
            color_scheme=page_model.color_scheme,
            java_script_enabled=page_model.java_script_enabled,
            no_viewport=page_model.no_viewport,
            proxy=page_model.proxy.model_dump() if page_model.proxy else None,
            viewport=page_model.viewport.model_dump() if page_model.viewport else None,
        )
        try:
            return await new_context.new_page()
        except BaseException:
            # Do not leak the dedicated context if the page cannot be opened.
            await new_context.close()
            raise

    @staticmethod
    async def _close_page(page: Page, *, owns_context: bool) -> None:
        """Release a page, and its context when it was created for this page."""
        if owns_context:
            # Closing the dedicated context also closes the page inside it.
            await page.context.close()
        else:
            await page.close()

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Load ``screenshot_model.url`` and return screenshot bytes.

        After navigation the page waits ``ms_delay`` milliseconds. When
        ``query_selector`` is set, only that element is captured (optionally
        waiting for it first if ``wait_selector`` is truthy); otherwise the
        page is captured with ``full_page`` passed through.
        """
        page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if screenshot_model.query_selector:
                target = page.locator(screenshot_model.query_selector)
                if screenshot_model.wait_selector:
                    await target.wait_for()
                return await target.screenshot()

            return await page.screenshot(full_page=screenshot_model.full_page)
        finally:
            # Always release the page, even when navigation or capture fails.
            await self._close_page(
                page,
                owns_context=bool(screenshot_model.new_browser),
            )

    async def get_content(
        self,
        get_content_model: GetContentModel,
        page_model: PageModel,
    ) -> str:
        """Load ``get_content_model.url`` and return the page's HTML.

        After navigation the page waits ``ms_delay`` milliseconds and, when
        ``query_selector`` is set, waits for that element before reading the
        content.
        """
        page = await self.new_context_page(
            screenshot_model=get_content_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(get_content_model.url))
            await page.wait_for_timeout(get_content_model.ms_delay)

            if get_content_model.query_selector:
                # ``page.locator`` is synchronous; only ``wait_for`` is awaited.
                await page.locator(get_content_model.query_selector).wait_for()

            return await page.content()
        finally:
            await self._close_page(
                page,
                owns_context=bool(get_content_model.new_browser),
            )

    async def close_instance(self) -> None:
        """Close the browser and stop Playwright; safe to call repeatedly."""
        browser, playwright = self.browser, self.playwright
        # Clear the references first so a later ``__aenter__`` starts fresh
        # instead of reusing a closed browser.
        self.browser = None
        self.playwright = None
        self.default_context = None
        self.context = None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()

    async def __aexit__(
        self,
        typ: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Shut everything down on context exit; exceptions are not suppressed."""
        await self.close_instance()