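# NOTE(review): the three lines originally here ("Spaces:", "Sleeping", "Sleeping")
# look like web-page status residue captured with this file, not Python source.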
from __future__ import annotations

from typing import Any, AsyncIterator, Awaitable, ClassVar, Generator

from playwright.async_api import (  # noqa: F401
    Browser,
    BrowserContext,
    Page,
    Playwright,
    TimeoutError,
    async_playwright,
)

from .models import GetContentModel, PageModel, ScreenshotModel  # noqa: TCH001
class AsyncMixin:
    """Experimental mixin that makes instances awaitable.

    Awaiting an instance runs its ``__ainit__`` coroutine, so an object can be
    constructed and asynchronously initialised in one expression::

        instance = await SomeSubclass()

    The ``await`` expression evaluates to whatever ``__ainit__`` returns, or to
    the instance itself when ``__ainit__`` returns ``None``.
    """

    async def __ainit__(self) -> Any:
        """Run asynchronous initialisation.

        The base implementation does nothing; subclasses override it.
        """

    def __await__(self) -> Generator[Any, None, Any]:
        """Make the instance awaitable by delegating to ``__ainit__``.

        Returns:
            The iterator of a coroutine that awaits ``__ainit__`` and resolves
            to its return value, falling back to ``self`` when that is ``None``.
        """

        async def _initialise() -> Any:
            outcome = await self.__ainit__()
            # An ``__ainit__`` without an explicit return must not make
            # ``obj = await Cls()`` bind ``None``.
            return self if outcome is None else outcome

        return _initialise().__await__()
class PlaywrightInstance(AsyncMixin):
    """Keep one Playwright Firefox browser open so API requests can reuse it.

    Nothing is started by the constructor; awaiting the instance
    (``instance = await PlaywrightInstance()``) starts Playwright and launches
    Firefox. Call ``close_instance`` to shut both down again.
    """

    # Despite the name this is a single User-Agent string; it is applied through
    # the ``general.useragent.override`` preference below.
    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501
    # Firefox preferences handed to ``firefox.launch``.
    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an unstarted instance; await it to start the browser."""
        self.playwright: Playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start Playwright and launch Firefox unless they are already running.

        Returns:
            PlaywrightInstance: ``self``, ready for use.

        Raises:
            Whatever Playwright raises when starting or launching; in the
            launch case the driver is stopped again before re-raising.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()
        if self.browser is None:
            try:
                self.browser = await self.playwright.firefox.launch(
                    firefox_user_prefs=self.FIREFOX_USER_PREFS,
                )
            except BaseException:
                # Cleanup-and-re-raise: without this a failed launch would leave
                # a started driver behind with no browser attached to it.
                driver, self.playwright = self.playwright, None
                await driver.stop()
                raise
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Open a page on ``browser``, optionally in an explicitly created context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page in.
            screenshot_model (GetContentModel):
                Request configuration; only ``new_browser`` is read here. When it
                is truthy the page is created inside a new browser context.
            page_model (PageModel):
                Page configuration (color scheme, JavaScript flag, viewport and
                proxy settings) forwarded to Playwright.

        Returns:
            tuple: ``(context, page)`` where ``context`` is the newly created
            BrowserContext, or ``None`` when the page came from
            ``browser.new_page`` directly.
        """
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }
        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)

        new_context = await browser.new_context(**params)
        try:
            page = await new_context.new_page()
        except BaseException:
            # The caller never receives the context on failure, so close it here.
            await new_context.close()
            raise
        return new_context, page

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            bytes: The screenshot data in bytes.

        Raises:
            RuntimeError: If the instance has not been awaited yet (no browser).
        """
        if self.browser is None:
            message = "PlaywrightInstance is not started; use `await PlaywrightInstance()` first."
            raise RuntimeError(message)

        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)
            if screenshot_model.query_selector:
                # Capture only the element matched by the selector.
                return await page.locator(screenshot_model.query_selector).screenshot(
                    type=screenshot_model.image_type,
                )
            return await page.screenshot(
                full_page=screenshot_model.full_page,
                type=screenshot_model.image_type,
            )
        finally:
            # Always release the page (and context) even when navigation or the
            # capture fails, so a long-lived browser does not accumulate pages.
            try:
                await page.close()
            finally:
                if context:
                    await context.close()

    async def close_instance(self) -> None:
        """Close the browser and stop Playwright; safe to call repeatedly.

        Both attributes are reset to ``None`` so the instance can be awaited
        again later to start a fresh browser.
        """
        browser, self.browser = self.browser, None
        playwright, self.playwright = self.playwright, None
        try:
            if browser is not None:
                await browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if playwright is not None:
                await playwright.stop()