Spaces:
Sleeping
Sleeping
File size: 4,693 Bytes
de68d43 f37cf04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
from __future__ import annotations
from typing import AsyncIterator, Awaitable, ClassVar
from playwright.async_api import ( # noqa: F401
Browser,
BrowserContext,
Page,
TimeoutError,
async_playwright,
)
from .models import GetContentModel, PageModel, ScreenshotModel # noqa: TCH001
class AsyncMixin:
    """Experimental mixin that lets an instance be used with ``await``.

    Awaiting an instance runs its ``__ainit__`` coroutine and evaluates to
    whatever that coroutine returns. Subclasses override ``__ainit__`` to do
    their asynchronous set-up.
    """

    async def __ainit__(self) -> None:
        """Asynchronous initialisation hook; the base version does nothing."""

    def __await__(self) -> AsyncIterator[Awaitable]:
        """Delegate ``await instance`` to the ``__ainit__`` coroutine."""
        init_coroutine = self.__ainit__()
        return init_coroutine.__await__()
class PlaywrightInstance(AsyncMixin):
    """Keep one Playwright browser open so it can be reused across API requests.

    Awaiting an instance (``instance = await PlaywrightInstance()``) starts
    Playwright and launches Firefox; ``close_instance`` shuts both down again.
    """

    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501
    # Preferences handed to Firefox at launch (see ``__ainit__``).
    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create the instance without starting anything; await it to start."""
        # Holds the object returned by ``async_playwright().start()``.
        self.playwright: async_playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start Playwright and launch Firefox if they are not already running.

        Returns:
            PlaywrightInstance: This instance, so ``await PlaywrightInstance()``
            evaluates to a ready-to-use object.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()
        # Checked separately so that a failed launch can be retried by awaiting
        # the instance again instead of leaving ``browser`` as None for good.
        if self.browser is None:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs=self.FIREFOX_USER_PREFS,
            )
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Create a page, either directly on the browser or in a new browser context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page in.
            screenshot_model (GetContentModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
                Its ``new_browser`` flag selects whether an explicit context is created.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            tuple: ``(None, page)`` when ``new_browser`` is falsy, otherwise
            ``(context, page)`` where the caller is responsible for closing both.
        """
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }
        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)
        new_context = await browser.new_context(**params)
        return new_context, await new_context.new_page()

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            bytes: The screenshot data in bytes.

        Raises:
            RuntimeError: If the browser has not been started by awaiting the instance.
        """
        if self.browser is None:
            raise RuntimeError(
                "Playwright browser is not running; await the PlaywrightInstance before use.",
            )
        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)
            if screenshot_model.query_selector:
                # Capture only the element matched by the selector.
                return await page.locator(screenshot_model.query_selector).screenshot(
                    type=screenshot_model.image_type,
                )
            return await page.screenshot(
                full_page=screenshot_model.full_page,
                type=screenshot_model.image_type,
            )
        finally:
            # Always release the page (and its context) so that a failed
            # navigation or screenshot does not leak them in the shared browser.
            try:
                await page.close()
            finally:
                if context is not None:
                    await context.close()

    async def close_instance(self) -> None:
        """Close the browser and stop Playwright; safe to call more than once."""
        # Detach the references first so the instance is left in its
        # "not started" state and can be awaited again later.
        browser, self.browser = self.browser, None
        playwright, self.playwright = self.playwright, None
        try:
            if browser is not None:
                await browser.close()
        finally:
            if playwright is not None:
                await playwright.stop()
|