File size: 4,693 Bytes
de68d43
 
f37cf04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from __future__ import annotations

from typing import AsyncIterator, Awaitable, ClassVar

from playwright.async_api import (  # noqa: F401
    Browser,
    BrowserContext,
    Page,
    TimeoutError,
    async_playwright,
)

from .models import GetContentModel, PageModel, ScreenshotModel  # noqa: TCH001


class AsyncMixin:
    """Experimental mixin that lets an instance be used directly with ``await``.

    ``await instance`` runs the instance's ``__ainit__`` coroutine and
    evaluates to whatever that coroutine returns.
    """

    async def __ainit__(self) -> None:
        """Asynchronous initialisation hook; this base version does nothing."""

    def __await__(self) -> AsyncIterator[Awaitable]:
        """Delegate ``await self`` to the ``__ainit__`` coroutine."""
        ainit_coroutine = self.__ainit__()
        return ainit_coroutine.__await__()


class PlaywrightInstance(AsyncMixin):
    """Keep one Playwright browser open so it can be reused across API requests.

    Usage: ``instance = await PlaywrightInstance()`` starts the Playwright
    driver and launches Firefox; ``close_instance`` tears both down again.
    """

    # NOTE: despite the name, this is a User-Agent string, injected through the
    # Firefox ``general.useragent.override`` preference below.
    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501

    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an instance with no driver or browser started yet."""
        self.playwright: async_playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start the Playwright driver and launch Firefox if not already done.

        The two steps are guarded separately so that, if the browser launch
        failed on a previous attempt (driver started, browser still ``None``),
        awaiting the instance again retries the launch instead of silently
        leaving ``self.browser`` unset.

        Returns:
            PlaywrightInstance: this instance, ready for use.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()

        if self.browser is None:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs=self.FIREFOX_USER_PREFS,
            )

        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Create a page, either directly on the browser or in a new browser context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page in.
            screenshot_model (GetContentModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
                Only its ``new_browser`` flag is read here.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            tuple: ``(None, page)`` when ``new_browser`` is falsy (the page is
            created with ``browser.new_page``), otherwise ``(context, page)``
            where ``context`` is the newly created BrowserContext the caller
            is responsible for closing.
        """
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }

        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)

        new_context = await browser.new_context(**params)
        return new_context, await new_context.new_page()

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            bytes: The screenshot data in bytes.
        """
        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )

        # The browser is long-lived and shared between requests, so the page
        # (and context, if any) must be released even when navigation or the
        # screenshot itself raises; otherwise every failed request leaks one.
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if screenshot_model.query_selector:
                # Capture only the element matched by the selector.
                return await page.locator(
                    screenshot_model.query_selector,
                ).screenshot(
                    type=screenshot_model.image_type,
                )

            return await page.screenshot(
                full_page=screenshot_model.full_page,
                type=screenshot_model.image_type,
            )
        finally:
            try:
                await page.close()
            finally:
                # Still close the context if closing the page failed.
                if context is not None:
                    await context.close()

    async def close_instance(self) -> None:
        """For manual closing of playwright if needed.

        Safe to call when nothing (or only the driver) was started. The
        attributes are reset to ``None`` so that awaiting the instance again
        starts a fresh driver and browser rather than reusing closed ones.
        """
        browser, playwright = self.browser, self.playwright
        self.browser = None
        self.playwright = None

        try:
            if browser is not None:
                await browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if playwright is not None:
                await playwright.stop()