File size: 4,179 Bytes
de68d43
 
 
 
 
 
 
 
 
 
 
 
a9ef651
de68d43
 
 
 
 
 
a9ef651
 
 
 
 
 
 
de68d43
a9ef651
 
 
 
 
 
 
 
 
 
 
 
de68d43
 
a9ef651
 
 
 
 
 
 
 
 
de68d43
 
 
 
 
ed6e2f2
a9ef651
de68d43
 
 
 
 
 
a9ef651
 
 
 
de68d43
 
 
a9ef651
de68d43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ef651
de68d43
a9ef651
 
 
 
 
de68d43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ef651
 
 
 
 
 
 
5006d83
 
 
 
 
 
a9ef651
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from __future__ import annotations

from typing import TYPE_CHECKING, AsyncContextManager

from playwright.async_api import (  # noqa: F401
    Browser,
    BrowserContext,
    Page,
    TimeoutError,
    async_playwright,
)

from .models import GetContentModel, PageModel, ScreenshotModel

if TYPE_CHECKING:
    from types import TracebackType


class AsyncPlaywrightContext:
    """Async context manager that owns a Playwright driver and a Firefox browser.

    Entering the manager starts Playwright, launches Firefox and creates one
    shared browser context.  ``screenshot`` and ``get_content`` open a page
    (in the shared context, or in a dedicated context when the request model
    sets ``new_browser``), do their work and always release the page again.
    """

    # Sent as the Firefox user-agent override (see ``__aenter__``).
    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"  # noqa: E501

    def __init__(self) -> None:
        self.playwright = None
        self.browser = None
        # Shared context used for every request that does not ask for its own.
        self.default_context = None
        # Alias of ``default_context``; kept because earlier versions stored
        # the shared context under this name.
        self.context = None

    async def __aenter__(self) -> AsyncPlaywrightContext:
        """Start Playwright and Firefox (once) and return this instance."""
        if not self.playwright:
            self.playwright = await async_playwright().start()
            try:
                self.browser = await self.playwright.firefox.launch(
                    firefox_user_prefs={
                        "extensions.enabledScopes": 1,
                        "extensions.autoDisableScopes": 1,
                        "dom.webdriver.enabled": False,
                        "useAutomationExtension": False,
                        "general.useragent.override": self.HEADERS,
                    },
                )
                self.default_context = await self.browser.new_context()
            except BaseException:
                # Do not leave a half-started driver/browser behind.
                await self.close_instance()
                raise
            self.context = self.default_context
        return self

    async def new_context_page(
        self,
        screenshot_model: GetContentModel,
        browser: Browser,
        page_model: PageModel,
    ) -> Page:
        """Open a new page for a request.

        Args:
            screenshot_model: Request model; only its ``new_browser`` flag is
                read here.
            browser: Browser used to create a dedicated context when
                ``new_browser`` is truthy.
            page_model: Context options (colour scheme, JavaScript, viewport,
                proxy) applied to the dedicated context.

        Returns:
            A page in the shared context, or in a freshly created context
            when ``new_browser`` is truthy.
        """
        if not screenshot_model.new_browser:
            return await self.default_context.new_page()

        new_context = await browser.new_context(
            color_scheme=page_model.color_scheme,
            java_script_enabled=page_model.java_script_enabled,
            no_viewport=page_model.no_viewport,
            proxy=page_model.proxy.model_dump() if page_model.proxy else None,
            viewport=page_model.viewport.model_dump() if page_model.viewport else None,
        )
        return await new_context.new_page()

    async def _release_page(self, page: Page, *, dedicated_context: bool) -> None:
        """Close *page* and, if it was created for one request, its context."""
        try:
            await page.close()
        finally:
            if dedicated_context:
                await page.context.close()

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Navigate to ``screenshot_model.url`` and return a screenshot.

        When ``query_selector`` is set, only the matching element is captured
        (after waiting for it if ``wait_selector`` is truthy); otherwise the
        page is captured, honouring ``full_page``.
        """
        page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if screenshot_model.query_selector:
                target = page.locator(screenshot_model.query_selector)
                if screenshot_model.wait_selector:
                    await target.wait_for()
                return await target.screenshot()
            return await page.screenshot(full_page=screenshot_model.full_page)
        finally:
            # Release the page even when navigation or capture fails.
            await self._release_page(
                page,
                dedicated_context=bool(screenshot_model.new_browser),
            )

    async def get_content(
        self,
        get_content_model: GetContentModel,
        page_model: PageModel,
    ) -> str:
        """Navigate to ``get_content_model.url`` and return the page HTML.

        When ``query_selector`` is set, the matching element is awaited
        before the HTML is read.
        """
        page = await self.new_context_page(
            screenshot_model=get_content_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(get_content_model.url))
            await page.wait_for_timeout(get_content_model.ms_delay)

            if get_content_model.query_selector:
                # ``locator`` is synchronous; only ``wait_for`` is awaited.
                await page.locator(get_content_model.query_selector).wait_for()
            return await page.content()
        finally:
            # Release the page even when navigation or waiting fails.
            await self._release_page(
                page,
                dedicated_context=bool(get_content_model.new_browser),
            )

    async def close_instance(self) -> None:
        """Close the browser, stop Playwright and reset all state.

        Safe to call when nothing (or only the driver) has been started.
        """
        browser, playwright = self.browser, self.playwright
        # Reset first so a later ``__aenter__`` starts from scratch even if
        # closing raises.
        self.browser = None
        self.playwright = None
        self.default_context = None
        self.context = None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()

    async def __aexit__(
        self,
        typ: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Tear everything down; exceptions from the body are not suppressed."""
        await self.close_instance()