SilentWraith committed on
Commit
18ade18
·
verified ·
1 Parent(s): c739443

Update app/core/service/playwright/playwright_context.py

Browse files
app/core/service/playwright/playwright_context.py CHANGED
@@ -1,139 +1,88 @@
1
  from __future__ import annotations
2
 
3
- from typing import AsyncIterator, Awaitable, ClassVar
4
-
5
- from playwright.async_api import ( # noqa: F401
6
- Browser,
7
- BrowserContext,
8
- Page,
9
- TimeoutError,
10
- async_playwright,
11
- )
12
-
13
- from .models import GetContentModel, PageModel, ScreenshotModel # noqa: TCH001
14
-
15
-
16
class AsyncMixin:
    """Experimental mixin that lets an instance be used directly with ``await``.

    ``await obj`` runs ``obj.__ainit__()`` and evaluates to whatever that
    coroutine returns. Subclasses override ``__ainit__`` to perform their
    asynchronous set-up.
    """

    async def __ainit__(self) -> None:
        """Asynchronous initialisation hook; the base version does nothing."""

    def __await__(self) -> AsyncIterator[Awaitable]:
        """Delegate ``await self`` to the coroutine produced by ``__ainit__``."""
        startup = self.__ainit__()
        return startup.__await__()
25
-
26
-
27
class PlaywrightInstance(AsyncMixin):
    """Keep a Playwright Firefox browser open so it can be reused across API requests.

    Awaiting an instance (``await PlaywrightInstance()``) runs ``__ainit__``,
    which starts Playwright and launches the browser.
    """

    # User-agent string; injected below through ``general.useragent.override``.
    HEADERS: ClassVar[str] = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501
    )

    # Preferences handed to ``firefox.launch(firefox_user_prefs=...)``.
    FIREFOX_USER_PREFS: ClassVar[dict[str, int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an instance with nothing started yet."""
        self.playwright: async_playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start Playwright and launch Firefox if that has not happened yet.

        The two steps are checked independently so that a failed browser
        launch can be retried by awaiting the instance again.

        Returns:
            PlaywrightInstance: this instance, ready to use.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()
        if self.browser is None:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs=self.FIREFOX_USER_PREFS,
            )
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Create a page, optionally inside a dedicated new browser context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page/context in.
            screenshot_model (GetContentModel):
                Request configuration; its ``new_browser`` flag selects whether
                a separate context is created.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            tuple: ``(None, page)`` when no separate context was requested,
            otherwise ``(context, page)``.
        """
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }

        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)

        new_context = await browser.new_context(**params)
        try:
            page = await new_context.new_page()
        except BaseException:
            # Do not leak the freshly created context when the page cannot be
            # opened (BaseException so task cancellation is covered as well).
            await new_context.close()
            raise
        return new_context, page

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            bytes: The screenshot data in bytes.
        """
        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )

        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if screenshot_model.query_selector:
                # Capture only the element matched by the selector.
                return await page.locator(screenshot_model.query_selector).screenshot(
                    type=screenshot_model.image_type,
                )
            return await page.screenshot(
                full_page=screenshot_model.full_page,
                type=screenshot_model.image_type,
            )
        finally:
            # Always release the page (and its context, if one was created),
            # even when navigation or the capture itself fails.
            try:
                await page.close()
            finally:
                if context is not None:
                    await context.close()

    async def close_instance(self) -> None:
        """For manual closing of playwright if needed.

        Safe to call when nothing (or only Playwright) was started; the
        handles are reset so the instance can be awaited again afterwards.
        """
        try:
            if self.browser is not None:
                await self.browser.close()
        finally:
            self.browser = None
            if self.playwright is not None:
                try:
                    await self.playwright.stop()
                finally:
                    self.playwright = None
139
-
 
1
  from __future__ import annotations
2
 
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field, HttpUrl
6
+
7
+
8
class ViewPortModel(BaseModel):
    """Dimensions of the page viewport.

    Attributes:
        width (int):
            Viewport width; 1280 unless overridden.

        height (int):
            Viewport height; 720 unless overridden.
    """

    width: int = Field(default=1280)
    height: int = Field(default=720)
21
+
22
+
23
class PageModel(BaseModel):
    """Options applied to the page that gets opened.

    Attributes:
        color_scheme (Literal["light", "dark", "no-preference"] | None):
            Color scheme of the page. Defaults to "no-preference".

        java_script_enabled (bool | None):
            Whether or not to enable JavaScript in the context. Defaults to true.

        viewport (ViewPortModel | None):
            Fixed viewport for the page. Defaults to None (not set here).

        no_viewport (bool | None):
            Does not enforce fixed viewport, allows resizing window in the
            headed mode. Defaults to false.

        proxy (dict | None):
            Proxy to be used for all requests. HTTP and SOCKS proxies are
            supported.
            Example: proxy={'server': 'http://proxy.example.com:3128'}
    """

    color_scheme: Literal["light", "dark", "no-preference"] | None = "no-preference"
    java_script_enabled: bool | None = True
    viewport: ViewPortModel | None = None
    no_viewport: bool | None = False
    proxy: dict | None = None
48
+
49
+
50
class GetContentModel(BaseModel):
    """Webpage to request and parse.

    Attributes:
        url (HttpUrl):
            Url to request.

        new_browser (bool | None):
            Whether you want to make a new browser context or not.
            Defaults to false.

        query_selector (str | None):
            Used to locate a selector. Defaults to None.

        ms_delay (int):
            A delay before performing a task after requesting the url.
            Must be between 0 and 15000 inclusive; defaults to 0.
    """

    url: HttpUrl
    new_browser: bool | None = False
    query_selector: str | None = None
    # The default must be an int: pydantic does not validate defaults unless
    # asked to, so a float literal here would leak through as ``0.0`` in both
    # model instances and the generated schema despite the ``int`` annotation.
    ms_delay: int = Field(default=0, ge=0, le=15_000)
71
+
72
+
73
class ScreenshotModel(GetContentModel):
    """Request schema for taking a screenshot.

    Attributes:
        full_page (bool | None):
            Whether you want a full page screenshot or not. Defaults to false.

        image_type (Literal["png", "jpeg"]):
            The image type of screenshot. Defaults to "jpeg".
    """

    full_page: bool | None = Field(
        default=False,
        description="Whether you want a full page screenshot or not.",
    )
    image_type: Literal["png", "jpeg"] = "jpeg"
88
+