SilentWraith committed on
Commit
5bf8ac9
·
verified ·
1 Parent(s): 8beead4

Update app/core/service/playwright/playwright_context.py

Browse files
app/core/service/playwright/playwright_context.py CHANGED
@@ -1,6 +1,6 @@
1
  from __future__ import annotations
2
 
3
- from typing import TYPE_CHECKING, AsyncContextManager
4
 
5
  from playwright.async_api import ( # noqa: F401
6
  Browser,
@@ -10,66 +10,86 @@ from playwright.async_api import ( # noqa: F401
10
  async_playwright,
11
  )
12
 
13
- from .models import GetContentModel, PageModel, ScreenshotModel
14
 
15
- if TYPE_CHECKING:
16
- from types import TracebackType
17
 
 
 
18
 
19
- class AsyncPlaywrightContext:
20
- HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" # noqa: E501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def __init__(self) -> None:
23
- self.playwright = None
24
- self.browser = None
25
- self.default_context = None
26
 
27
- async def __aenter__(self) -> AsyncContextManager:
 
28
  if not self.playwright:
29
  self.playwright = await async_playwright().start()
30
  self.browser = await self.playwright.firefox.launch(
31
- firefox_user_prefs={
32
- "extensions.enabledScopes": 1,
33
- "extensions.autoDisableScopes": 1,
34
- "dom.webdriver.enabled": False,
35
- "useAutomationExtension": False,
36
- "general.useragent.override": self.HEADERS,
37
- },
38
  )
39
- self.default_context = await self.browser.new_context()
40
  return self
41
 
42
  async def new_context_page(
43
  self,
44
- screenshot_model: GetContentModel,
45
  browser: Browser,
 
46
  page_model: PageModel,
47
- ) -> Page:
48
-
 
 
 
 
 
 
 
 
 
 
 
 
49
  params = {
50
- 'color_scheme': page_model.color_scheme,
51
- 'java_script_enabled': page_model.java_script_enabled,
52
- 'no_viewport': page_model.no_viewport,
53
- 'proxy': page_model.proxy.model_dump() if page_model.proxy else None,
54
- 'viewport': page_model.viewport.model_dump() if page_model.viewport else None,
55
  }
56
-
57
  if not screenshot_model.new_browser:
58
- return await self.browser.new_page(**params)
59
-
60
- new_context = await browser.new_context(**params)
61
- return await new_context.new_page()
62
 
 
 
63
 
64
  async def screenshot(
65
  self,
66
  screenshot_model: ScreenshotModel,
67
  page_model: PageModel,
68
  ) -> bytes:
69
- page = await self.new_context_page(
70
- screenshot_model = screenshot_model,
71
- browser=self.browser,
72
- page_model=page_model)
73
 
74
  await page.goto(str(screenshot_model.url))
75
 
@@ -82,55 +102,20 @@ class AsyncPlaywrightContext:
82
  )
83
 
84
  if screenshot_locator:
85
- if screenshot_model.wait_selector:
86
- await screenshot_locator.wait_for()
87
  screenshot_data: bytes = await screenshot_locator.screenshot()
88
  else:
89
  screenshot_data: bytes = await page.screenshot(full_page=screenshot_model.full_page)
90
 
91
  await page.close()
92
- return screenshot_data
93
 
94
- async def get_content(
95
- self,
96
- get_content_model: GetContentModel,
97
- page_model: PageModel,
98
- ) -> str:
99
- page = await self.new_context_page(
100
- screenshot_model = GetContentModel,
101
- browser=self.browser,
102
- page_model=page_model)
103
-
104
- await page.goto(str(get_content_model.url))
105
- await page.wait_for_timeout(get_content_model.ms_delay)
106
-
107
- wait_locator = (
108
- await page.locator(get_content_model.query_selector)
109
- if get_content_model.query_selector
110
- else None
111
- )
112
 
113
- if wait_locator:
114
- await wait_locator.wait_for()
115
- html = page.content()
116
- await page.close()
117
- return html
118
 
119
  async def close_instance(self) -> None:
 
120
  if self.playwright:
121
  await self.browser.close()
122
  await self.playwright.stop()
123
- self.browser = None
124
- self.playwright = None
125
-
126
- async def __aexit__(
127
- self,
128
- typ: type[BaseException] | None,
129
- exc: BaseException | None,
130
- tb: TracebackType | None,
131
- ) -> None:
132
- print('speed test')
133
- # if self.browser:
134
- # await self.browser.close()
135
- # if self.playwright:
136
- # await self.playwright.stop()
 
1
  from __future__ import annotations
2
 
3
from typing import Any, AsyncIterator, Awaitable, ClassVar, Generator
4
 
5
  from playwright.async_api import ( # noqa: F401
6
  Browser,
 
10
  async_playwright,
11
  )
12
 
13
+ from .models import GetContentModel, PageModel, ScreenshotModel # noqa: TCH001
14
 
 
 
15
 
16
class AsyncMixin:
    """Experimental mixin that makes instances awaitable.

    ``await obj`` runs ``obj.ainit()`` and evaluates to whatever that coroutine
    returns, so a subclass can do asynchronous set-up by overriding ``ainit``.
    """

    async def ainit(self) -> None:
        """Perform asynchronous initialisation; the base implementation does nothing."""

    def __await__(self) -> Generator[Any, None, Any]:
        """Delegate to ``ainit`` so that awaiting the instance runs its set-up.

        Returns:
            The iterator produced by the ``ainit`` coroutine's own ``__await__``.
        """
        return self.ainit().__await__()
24
+
25
+
26
+ class PlaywrightInstance(AsyncMixin):
27
+ """This class is designed to keep playwright browser instance open for reusability and scalability handling api requests.""" # noqa: E501
28
+
29
+ HEADERS: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" # noqa: E501
30
+
31
+ FIREFOX_USER_PREFS: ClassVar[dict[str, int | str]] = {
32
+ "extensions.enabledScopes": 1,
33
+ "extensions.autoDisableScopes": 1,
34
+ "dom.webdriver.enabled": False,
35
+ "useAutomationExtension": False,
36
+ "general.useragent.override": HEADERS,
37
+ }
38
 
39
  def __init__(self) -> None:
40
+ self.playwright: async_playwright = None
41
+ self.browser: Browser = None
 
42
 
43
+ async def ainit(self) -> PlaywrightInstance:
44
+ """starts playwright and browser instance."""
45
  if not self.playwright:
46
  self.playwright = await async_playwright().start()
47
  self.browser = await self.playwright.firefox.launch(
48
+ firefox_user_prefs=self.FIREFOX_USER_PREFS,
 
 
 
 
 
 
49
  )
 
50
  return self
51
 
52
  async def new_context_page(
53
  self,
 
54
  browser: Browser,
55
+ screenshot_model: GetContentModel,
56
  page_model: PageModel,
57
+ ) -> tuple[BrowserContext, Page]:
58
+ """create a brwoser or new browser context page.
59
+
60
+ Parameters:
61
+ browser (Browser):
62
+ The Playwright Browser instance to create a new context in.
63
+ screenshot_model (GetContentModel):
64
+ A pydantic BaseModel instance containing the configuration for the screenshot.
65
+ page_model (PageModel):
66
+ A pydantic BaseModel instance containing the configuration for the page.
67
+
68
+ Returns:
69
+ tuple: BrowserContext and Page
70
+ """
71
  params = {
72
+ "color_scheme": page_model.color_scheme,
73
+ "java_script_enabled": page_model.java_script_enabled,
74
+ "no_viewport": page_model.no_viewport,
75
+ "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
76
+ "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
77
  }
78
+
79
  if not screenshot_model.new_browser:
80
+ return None, await self.browser.new_page(**params)
 
 
 
81
 
82
+ new_context = await browser.new_context(**params)
83
+ return new_context, await new_context.new_page()
84
 
85
  async def screenshot(
86
  self,
87
  screenshot_model: ScreenshotModel,
88
  page_model: PageModel,
89
  ) -> bytes:
90
+ context, page = await self.new_context_page(
91
+ screenshot_model=screenshot_model, browser=self.browser, page_model=page_model,
92
+ )
 
93
 
94
  await page.goto(str(screenshot_model.url))
95
 
 
102
  )
103
 
104
  if screenshot_locator:
 
 
105
  screenshot_data: bytes = await screenshot_locator.screenshot()
106
  else:
107
  screenshot_data: bytes = await page.screenshot(full_page=screenshot_model.full_page)
108
 
109
  await page.close()
 
110
 
111
+ if context:
112
+ await context.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ return screenshot_data
 
 
 
 
115
 
116
  async def close_instance(self) -> None:
117
+ """for manual closing of playwright if needed"""
118
  if self.playwright:
119
  await self.browser.close()
120
  await self.playwright.stop()
121
+