Spaces:
Sleeping
Sleeping
Update app/core/service/playwright/playwright_context.py
Browse files
app/core/service/playwright/playwright_context.py
CHANGED
@@ -10,46 +10,65 @@ from playwright.async_api import ( # noqa: F401
|
|
10 |
async_playwright,
|
11 |
)
|
12 |
|
13 |
-
from .models import GetContentModel, PageModel, ScreenshotModel
|
14 |
|
15 |
if TYPE_CHECKING:
|
16 |
from types import TracebackType
|
17 |
|
18 |
|
19 |
class AsyncPlaywrightContext:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
async def __aenter__(self) -> AsyncContextManager:
|
21 |
-
self.playwright
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
return self
|
32 |
|
33 |
-
async def
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
color_scheme=page_model.color_scheme,
|
36 |
java_script_enabled=page_model.java_script_enabled,
|
37 |
no_viewport=page_model.no_viewport,
|
38 |
proxy=page_model.proxy.model_dump() if page_model.proxy else None,
|
39 |
viewport=page_model.viewport.model_dump() if page_model.viewport else None,
|
40 |
)
|
41 |
-
return
|
42 |
|
43 |
async def screenshot(
|
44 |
self,
|
45 |
screenshot_model: ScreenshotModel,
|
46 |
page_model: PageModel,
|
47 |
) -> bytes:
|
48 |
-
page = await self.new_browser_page(
|
|
|
|
|
|
|
49 |
|
50 |
await page.goto(str(screenshot_model.url))
|
51 |
|
52 |
-
|
53 |
|
54 |
screenshot_locator = (
|
55 |
page.locator(screenshot_model.query_selector)
|
@@ -70,9 +89,13 @@ class AsyncPlaywrightContext:
|
|
70 |
async def get_content(
|
71 |
self,
|
72 |
get_content_model: GetContentModel,
|
|
|
73 |
) -> str:
|
74 |
-
page = await self.new_browser_page(
|
75 |
-
|
|
|
|
|
|
|
76 |
await page.goto(str(get_content_model.url))
|
77 |
await page.wait_for_timeout(get_content_model.ms_delay)
|
78 |
|
@@ -88,14 +111,21 @@ class AsyncPlaywrightContext:
|
|
88 |
await page.close()
|
89 |
return html
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
async def __aexit__(
|
92 |
self,
|
93 |
typ: type[BaseException] | None,
|
94 |
exc: BaseException | None,
|
95 |
tb: TracebackType | None,
|
96 |
) -> None:
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
10 |
async_playwright,
|
11 |
)
|
12 |
|
13 |
+
from .models import GetContentModel, PageModel, ScreenshotModel
|
14 |
|
15 |
if TYPE_CHECKING:
|
16 |
from types import TracebackType
|
17 |
|
18 |
|
19 |
class AsyncPlaywrightContext:
|
20 |
+
# User-Agent string (desktop Chrome 123 on macOS). Despite the plural name this
# is a single header value, not a mapping of headers; __aenter__ hands it to
# Firefox through the "general.useragent.override" preference.
HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" # noqa: E501
|
21 |
+
|
22 |
+
def __init__(self) -> None:
    """Create an idle instance; nothing is started until ``__aenter__`` runs.

    Every attribute the class assigns later is declared here so that a
    half-initialised instance can always be inspected and torn down safely.
    """
    # Playwright driver handle, set by ``__aenter__``.
    self.playwright = None
    # Browser launched from the driver, set by ``__aenter__``.
    self.browser = None
    # Shared context that ``new_context_page`` uses for non-isolated pages.
    self.default_context = None
    # ``__aenter__`` also stores the shared context under this name; declare
    # it so reading it before start-up yields None instead of AttributeError.
    self.context = None
|
26 |
+
|
27 |
async def __aenter__(self) -> AsyncContextManager:
    """Start Playwright, launch Firefox and open the shared browser context.

    Start-up is skipped when a Playwright driver is already attached to this
    instance, in which case the existing browser and context are reused.

    Returns:
        This instance, for use as ``async with AsyncPlaywrightContext() as ctx``.

    Raises:
        Any error raised by Playwright while starting, launching or creating
        the context. Whatever had already been started is shut down first.
    """
    if self.playwright is None:
        self.playwright = await async_playwright().start()
        try:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs={
                    "extensions.enabledScopes": 1,
                    "extensions.autoDisableScopes": 1,
                    "dom.webdriver.enabled": False,
                    "useAutomationExtension": False,
                    "general.useragent.override": self.HEADERS,
                },
            )
            # ``new_context_page`` serves shared pages from ``default_context``.
            # The same object stays reachable as ``context`` because that is
            # the attribute this method used to populate.
            self.default_context = self.context = await self.browser.new_context()
        except BaseException:
            # ``__aexit__`` is not invoked when ``__aenter__`` raises, so the
            # driver (and browser, if it got that far) must be released here.
            if self.browser is not None:
                await self.browser.close()
                self.browser = None
            await self.playwright.stop()
            self.playwright = None
            raise
    return self
|
41 |
|
42 |
+
async def new_context_page(
    self,
    screenshot_model: GetContentModel,
    browser: Browser,
    page_model: PageModel,
) -> Page:
    """Open a page, either in the shared context or in a dedicated one.

    Args:
        screenshot_model: Request model; only its ``new_browser`` flag is
            read here. A falsy flag selects the shared context.
        browser: Browser used to create the dedicated context.
        page_model: Source of the dedicated context's options (colour
            scheme, JavaScript toggle, viewport and proxy).

    Returns:
        The newly opened page.

    Raises:
        RuntimeError: If a shared page is requested before the shared
            context has been created.
    """
    if not screenshot_model.new_browser:
        # ``__aenter__`` originally stored the shared context as ``context``;
        # accept either attribute so this method works with both spellings.
        shared_context = self.default_context or getattr(self, "context", None)
        if shared_context is None:
            raise RuntimeError(
                "No shared browser context available; enter the async context manager first.",
            )
        return await shared_context.new_page()

    dedicated_context = await browser.new_context(
        color_scheme=page_model.color_scheme,
        java_script_enabled=page_model.java_script_enabled,
        no_viewport=page_model.no_viewport,
        proxy=page_model.proxy.model_dump() if page_model.proxy else None,
        viewport=page_model.viewport.model_dump() if page_model.viewport else None,
    )
    # BrowserContext offers ``new_page()``; it has no ``page()`` method.
    # NOTE(review): the dedicated context is not closed here; callers that
    # only close the page presumably leave it open until the browser closes.
    return await dedicated_context.new_page()


# ``screenshot`` and ``get_content`` call this method as ``new_browser_page``;
# keep that name resolvable alongside the current one.
new_browser_page = new_context_page
|
58 |
|
59 |
async def screenshot(
|
60 |
self,
|
61 |
screenshot_model: ScreenshotModel,
|
62 |
page_model: PageModel,
|
63 |
) -> bytes:
|
64 |
+
page = await self.new_browser_page(
|
65 |
+
screenshot_model = screenshot_model,
|
66 |
+
browser=self.browser,
|
67 |
+
page_model=page_model)
|
68 |
|
69 |
await page.goto(str(screenshot_model.url))
|
70 |
|
71 |
+
await page.wait_for_timeout(screenshot_model.ms_delay)
|
72 |
|
73 |
screenshot_locator = (
|
74 |
page.locator(screenshot_model.query_selector)
|
|
|
89 |
async def get_content(
|
90 |
self,
|
91 |
get_content_model: GetContentModel,
|
92 |
+
page_model: PageModel,
|
93 |
) -> str:
|
94 |
+
page = await self.new_browser_page(
|
95 |
+
screenshot_model = GetContentModel,
|
96 |
+
browser=self.browser,
|
97 |
+
page_model=page_model)
|
98 |
+
|
99 |
await page.goto(str(get_content_model.url))
|
100 |
await page.wait_for_timeout(get_content_model.ms_delay)
|
101 |
|
|
|
111 |
await page.close()
|
112 |
return html
|
113 |
|
114 |
+
async def close_instance(self) -> None:
    """Close the browser and stop the Playwright driver, if they exist.

    Safe to call on an instance that was never started, only partly
    started, or already closed. The driver is stopped even when closing
    the browser raises, and all handles are cleared afterwards so the
    instance can be started again.
    """
    try:
        if self.browser is not None:
            await self.browser.close()
    finally:
        # The shared context belongs to the browser and is unusable once
        # the browser is gone.
        self.browser = None
        self.default_context = None
        if self.playwright is not None:
            try:
                await self.playwright.stop()
            finally:
                self.playwright = None
|
120 |
+
|
121 |
async def __aexit__(
    self,
    typ: type[BaseException] | None,
    exc: BaseException | None,
    tb: TracebackType | None,
) -> None:
    """Close the browser and stop Playwright when the ``async with`` ends.

    The exception arguments are ignored and nothing is suppressed. The
    driver is stopped even if closing the browser raises, and every handle
    is reset to ``None`` so that ``__aenter__`` starts a fresh browser on
    the next entry instead of reusing a closed one.

    Args:
        typ: Type of the exception leaving the block, if any.
        exc: The exception instance, if any.
        tb: Traceback of the exception, if any.
    """
    try:
        if self.browser is not None:
            await self.browser.close()
    finally:
        # The shared context dies with the browser; drop the stale handle.
        self.browser = None
        self.default_context = None
        if self.playwright is not None:
            try:
                await self.playwright.stop()
            finally:
                self.playwright = None
|
131 |
+
|