zhou12189108 committed on
Commit
1488b3f
·
verified ·
1 Parent(s): b048c19

Upload hcaptcha_solver.py

Browse files
Files changed (1) hide show
  1. hcaptcha_solver.py +137 -89
hcaptcha_solver.py CHANGED
@@ -1,8 +1,13 @@
1
- from __future__ import annotations
2
- from loguru import logger
3
- from playwright.async_api import BrowserContext as ASyncContext, async_playwright, Page
4
- from hcaptcha_challenger import ModelHub, install
5
- from hcaptcha_challenger.agents import AgentT, Malenia
 
 
 
 
 
6
 
7
 
8
  async def route_continuation(route, request, host, sitekey):
@@ -17,6 +22,7 @@ async def route_continuation(route, request, host, sitekey):
17
  <html lang="en">
18
  <head>
19
  <title>hCAPTCHA 演示</title>
 
20
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
21
  <meta name="viewport" content="width=device-width, user-scalable=yes">
22
  <script src="https://js.hcaptcha.com/1/api.js" type="text/javascript" async defer></script>
@@ -72,96 +78,121 @@ async def route_continuation(route, request, host, sitekey):
72
  await route.continue_()
73
 
74
 
75
def patch_modelhub(modelhub: ModelHub):
    """Register extra CLIP candidate prompts on ``modelhub``.

    Guidance carried over from the original notes:

    1. Patching ``clip_candidates`` lets every image classification task be
       handled in self-supervised mode.
    2. Hints must be injected for all categories that appear in a batch of
       images.
    3. The ObjectsYaml in the GitHub repository is updated regularly; when a
       new prompt shows up, patch hints for it the same way as below.
    4. This table is expected to change over time: labels that no longer
       appear should not be kept in ``clip_candidates``.
    5. Keep the number of candidate prompts moderate, since too many prompts
       increase the computational cost.

    :param modelhub: object whose ``clip_candidates`` mapping is updated in place
    :return: None
    """
    largest_animal_candidates = [
        "parrot",
        "bee",
        "ladybug",
        "frog",
        "crab",
        "bat",
        "butterfly",
        "dragonfly",
        "giraffe",
        "tiger",
        "owl",
        "duck",
    ]
    extra_prompts = {"the largest animal in real life": largest_animal_candidates}
    modelhub.clip_candidates.update(extra_prompts)
111
-
112
-
113
def prelude(page: Page) -> AgentT:
    """Refresh the models and build an agent bound to ``page``.

    Notes carried over from the original comments:

    * A background task should actively run ``install(upgrade=True)`` every
      20 minutes.
    * ``install(upgrade=True, clip=True)`` must run before each instantiation,
      which is why it is the first call here.

    :param page: the Playwright page the agent will control
    :return: the agent created by ``AgentT.from_page``
    """
    install(upgrade=True, clip=True)

    hub = ModelHub.from_github_repo()
    hub.parse_objects()

    # Pre-modify the modelhub before handing it to the agent; this is how
    # custom CLIP prompts get registered.
    patch_modelhub(hub)

    # The modelhub is registered externally so the patched configuration is
    # used; clip=True enables the CLIP zero-shot image classification method.
    return AgentT.from_page(page=page, modelhub=hub, tmp_dir="tmp_dir", clip=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
 
async def hit_challenge(context: ASyncContext, host, sitekey, times: int = 8):
    """Load ``https://{host}`` in ``context`` and let the agent work the challenge.

    Every request of the context is routed through ``route_continuation``.
    After the checkbox is handled, the agent is executed up to ``times - 1``
    times.

    :return: the ``generated_pass_UUID`` entry of ``agent.cr.__dict__`` once
        the agent reports ``CHALLENGE_SUCCESS`` (the context is closed first);
        ``None`` when ``agent.qr`` is falsy after an attempt or when the
        attempts run out.
    """

    def _reroute(route, request):
        return route_continuation(route, request, host, sitekey)

    await context.route('**/*', _reroute)
    page = await context.new_page()

    agent = prelude(page)
    await page.goto(f"https://{host}")
    logger.info("startup sitelink", url=f"https://{host}")

    await agent.handle_checkbox()

    for pth in range(1, times):
        # Handle challenge
        result = await agent.execute()
        if not agent.qr:
            return

        # Post-processing
        if result == agent.status.CHALLENGE_BACKCALL or result == agent.status.CHALLENGE_RETRY:
            logger.warning("retry", pth=pth, ash=agent.ash)
            await page.wait_for_timeout(500)
            challenge_frame = page.frame_locator(agent.HOOK_CHALLENGE)
            await challenge_frame.locator("//div[@class='refresh button']").click()
        elif result == agent.status.CHALLENGE_SUCCESS:
            logger.success("task done", pth=pth, ash=agent.ash)
            payload = vars(agent.cr)
            await context.close()
            return payload["generated_pass_UUID"]
165
 
166
 
167
  async def bytedance(host, sitekey):
@@ -171,6 +202,23 @@ async def bytedance(host, sitekey):
171
  locale="en-US"
172
  )
173
  await Malenia.apply_stealth(context)
 
 
174
 
175
- token = await hit_challenge(context, host, sitekey)
 
 
 
 
 
 
 
 
 
 
 
 
176
  return token
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import random
4
+
5
+ import nopecha
6
+ import requests
7
+ from hcaptcha_challenger.agents import Malenia
8
+ from playwright.async_api import BrowserContext as ASyncContext, async_playwright
9
+
10
import os

# NOTE(security): an API credential was committed to source control here.
# Prefer supplying it through the NOPECHA_API_KEY environment variable; the
# literal fallback is kept only so existing deployments keep working and
# should be rotated and removed.
nopecha.api_key = os.environ.get("NOPECHA_API_KEY", '43dkmk7lmv9qhqzx')
11
 
12
 
13
  async def route_continuation(route, request, host, sitekey):
 
22
  <html lang="en">
23
  <head>
24
  <title>hCAPTCHA 演示</title>
25
+ <meta charset="UTF-8">
26
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
27
  <meta name="viewport" content="width=device-width, user-scalable=yes">
28
  <script src="https://js.hcaptcha.com/1/api.js" type="text/javascript" async defer></script>
 
78
  await route.continue_()
79
 
80
 
81
def url_to_base64(url, timeout=10):
    """Download ``url`` and return its body as a base64 string.

    :param url: address to fetch with ``requests.get``
    :param timeout: seconds to wait for the server before giving up, so a
        stalled download cannot block the caller indefinitely
    :return: the base64-encoded response body decoded as UTF-8 text, or
        ``None`` when the request fails or the server answers with an HTTP
        error status
    """
    try:
        # Fetch the content behind the URL.
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of encoding an error page.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None

    # Convert the content to base64.
    return base64.b64encode(response.content).decode('utf-8')
93
+
94
+
95
async def on_response(response, page):
    """React to hCaptcha API responses observed on ``page``.

    * ``getcaptcha`` responses: the JSON payload is printed, the example
      images are downloaded and the challenge is dispatched by its
      ``request_type`` to ``classify_click`` (``image_label_binary``) or
      ``area_click`` (``image_label_area_select``). Any other type is skipped
      by clicking the refresh button of the challenge frame.
    * ``checkcaptcha`` responses: when the payload has a truthy ``pass``, the
      ``generated_pass_UUID`` is stored in the module-level ``token``, the
      page is closed and the module-level ``tasks`` future is cancelled.

    :param response: the Playwright response object delivered by the event
    :param page: the page the challenge is running on
    """
    global tasks, token

    challenge_iframe = "//iframe[contains(@title, 'hCaptcha challenge')]"

    if response.url.startswith("https://api.hcaptcha.com/getcaptcha"):
        # Read and print the response content.
        data = await response.json()
        print(data)

        examples_urls = data.get("requester_question_example")
        # NOTE(review): the field is iterated as a list of URLs; a bare string
        # is wrapped so it is not iterated character by character - confirm
        # against the API payloads actually seen.
        if isinstance(examples_urls, str):
            examples_urls = [examples_urls]

        examples = None
        if examples_urls:
            # url_to_base64 is a blocking call; run it in the default executor
            # so the event loop keeps processing browser events.
            loop = asyncio.get_running_loop()
            downloaded = await asyncio.gather(
                *(loop.run_in_executor(None, url_to_base64, link) for link in examples_urls)
            )
            # Drop failed downloads (None) rather than passing them on.
            examples = [item for item in downloaded if item is not None] or None

        request_type = data.get("request_type")
        if request_type == "image_label_binary":
            await classify_click(page, data, 0, examples)
        elif request_type == "image_label_area_select":
            await area_click(page, data, 0, examples)
        else:
            # Unsupported challenge type: ask for another one.
            await asyncio.sleep(random.uniform(0.1, 0.3))
            await page.wait_for_selector(challenge_iframe)
            frame_challenge = page.frame_locator(challenge_iframe)
            await frame_challenge.locator("//div[@class='refresh button']").click()
    elif response.url.startswith("https://api.hcaptcha.com/checkcaptcha"):
        data0 = await response.json()
        if data0.get("pass"):
            # Store the token first so a failure while closing cannot lose it.
            token = data0.get("generated_pass_UUID")
            print(data0)
            try:
                await page.close()
            finally:
                # tasks stays None when the solver was not started via main().
                if tasks is not None:
                    tasks.cancel()
124
+
125
+
126
async def classify_click(page, data, round0, examples):
    """Solve one page of an ``image_label_binary`` challenge.

    The image URLs of the current page are sent to
    ``nopecha.Recognition.solve``; every tile whose verdict is truthy is
    clicked and the page is submitted. After the first page the function calls
    itself once more, but only when ``tasklist`` actually holds a second page.

    :param page: the Playwright page showing the challenge
    :param data: the ``getcaptcha`` JSON payload; ``tasklist`` and
        ``requester_question`` are read from it
    :param round0: ``0`` for the first page of images, anything else for the
        second page
    :param examples: base64 example images forwarded to the solver, or ``None``
    :return: ``None``; returns early without touching the page when there are
        no tasks for the requested round
    """
    page_size = 9
    challenge_iframe = "//iframe[contains(@title, 'hCaptcha challenge')]"

    tasklist = data.get("tasklist") or []
    first = 0 if round0 == 0 else page_size
    # Slicing never raises, so a challenge with fewer than 18 images simply
    # yields an empty second page instead of an IndexError.
    batch = tasklist[first:first + page_size]
    if not batch:
        return

    await page.wait_for_selector(challenge_iframe)
    frame_challenge = page.frame_locator(challenge_iframe)
    samples = frame_challenge.locator("//div[@class='task-image']")
    await frame_challenge.locator("//div[@tabindex='0']").nth(0).wait_for()
    count = await samples.count()
    print(count)

    # NOTE(review): this call is made synchronously inside a coroutine; if it
    # blocks for long, consider moving it to an executor.
    clicks = nopecha.Recognition.solve(
        type='hcaptcha',
        task=data.get("requester_question").get("en"),
        image_urls=[task["datapoint_uri"] for task in batch],
        image_examples=examples
    )
    print(clicks)

    # zip() stops at the shorter side, so a mismatch between the number of
    # tiles on screen and the number of verdicts cannot index out of range.
    for index, should_click in zip(range(count), clicks):
        sample = samples.nth(index)
        await sample.wait_for()
        if should_click:
            print("try to click")
            await sample.click(delay=200)

    await asyncio.sleep(random.uniform(0.1, 0.3))
    await frame_challenge.locator("//div[@class='button-submit button']").click()

    if round0 == 0 and len(tasklist) > page_size:
        await classify_click(page, data, 1, examples)
151
+
152
+
153
async def area_click(page, data, round0, examples):
    """Solve one image of an ``image_label_area_select`` challenge.

    The task image for the current round is downloaded, sent to
    ``nopecha.Recognition.solve`` and the returned ``x``/``y`` values are
    clicked on the challenge canvas before the answer is submitted. After the
    first image the function calls itself once more, but only when
    ``tasklist`` holds a second task.

    :param page: the Playwright page showing the challenge
    :param data: the ``getcaptcha`` JSON payload; ``tasklist`` and
        ``requester_question`` are read from it
    :param round0: ``0`` for the first task, anything else for the second
    :param examples: base64 example images forwarded to the solver when truthy
    :return: ``None``; returns early when there is no task for the round, the
        image cannot be downloaded, or the canvas has no bounding box
    """
    challenge_iframe = "//iframe[contains(@title, 'hCaptcha challenge')]"

    tasklist = data.get("tasklist") or []
    # NOTE(review): assumes the n-th image shown corresponds to tasklist[n];
    # previously the first task was re-solved for the second image - confirm.
    task_index = 0 if round0 == 0 else 1
    if task_index >= len(tasklist):
        return

    # url_to_base64 is a blocking call; keep it off the event loop.
    loop = asyncio.get_running_loop()
    encoded = await loop.run_in_executor(
        None, url_to_base64, tasklist[task_index]["datapoint_uri"]
    )
    if encoded is None:
        print("challenge image could not be downloaded")
        return

    await page.wait_for_selector(challenge_iframe)
    frame_challenge = page.frame_locator(challenge_iframe)
    locator = frame_challenge.locator("//div[@class='challenge-view']//canvas")
    await locator.wait_for(state="visible")

    solve_kwargs = {
        "type": 'hcaptcha_area_select',
        "task": data.get("requester_question").get("en"),
        "image_data": [encoded],
    }
    # Examples are only passed along when there are any.
    if examples:
        solve_kwargs["image_examples"] = examples
    clicks = nopecha.Recognition.solve(**solve_kwargs)
    print(clicks)
    print(clicks["x"], clicks["y"])

    print("try to click")
    bounds = await locator.bounding_box()
    print(bounds)
    if bounds is None:
        # No bounding box is available, so there is nothing to click.
        return

    # x / y are used as percentages of the canvas width and height.
    target = {
        "x": int(bounds["width"] * clicks["x"] / 100),
        "y": int(bounds["height"] * clicks["y"] / 100),
    }
    await locator.click(delay=200, position=target)
    print("done")

    await frame_challenge.locator("//div[@class='button-submit button']").click()
    await asyncio.sleep(random.uniform(0.1, 0.3))

    if round0 == 0 and len(tasklist) > 1:
        await area_click(page, data, 1, examples)
186
 
187
 
async def hit_challenge(context: ASyncContext, host, sitekey, times: int = 8, timeout: float = 3000):
    """Open the demo page, click the hCaptcha checkbox and wait for the result.

    Every request of ``context`` is routed through ``route_continuation`` and
    every response of the new page is handed to ``on_response``, which does
    the actual solving. This coroutine then only sleeps; the caller is
    expected to cancel it once ``on_response`` has obtained a token.

    :param context: browser context to open the page in
    :param host: host name loaded as ``https://{host}``
    :param sitekey: forwarded to ``route_continuation``
    :param times: not used by this implementation; kept so existing callers
        keep working
    :param timeout: seconds to keep waiting for the response handler before
        returning; the default matches the previously hard-coded 3000 seconds
    :return: ``None``
    """
    await context.route('**/*', lambda route, request: route_continuation(route, request, host, sitekey))
    page = await context.new_page()
    page.on('response', lambda response: on_response(response, page))

    await page.goto(f"https://{host}")
    checkbox = page.frame_locator("//iframe[contains(@title,'checkbox')]")
    await checkbox.locator("#checkbox").click()

    # asyncio.sleep takes seconds (not milliseconds): this is the upper bound
    # on how long an unsolved challenge keeps the caller waiting.
    await asyncio.sleep(timeout)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
 
198
  async def bytedance(host, sitekey):
 
202
  locale="en-US"
203
  )
204
  await Malenia.apply_stealth(context)
205
+ await hit_challenge(context, host, sitekey)
206
+
207
 
208
# Module-level state. ``tasks`` and ``token`` are declared ``global`` by
# on_response() and main(): ``tasks`` holds the running gather future and
# ``token`` the last generated pass UUID.
question = dict()
token = None
tasks = None
211
+
212
+
213
async def main(host, key):
    """Run the solver for ``host`` / ``key`` and return the pass token.

    ``bytedance(host, key)`` is wrapped in a gather future stored in the
    module-level ``tasks`` so that ``on_response`` can cancel it as soon as a
    token is available; that cancellation is the normal success path.

    :param host: host name forwarded to ``bytedance``
    :param key: site key forwarded to ``bytedance``
    :return: the module-level ``token``, or ``None`` when no token was
        obtained during this call
    """
    global tasks, token

    # Reset the shared token so a failed run cannot return the value left
    # behind by an earlier call.
    token = None
    try:
        tasks = asyncio.gather(bytedance(host, key),
                               return_exceptions=True)
        outcomes = await tasks
    except asyncio.CancelledError:
        print("task done")
    else:
        # return_exceptions=True hands exceptions back as results; report
        # them instead of dropping them silently.
        for outcome in outcomes:
            if isinstance(outcome, Exception):
                print(f"solver failed: {outcome!r}")
    return token
222
+
223
+
224
+