github-actions[bot] committed
Commit · be63dad
1 Parent(s): 799a782
Update from GitHub Actions
main.py CHANGED
@@ -6,6 +6,8 @@ from fastapi.responses import HTMLResponse, StreamingResponse
 from typing import Optional
 import uvicorn
 import asyncio
+from urllib.parse import urlparse
+import time
 
 app = FastAPI(
     title="ScraperProxy",
@@ -391,11 +393,37 @@ async def proxy(request: Request):
             'https': proxy
         }
         # Test whether the proxy is working
-        response = scraper.get('https://httpbin.org/ip')
-        print(response.text)
+        # response = scraper.get('https://httpbin.org/ip')
+        # print(response.text)
 
+        # Get home_url
+        home_url = request.query_params.get("home")
+        if not home_url:
+            # Extract home_url from target_url
+            parsed_url = urlparse(target_url)
+            home_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+
+        # Retry fetching the home page response
+        max_retries = 5
+        retry_delay = 1  # retry interval in seconds
+        home_response = None
+
+        for attempt in range(max_retries):
+            try:
+                home_response = scraper.get(home_url, headers={"sec-fetch-dest": "document"})
+                print(f"Home page response (attempt {attempt + 1}): {home_response.status_code}")
+
+                if home_response.status_code == 200:
+                    break
+
+                if attempt < max_retries - 1:  # if this is not the last attempt
+                    time.sleep(retry_delay)
+
+            except Exception as e:
+                print(f"Home page request failed (attempt {attempt + 1}): {str(e)}")
+                if attempt < max_retries - 1:
+                    time.sleep(retry_delay)
 
-
         # Get the request body
         body = await request.body()
 
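The block added above warms up the scraper session by requesting the target site's home page (taken from the home query parameter, or derived from target_url via urlparse) and retrying up to five times with a one-second delay. Below is a minimal standalone sketch of that pattern, assuming scraper is a requests-compatible session (for example one produced by cloudscraper.create_scraper()); the helper name warm_up_home_page and its parameters are hypothetical and not part of the commit.

import time
from urllib.parse import urlparse


def warm_up_home_page(scraper, target_url, home_url=None, max_retries=5, retry_delay=1):
    # Hypothetical helper: mirrors the retry loop added in the commit above.
    if not home_url:
        # Derive the home page from the target URL, as the diff does with urlparse.
        parsed = urlparse(target_url)
        home_url = f"{parsed.scheme}://{parsed.netloc}"

    for attempt in range(max_retries):
        try:
            # The diff sends the same sec-fetch-dest header on the warm-up request.
            response = scraper.get(home_url, headers={"sec-fetch-dest": "document"})
            print(f"Home page response (attempt {attempt + 1}): {response.status_code}")
            if response.status_code == 200:
                return response
        except Exception as exc:
            print(f"Home page request failed (attempt {attempt + 1}): {exc}")
        if attempt < max_retries - 1:
            time.sleep(retry_delay)  # wait before the next attempt
    return None


# Assumed usage (scraper construction is not shown in this commit):
# import cloudscraper
# scraper = cloudscraper.create_scraper()
# warm_up_home_page(scraper, "https://example.com/some/page")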
@@ -427,10 +455,11 @@ async def proxy(request: Request):
         headers.pop("content-length", None)
         headers.pop("user-agent", None)
         print(f"{headers}")
+
         # Build the request parameters
         request_kwargs = {
             "url": target_url,
-            "headers":
+            "headers": {"sec-fetch-dest": "document"},
             "params": params,
             "stream": stream_request  # set the stream parameter
         }
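The last hunk pins the outgoing headers to {"sec-fetch-dest": "document"} and keeps the stream flag in request_kwargs. The following is a minimal sketch of how such kwargs can drive a streamed proxy response through FastAPI's StreamingResponse; the route path /proxy, the url query parameter, and the cloudscraper session are assumptions, and only the request_kwargs keys ("url", "headers", "params", "stream") come from the diff above.

import cloudscraper  # assumed; the commit does not show how `scraper` is created
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

app = FastAPI(title="ScraperProxy")
scraper = cloudscraper.create_scraper()


@app.get("/proxy")  # hypothetical route path and query parameter
async def proxy(request: Request, url: str):
    # Same shape as the request_kwargs built in the diff: url, headers, params, stream.
    request_kwargs = {
        "url": url,
        "headers": {"sec-fetch-dest": "document"},
        "params": {k: v for k, v in request.query_params.items() if k != "url"},
        "stream": True,  # stream the upstream body instead of buffering it
    }
    upstream = scraper.get(**request_kwargs)  # blocking call; acceptable for a sketch

    def iter_body():
        # requests-style streaming: yield the upstream body in chunks
        for chunk in upstream.iter_content(chunk_size=8192):
            if chunk:
                yield chunk

    return StreamingResponse(
        iter_body(),
        status_code=upstream.status_code,
        media_type=upstream.headers.get("content-type"),
    )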