|
import os |
|
import time |
|
import uvicorn |
|
from fastapi import FastAPI, HTTPException |
|
from starlette.responses import HTMLResponse |
|
|
|
from pydantic import BaseModel |
|
from DrissionPage import ChromiumPage |
|
|
|
|
|
import threading |
|
import subprocess |
|
import os |
|
|
|
|
|
def start_chrome():
    """Keep the Chrome launcher script running, relaunching it when it exits.

    Runs ``init_chrome.sh`` from ``/app`` through the shell with its output
    discarded. Each launch blocks until the script exits; after a short pause
    the script is started again. This function never returns, so it is meant
    to be used as the target of a background thread.
    """
    script_path = "cd /app && bash init_chrome.sh >> /dev/null 2>&1"
    print('run init_chrome.sh')
    while True:
        try:
            # Block until the script exits so that only one launcher is alive
            # at a time; a non-blocking spawn here would start new shells
            # without bound.
            subprocess.run(script_path, shell=True, check=False)
        except Exception as e:
            print(e)
        # Pause before relaunching so a script that exits right away does
        # not turn this loop into a busy spin.
        time.sleep(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the Chrome launcher loop in the background. The thread is a daemon
# because start_chrome() never returns; a non-daemon thread would keep the
# interpreter alive after the server has shut down.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
# Keep trying to create the ChromiumPage for 127.0.0.1:9200 until it
# succeeds. Each failure is printed and followed by a 10 second pause.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
    except Exception as exc:
        print("err", exc)
        time.sleep(10)
    else:
        break
|
|
|
|
|
|
|
|
|
class FetchRequest(BaseModel):
    """JSON body accepted by ``POST /fetch``."""

    # Address of the page to load in the browser.
    url: str
|
|
|
# Application object; the route decorators in this module register on it.
app = FastAPI()
|
|
|
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page (``/``) with basic API information.

    Returns:
        HTMLResponse: a styled HTML page that describes the API and shows
        how to call ``POST /fetch``.
    """
    # Markup that is shown as sample output is entity-escaped
    # (&lt;html&gt;...) so the browser displays it instead of parsing it.
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>DrissionPage API Documentation</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 0;
                padding: 0;
            }
            .container {
                width: 80%;
                margin: 0 auto;
                padding: 30px;
            }
            header {
                background-color: #4CAF50;
                color: white;
                padding: 20px 0;
                text-align: center;
            }
            h1 {
                margin: 0;
                font-size: 36px;
            }
            .section {
                margin-top: 30px;
            }
            .section h2 {
                color: #333;
                font-size: 24px;
            }
            .section p {
                font-size: 16px;
                line-height: 1.6;
                color: #555;
            }
            .code-block {
                background-color: #f5f5f5;
                padding: 15px;
                border-radius: 5px;
                margin-top: 10px;
                font-family: monospace;
                white-space: pre-wrap;
                word-wrap: break-word;
            }
            .note {
                background-color: #ffeb3b;
                padding: 10px;
                border-radius: 5px;
                margin-top: 20px;
            }
            footer {
                text-align: center;
                margin-top: 50px;
                font-size: 14px;
                color: #777;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <header>
                <h1>DrissionPage API Documentation</h1>
            </header>

            <div class="section">
                <h2>Welcome!</h2>
                <p>Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.</p>
            </div>

            <div class="section">
                <h2>Usage</h2>
                <h3>POST /fetch</h3>
                <p>This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.</p>
                <h4>Request</h4>
                <p>The request should be a JSON object containing the URL to fetch:</p>
                <div class="code-block">
                    {
                        "url": "https://example.com"
                    }
                </div>

                <h4>Response</h4>
                <p>The response will contain two fields: `jpg` (a base64-encoded image of the webpage screenshot) and `source` (the raw HTML of the page).</p>
                <div class="code-block">
                    {
                        "jpg": "base64_encoded_image_string_here",
                        "source": "&lt;html&gt;...&lt;/html&gt;"
                    }
                </div>
            </div>

            <div class="note">
                <p><strong>Note:</strong> Make sure the browser is running and accessible at the specified address.</p>
            </div>

            <footer>
                <p>API Documentation by DrissionPage</p>
            </footer>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
|
|
|
|
|
|
|
def _listen_target(url):
    """Return *url* without its scheme, query string and fragment.

    The result is the text passed to ``browser.listen.start`` for the
    requested page.
    """
    base = url.split('?')[0].split('#')[0]
    scheme, sep, rest = base.partition('://')
    # Only treat the prefix as a scheme when it comes before any path
    # separator; this handles "http://" and "https://" alike and leaves
    # scheme-less input untouched.
    if sep and '/' not in scheme:
        return rest
    return base


@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load a page in the shared browser and return a screenshot and body.

    Args:
        request: request body carrying the ``url`` to load.

    Returns:
        dict: ``{"jpg": <base64 screenshot>, "source": <raw body of the
        captured response, or "" when no packet arrived within 5 seconds>}``.

    Raises:
        HTTPException: status 500 when any browser operation fails.
    """
    url = request.url
    listen_url = _listen_target(url)

    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)

            # wait() yields a falsy value when nothing matched in time.
            packet = browser.listen.wait(timeout=5, count=1)
            screenshot = browser.get_screenshot(as_base64=True)
            source = packet.response.raw_body if packet else ""
        finally:
            # Stop the listener on failure as well, so a failed request does
            # not leave it running for the next one.
            browser.listen.stop()
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error fetching the page: {e}"
        ) from e

    return {
        "jpg": screenshot,
        "source": source,
    }
|
|
|
if __name__ == "__main__":
    # Serve the API on all interfaces, port 8000, with a single worker.
    uvicorn.run(
        app,
        host='0.0.0.0',
        port=8000,
        workers=1,
    )