# File size: 5,644 Bytes
# 315d050
import os
import time
import uvicorn
from fastapi import FastAPI, HTTPException
from starlette.responses import HTMLResponse
from pydantic import BaseModel
from DrissionPage import ChromiumPage
import threading
import subprocess
import os
# Supervisor for the Chrome launcher script
def start_chrome(restart_delay: float = 5.0):
    """Run ``init_chrome.sh`` from ``/app`` forever, relaunching it after each exit.

    Meant to be used as a thread target: this function never returns.

    Args:
        restart_delay: Seconds to pause after the script exits (or fails to
            launch) before it is started again.
    """
    while True:
        print('run init_chrome.sh')
        try:
            # Wait for the script to finish before looping; launching it
            # without waiting would spawn a new shell on every iteration.
            subprocess.run(
                ["bash", "init_chrome.sh"],
                cwd="/app",
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            # Best effort: a failed launch must not kill the supervisor thread.
            print(e)
        # Back off so a script that exits immediately is not re-run in a
        # tight loop.
        time.sleep(restart_delay)
# Create and start the thread that runs start_chrome. It is marked as a
# daemon because start_chrome loops forever; a non-daemon thread would keep
# the interpreter alive after the web server has stopped.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()

# Keep trying to attach to the browser at 127.0.0.1:9200 until it succeeds,
# pausing 10 seconds after every failed attempt.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
        break
    except Exception as e:
        print("err", e)
        time.sleep(10)
# Pydantic model definition: JSON body accepted by POST /fetch
class FetchRequest(BaseModel):
    # Address of the page the browser should load
    url: str
# FastAPI application instance; the route handlers below are registered on it
app = FastAPI()
# Static landing page returned by read_root. The "source" example is written
# with HTML entities so the browser displays the markup instead of parsing it.
_INDEX_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DrissionPage API Documentation</title>
<style>
body {
font-family: Arial, sans-serif;
background-color: #f4f4f9;
color: #333;
margin: 0;
padding: 0;
}
.container {
width: 80%;
margin: 0 auto;
padding: 30px;
}
header {
background-color: #4CAF50;
color: white;
padding: 20px 0;
text-align: center;
}
h1 {
margin: 0;
font-size: 36px;
}
.section {
margin-top: 30px;
}
.section h2 {
color: #333;
font-size: 24px;
}
.section p {
font-size: 16px;
line-height: 1.6;
color: #555;
}
.code-block {
background-color: #f5f5f5;
padding: 15px;
border-radius: 5px;
margin-top: 10px;
font-family: monospace;
white-space: pre-wrap;
word-wrap: break-word;
}
.note {
background-color: #ffeb3b;
padding: 10px;
border-radius: 5px;
margin-top: 20px;
}
footer {
text-align: center;
margin-top: 50px;
font-size: 14px;
color: #777;
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>DrissionPage API Documentation</h1>
</header>
<div class="section">
<h2>Welcome!</h2>
<p>Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.</p>
</div>
<div class="section">
<h2>Usage</h2>
<h3>POST /fetch</h3>
<p>This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.</p>
<h4>Request</h4>
<p>The request should be a JSON object containing the URL to fetch:</p>
<div class="code-block">
{
"url": "https://example.com"
}
</div>
<h4>Response</h4>
<p>The response will contain two fields: `jpg` (a base64-encoded image of the webpage screenshot) and `source` (the raw HTML of the page).</p>
<div class="code-block">
{
"jpg": "base64_encoded_image_string_here",
"source": "&lt;html&gt;...&lt;/html&gt;"
}
</div>
</div>
<div class="note">
<p><strong>Note:</strong> Make sure the browser is running and accessible at the specified address.</p>
</div>
<footer>
<p>API Documentation by DrissionPage</p>
</footer>
</div>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page for the API.

    The home route (``/``) gives users basic information about the API and
    how to use it, as a styled HTML page.

    Returns:
        HTMLResponse: The static documentation page.
    """
    return HTMLResponse(content=_INDEX_HTML)
def _listen_target(url: str) -> str:
    """Return *url* without its scheme, query string and fragment."""
    bare = url.split('?')[0].split('#')[0]
    # Drop "<scheme>://" whatever its length; slicing a fixed 8 characters is
    # only correct for "https://" and eats part of the host for "http://".
    _scheme, sep, rest = bare.partition('://')
    return rest if sep else bare


@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load a page in the shared browser and return a screenshot plus its body.

    Args:
        request: Body carrying the ``url`` to open.

    Returns:
        dict: ``"jpg"`` holds the base64 screenshot and ``"source"`` holds the
        raw body of the captured response, or ``""`` when no packet arrived
        within the 5-second wait.

    Raises:
        HTTPException: Status 500 when anything fails while driving the
            browser.
    """
    url = request.url
    # Strip the scheme, query parameters and fragment from the URL
    listen_url = _listen_target(url)
    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)
            # Wait for the matching data packet to come back
            packet = browser.listen.wait(timeout=5, count=1)
            # Page screenshot as a base64 string
            # NOTE(review): the key says "jpg" but as_base64=True may select
            # the library's default image format - confirm against the
            # DrissionPage documentation.
            screenshot = browser.get_screenshot(as_base64=True)
            # Raw body of the captured response, if any
            source = packet.response.raw_body if packet else ""
        finally:
            # Always stop listening, including when navigation or the
            # screenshot raised, so the listener is not left running.
            browser.listen.stop()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching the page: {e}") from e
    return {
        "jpg": screenshot,
        "source": source,
    }
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8000, with a
    # single worker.
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)