Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import uvicorn
|
4 |
+
from fastapi import FastAPI, HTTPException
|
5 |
+
from starlette.responses import HTMLResponse
|
6 |
+
|
7 |
+
from pydantic import BaseModel
|
8 |
+
from DrissionPage import ChromiumPage
|
9 |
+
|
10 |
+
|
11 |
+
import threading
|
12 |
+
import subprocess
|
13 |
+
import os
|
14 |
+
|
15 |
+
# 定义启动 Chrome 的函数
|
16 |
+
def start_chrome(restart_delay=5):
    """Run ``init_chrome.sh`` and relaunch it whenever it exits.

    Meant to be used as a background-thread target; it never returns.

    Each launch blocks until the shell command finishes, and restarts are
    spaced out by ``restart_delay`` seconds. Launching without waiting on the
    child would let this loop spawn new shell processes as fast as it can
    spin.

    Args:
        restart_delay: Seconds to wait after the script exits (or fails to
            launch) before running it again.
    """
    # Executed through the shell because the command relies on `cd`, `&&`
    # and output redirection.
    # NOTE(review): presumably init_chrome.sh stays in the foreground while
    # Chrome is running -- confirm against the script.
    script_path = "cd /app && bash init_chrome.sh >> /dev/null 2>&1"
    print('run init_chrome.sh')
    while True:
        try:
            # Blocks until the script exits, so only one instance runs at a time.
            subprocess.run(script_path, shell=True)
        except Exception as e:
            print(e)
        # Throttle restarts so a script that fails immediately cannot spin the CPU.
        time.sleep(restart_delay)
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
# Make sure the /app/cache directory exists.
cache_dir = "/app/cache"
# exist_ok=True creates the directory only when it is missing, without the
# race between a separate existence check and the creation call.
os.makedirs(cache_dir, exist_ok=True)
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
# Create and start the background thread that runs start_chrome.
# daemon=True: start_chrome loops forever, so a non-daemon thread would keep
# the interpreter alive after the main thread (the web server) has finished.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
# Keep trying to attach to the browser at 127.0.0.1:9200; on any failure,
# report it and try again 10 seconds later.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
    except Exception as exc:
        print("err", exc)
        time.sleep(10)
    else:
        # Connected: `browser` is now available to the request handlers.
        break
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
# Pydantic 模型定义
|
58 |
+
class FetchRequest(BaseModel):
    # JSON body accepted by the POST /fetch endpoint.
    # (Kept as comments rather than a docstring so the generated schema
    # description is unchanged.)

    # Address of the page to open in the browser.
    url: str
|
60 |
+
|
61 |
+
# ASGI application object; the route decorators below register on it and it is
# what uvicorn serves when this file is run as a script.
app = FastAPI()
|
62 |
+
|
63 |
+
# Static landing page served at "/". Built once at import time instead of on
# every request. The contents of the `.code-block` elements are
# whitespace-sensitive (white-space: pre-wrap), so they are kept flush against
# their tags, and the sample markup is HTML-escaped so it is displayed as text
# rather than parsed by the browser.
_INDEX_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>DrissionPage API Documentation</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f9;
            color: #333;
            margin: 0;
            padding: 0;
        }
        .container {
            width: 80%;
            margin: 0 auto;
            padding: 30px;
        }
        header {
            background-color: #4CAF50;
            color: white;
            padding: 20px 0;
            text-align: center;
        }
        h1 {
            margin: 0;
            font-size: 36px;
        }
        .section {
            margin-top: 30px;
        }
        .section h2 {
            color: #333;
            font-size: 24px;
        }
        .section p {
            font-size: 16px;
            line-height: 1.6;
            color: #555;
        }
        .code-block {
            background-color: #f5f5f5;
            padding: 15px;
            border-radius: 5px;
            margin-top: 10px;
            font-family: monospace;
            white-space: pre-wrap;
            word-wrap: break-word;
        }
        .note {
            background-color: #ffeb3b;
            padding: 10px;
            border-radius: 5px;
            margin-top: 20px;
        }
        footer {
            text-align: center;
            margin-top: 50px;
            font-size: 14px;
            color: #777;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>DrissionPage API Documentation</h1>
        </header>

        <div class="section">
            <h2>Welcome!</h2>
            <p>Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.</p>
        </div>

        <div class="section">
            <h2>Usage</h2>
            <h3>POST /fetch</h3>
            <p>This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.</p>
            <h4>Request</h4>
            <p>The request should be a JSON object containing the URL to fetch:</p>
            <div class="code-block">{
  "url": "https://example.com"
}</div>

            <h4>Response</h4>
            <p>The response will contain two fields: <code>jpg</code> (a base64-encoded image of the webpage screenshot) and <code>source</code> (the raw HTML of the page).</p>
            <div class="code-block">{
  "jpg": "base64_encoded_image_string_here",
  "source": "&lt;html&gt;...&lt;/html&gt;"
}</div>
        </div>

        <div class="note">
            <p><strong>Note:</strong> Make sure the browser is running and accessible at the specified address.</p>
        </div>

        <footer>
            <p>API Documentation by DrissionPage</p>
        </footer>
    </div>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page describing the API and how to call it.

    Returns:
        An ``HTMLResponse`` containing a styled HTML page that documents the
        ``POST /fetch`` endpoint, its request body and its response fields.
    """
    return HTMLResponse(content=_INDEX_HTML)
|
179 |
+
|
180 |
+
|
181 |
+
|
182 |
+
def _listen_target(url: str) -> str:
    """Return *url* without its scheme, query string, or fragment."""
    trimmed = url.split('?')[0].split('#')[0]
    # Drop whatever scheme is present (http, https, ...) instead of assuming
    # the URL starts with the 8 characters of "https://".
    _scheme, separator, remainder = trimmed.partition('://')
    return remainder if separator else trimmed


@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load a URL in the shared browser and return a screenshot plus its body.

    Args:
        request: Request body carrying the ``url`` to open.

    Returns:
        A dict with two keys: ``jpg``, the screenshot as returned by
        ``browser.get_screenshot(as_base64=True)``, and ``source``, the
        ``raw_body`` of the captured response, or ``""`` when no packet was
        captured within the wait timeout.

    Raises:
        HTTPException: With status 500 if any browser operation fails.
    """
    url = request.url

    # Listen for the page's own request: the URL minus scheme, query and anchor.
    listen_url = _listen_target(url)

    # NOTE(review): the browser calls below are synchronous, so this coroutine
    # holds the event loop while a page loads and requests use the shared
    # browser one at a time.
    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)

            # Wait for the matching network packet.
            packet = browser.listen.wait(timeout=5, count=1)

            # Screenshot of the page, base64-encoded.
            screenshot = browser.get_screenshot(as_base64=True)

            # Raw response body; empty when nothing was captured in time.
            source = packet.response.raw_body if packet else ""
        finally:
            # Always stop listening once it has been started, including when
            # loading or capturing fails, so the listener is not left running.
            browser.listen.stop()

        return {
            "jpg": screenshot,
            "source": source,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching the page: {e}") from e
|
212 |
+
|
213 |
+
if __name__ == "__main__":
    # Script entry point: serve the app on every interface, port 8000.
    uvicorn.run(
        app,
        port=8000,
        host='0.0.0.0',
        workers=1,
    )
|