qiqi657s committed on
Commit
315d050
·
verified ·
1 Parent(s): 284c083

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +214 -0
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import uvicorn
4
+ from fastapi import FastAPI, HTTPException
5
+ from starlette.responses import HTMLResponse
6
+
7
+ from pydantic import BaseModel
8
+ from DrissionPage import ChromiumPage
9
+
10
+
11
+ import threading
12
+ import subprocess
13
+ import os
14
+
15
# Supervisor loop that launches Chrome through init_chrome.sh.
def start_chrome():
    """Run ``init_chrome.sh`` forever, restarting it each time it exits.

    Intended to be used as a thread target; it never returns.  Each
    iteration blocks until the shell command finishes, so only one copy of
    the script is running at a time.  Any exception raised while launching
    the command is printed and the loop continues.
    """
    # Shell command line: run the init script from /app, discarding output.
    script_path = "cd /app && bash init_chrome.sh >> /dev/null 2>&1"
    print('run init_chrome.sh')
    while True:
        try:
            # subprocess.run waits for the command to exit.  The previous
            # os.popen() call returned immediately, so the loop spawned new
            # shells as fast as it could without ever reaping them.
            subprocess.run(script_path, shell=True, check=False)
        except Exception as e:
            print(e)
        # Brief pause so a script that exits immediately is not re-run in a
        # busy loop.
        time.sleep(1)
25
+
26
+
27
+
28
+
29
+
30
# Make sure the /app/cache directory exists.
cache_dir = "/app/cache"
# exist_ok=True avoids the check-then-create race of testing
# os.path.exists() first, and is a no-op when the directory is already there.
os.makedirs(cache_dir, exist_ok=True)
34
+
35
+
36
+
37
# Create and start the background thread that runs start_chrome.
# start_chrome loops forever, so the thread is marked as a daemon: a
# non-daemon thread that never finishes would keep the interpreter from
# exiting when the server shuts down.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()
40
+
41
+
42
+
43
+
44
+
45
+
46
# Keep trying to attach to the browser at 127.0.0.1:9200 until it succeeds.
# Every failed attempt is printed and followed by a 10-second wait.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
    except Exception as exc:
        print("err", exc)
        time.sleep(10)
    else:
        # Connected; `browser` is now available at module level.
        break
54
+
55
+
56
+
57
# Pydantic model definitions
class FetchRequest(BaseModel):
    """Request body accepted by ``POST /fetch``."""

    # Address of the page to load.
    url: str


# FastAPI application instance that the route decorators register on.
app = FastAPI()
62
+
63
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page (``/``) with basic API information.

    Returns a styled, static HTML page that describes the API and shows how
    to call ``POST /fetch``.

    Returns:
        HTMLResponse: the documentation page.
    """
    # NOTE: the sample response below writes the markup as
    # &lt;html&gt;...&lt;/html&gt;.  Left unescaped, the browser parses it as
    # real tags and the example is not displayed as text.
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>DrissionPage API Documentation</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 0;
                padding: 0;
            }
            .container {
                width: 80%;
                margin: 0 auto;
                padding: 30px;
            }
            header {
                background-color: #4CAF50;
                color: white;
                padding: 20px 0;
                text-align: center;
            }
            h1 {
                margin: 0;
                font-size: 36px;
            }
            .section {
                margin-top: 30px;
            }
            .section h2 {
                color: #333;
                font-size: 24px;
            }
            .section p {
                font-size: 16px;
                line-height: 1.6;
                color: #555;
            }
            .code-block {
                background-color: #f5f5f5;
                padding: 15px;
                border-radius: 5px;
                margin-top: 10px;
                font-family: monospace;
                white-space: pre-wrap;
                word-wrap: break-word;
            }
            .note {
                background-color: #ffeb3b;
                padding: 10px;
                border-radius: 5px;
                margin-top: 20px;
            }
            footer {
                text-align: center;
                margin-top: 50px;
                font-size: 14px;
                color: #777;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <header>
                <h1>DrissionPage API Documentation</h1>
            </header>

            <div class="section">
                <h2>Welcome!</h2>
                <p>Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.</p>
            </div>

            <div class="section">
                <h2>Usage</h2>
                <h3>POST /fetch</h3>
                <p>This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.</p>
                <h4>Request</h4>
                <p>The request should be a JSON object containing the URL to fetch:</p>
                <div class="code-block">
{
    "url": "https://example.com"
}
                </div>

                <h4>Response</h4>
                <p>The response will contain two fields: `jpg` (a base64-encoded image of the webpage screenshot) and `source` (the raw HTML of the page).</p>
                <div class="code-block">
{
    "jpg": "base64_encoded_image_string_here",
    "source": "&lt;html&gt;...&lt;/html&gt;"
}
                </div>
            </div>

            <div class="note">
                <p><strong>Note:</strong> Make sure the browser is running and accessible at the specified address.</p>
            </div>

            <footer>
                <p>API Documentation by DrissionPage</p>
            </footer>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
179
+
180
+
181
+
182
@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load a URL in the shared browser and return a screenshot and response body.

    The browser's network listener is armed for the requested address, the
    page is opened, and the first matching packet (waited for up to 5
    seconds) supplies the raw response body.

    Args:
        request: JSON body carrying the ``url`` to load.

    Returns:
        dict: ``{"jpg": <base64 screenshot>, "source": <raw response body>}``.
        ``source`` is an empty string when no matching packet was captured.

    Raises:
        HTTPException: status 500 if any browser operation fails.
    """
    url = request.url

    # Listener target: the URL without its query string, fragment and scheme.
    # The scheme is removed by splitting on "://" rather than slicing a fixed
    # 8 characters, which was only correct for "https://" and chopped the
    # first host character off "http://" URLs.
    base_url = url.split('?', 1)[0].split('#', 1)[0]
    listen_url = base_url.split('://', 1)[-1]

    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)

            # Wait for the matching packet to arrive.
            packet = browser.listen.wait(timeout=5, count=1)

            # Grab a screenshot of the page (base64 encoded).
            screenshot = browser.get_screenshot(as_base64=True)

            # Raw body of the captured response, if there was one.
            source = packet.response.raw_body if packet else ""
        finally:
            # Always stop listening, even when navigation or capture fails,
            # so a failed request does not leave the listener running.
            browser.listen.stop()

        return {
            "jpg": screenshot,
            "source": source,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching the page: {e}") from e
212
+
213
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8000,
    # with a single worker.
    uvicorn.run(
        app,
        host='0.0.0.0',
        port=8000,
        workers=1,
    )