File size: 5,644 Bytes
315d050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import os
import time
import uvicorn
from fastapi import FastAPI, HTTPException
from starlette.responses import HTMLResponse

from pydantic import BaseModel
from DrissionPage import ChromiumPage


import threading
import subprocess
import os

# Function that launches Chrome and relaunches it whenever the script exits.
def start_chrome(command="cd /app && bash init_chrome.sh >> /dev/null 2>&1",
                 restart_delay=5.0, max_launches=None):
    """Run the Chrome init script, relaunching it each time it exits.

    Each launch blocks until the shell command finishes, so at most one
    copy of the script runs at a time. Between launches the function
    sleeps ``restart_delay`` seconds, which keeps a script that fails
    immediately from turning this loop into a busy spin that floods the
    machine with processes.

    Args:
        command: Shell command line to execute. It is run through the
            shell because the default relies on ``cd``, ``&&`` and output
            redirection.
        restart_delay: Seconds to wait before relaunching after an exit.
        max_launches: Stop after this many launches. ``None`` (the
            default) loops forever, which is what the background thread
            in this module relies on.

    Returns:
        The number of launches performed (only reachable when
        ``max_launches`` is not ``None``).
    """
    launches = 0
    while max_launches is None or launches < max_launches:
        if launches:
            # Back off before every relaunch, never before the first start.
            time.sleep(restart_delay)
        print('run init_chrome.sh')
        try:
            # Blocking call: returns only once the script has exited.
            # A non-zero exit status is not an error here; we just relaunch.
            subprocess.run(command, shell=True, check=False)
        except (OSError, subprocess.SubprocessError) as e:
            # The shell itself could not be started; report and retry.
            print(e)
        launches += 1
    return launches

    






# Create and start the background thread that runs start_chrome().
# It is a daemon thread because start_chrome() loops forever: a non-daemon
# thread would keep the interpreter alive and prevent the process from
# exiting once the server has stopped.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()






# Keep trying to attach to the browser at 127.0.0.1:9200 until it succeeds,
# logging each failure and pausing 10 seconds between attempts.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
    except Exception as exc:
        print("err", exc)
        time.sleep(10)
    else:
        # Connected: `browser` is now the module-level handle used by the routes.
        break



# Pydantic model definition.
# Request body accepted by the POST /fetch route.
class FetchRequest(BaseModel):
    # Address of the page to load; passed unchanged to browser.get().
    url: str

# FastAPI application instance; the route decorators below register on it.
app = FastAPI()

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """
    Home route (/): give users basic information about the API and how to use it.

    Returns a styled, static HTML page that describes the API and documents
    the request and response format of POST /fetch.
    """
    # NOTE: example snippets shown inside the page must have their angle
    # brackets written as character references (&lt; / &gt;); raw tags in
    # the body would be parsed by the browser instead of being displayed.
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>DrissionPage API Documentation</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 0;
                padding: 0;
            }
            .container {
                width: 80%;
                margin: 0 auto;
                padding: 30px;
            }
            header {
                background-color: #4CAF50;
                color: white;
                padding: 20px 0;
                text-align: center;
            }
            h1 {
                margin: 0;
                font-size: 36px;
            }
            .section {
                margin-top: 30px;
            }
            .section h2 {
                color: #333;
                font-size: 24px;
            }
            .section p {
                font-size: 16px;
                line-height: 1.6;
                color: #555;
            }
            .code-block {
                background-color: #f5f5f5;
                padding: 15px;
                border-radius: 5px;
                margin-top: 10px;
                font-family: monospace;
                white-space: pre-wrap;
                word-wrap: break-word;
            }
            .note {
                background-color: #ffeb3b;
                padding: 10px;
                border-radius: 5px;
                margin-top: 20px;
            }
            footer {
                text-align: center;
                margin-top: 50px;
                font-size: 14px;
                color: #777;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <header>
                <h1>DrissionPage API Documentation</h1>
            </header>

            <div class="section">
                <h2>Welcome!</h2>
                <p>Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.</p>
            </div>

            <div class="section">
                <h2>Usage</h2>
                <h3>POST /fetch</h3>
                <p>This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.</p>
                <h4>Request</h4>
                <p>The request should be a JSON object containing the URL to fetch:</p>
                <div class="code-block">
                    {
                        "url": "https://example.com"
                    }
                </div>

                <h4>Response</h4>
                <p>The response will contain two fields: `jpg` (a base64-encoded image of the webpage screenshot) and `source` (the raw HTML of the page).</p>
                <div class="code-block">
                    {
                        "jpg": "base64_encoded_image_string_here",
                        "source": "&lt;html&gt;...&lt;/html&gt;"
                    }
                </div>
            </div>

            <div class="note">
                <p><strong>Note:</strong> Make sure the browser is running and accessible at the specified address.</p>
            </div>

            <footer>
                <p>API Documentation by DrissionPage</p>
            </footer>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)



@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load a URL in the shared browser and return a screenshot plus body.

    The browser's network listener is armed for the requested address, the
    page is loaded, and the first matching packet (waited for up to 5
    seconds) supplies the response body.

    Args:
        request: Request body carrying the ``url`` to load.

    Returns:
        A dict with ``jpg`` (the screenshot requested with
        ``as_base64=True``) and ``source`` (the captured packet's raw
        response body, or ``""`` when no packet was captured).

    Raises:
        HTTPException: With status 500 when any browser step fails.
    """
    url = request.url

    # Drop the query string and fragment, then the scheme prefix, leaving
    # "host/path" as the listen target. The scheme is removed by splitting
    # on "://" rather than slicing a fixed 8 characters, so "http://" and
    # scheme-less URLs are handled as well as "https://".
    bare_url = url.split('?', 1)[0].split('#', 1)[0]
    _scheme, separator, remainder = bare_url.partition('://')
    listen_url = remainder if separator else bare_url

    # NOTE(review): the browser calls below look synchronous, so they would
    # hold the event loop for the duration of the fetch - confirm this is
    # the intended way to serialize access to the single shared browser.
    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)

            # Wait for the matching data packet to come back.
            packet = browser.listen.wait(timeout=5, count=1)

            # Capture a screenshot of the page (base64 format).
            screenshot = browser.get_screenshot(as_base64=True)

            # Raw page content; empty when the wait produced no packet.
            source = packet.response.raw_body if packet else ""
        finally:
            # Always stop listening, even when a step above raised, so a
            # failed request does not leave the listener active.
            browser.listen.stop()

        return {
            "jpg": screenshot,
            "source": source,
        }

    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error fetching the page: {e}"
        ) from e

if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8000,
    # with a single worker.
    uvicorn.run(
        app,
        host='0.0.0.0',
        port=8000,
        workers=1,
    )