"""HTTP wrapper around a DrissionPage-controlled Chromium instance.

On import this module starts a background thread that launches Chromium via
``/app/init_chrome.sh``, blocks until a DevTools connection to
``127.0.0.1:9200`` succeeds, and then exposes a FastAPI application with:

* ``GET /``       - a short usage page.
* ``POST /fetch`` - load a URL and return a screenshot plus the raw response
  body captured for that URL.
"""
import os
import subprocess
import threading
import time

import uvicorn
from DrissionPage import ChromiumPage
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from starlette.responses import HTMLResponse

# Seconds to pause before relaunching the Chrome script after it exits.
CHROME_RESTART_DELAY = 1


def start_chrome():
    """Run ``init_chrome.sh`` from ``/app`` and relaunch it whenever it exits.

    ``subprocess.run`` blocks until the script terminates, so at most one
    launcher process exists at a time.  (Calling ``os.popen`` in a bare loop
    returns immediately and would spawn processes without bound.)  The
    script's stdout and stderr are discarded.  Never returns.
    """
    print('run init_chrome.sh')
    while True:
        try:
            subprocess.run(
                ["bash", "init_chrome.sh"],
                cwd="/app",
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            # Best effort: report the failure (e.g. missing bash or /app)
            # and try again after the delay.
            print(e)
        time.sleep(CHROME_RESTART_DELAY)


# Create and start the launcher thread.  It is a daemon thread because its
# loop never ends; a non-daemon thread would keep the interpreter from exiting.
chrome_thread = threading.Thread(target=start_chrome, daemon=True)
chrome_thread.start()

# Keep retrying until the browser's debugging endpoint accepts a connection.
while True:
    try:
        browser = ChromiumPage('127.0.0.1:9200')
        break
    except Exception as e:
        print("err", e)
        time.sleep(10)


# Pydantic model definition
class FetchRequest(BaseModel):
    """Request body for ``POST /fetch``."""

    # Address of the page to load.
    url: str


app = FastAPI()


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """
    The index route (/) gives users basic information about the API and
    instructions for using it.
    It returns an HTML page describing the API and how to call it.
    """
    html_content = """ DrissionPage API Documentation

DrissionPage API Documentation

Welcome!

Welcome to the DrissionPage API! This API allows you to fetch webpages, capture screenshots, and return the source HTML of the page. Below is the API usage and details.

Usage

POST /fetch

This endpoint allows you to fetch a webpage and get a screenshot along with the raw HTML content.

Request

The request should be a JSON object containing the URL to fetch:

{ "url": "https://example.com" }

Response

The response will contain two fields: `jpg` (a base64-encoded image of the webpage screenshot) and `source` (the raw HTML of the page).

{ "jpg": "base64_encoded_image_string_here", "source": "..." }

Note: Make sure the browser is running and accessible at the specified address.

"""
    return HTMLResponse(content=html_content)


def _listen_target(url):
    """Return *url* without its scheme, query string, or fragment.

    The result is the text handed to the browser's network listener.  The
    scheme is removed by splitting on ``://`` rather than slicing a fixed
    number of characters, so ``http://`` and ``https://`` URLs both work;
    a URL with no scheme is returned with only query/fragment removed.
    """
    bare = url.split('?')[0].split('#')[0]
    _scheme, separator, remainder = bare.partition('://')
    return remainder if separator else bare


@app.post("/fetch")
async def fetch_page(request: FetchRequest):
    """Load ``request.url`` in the shared browser and capture the result.

    Returns:
        A dict with ``jpg`` (base64 screenshot of the page) and ``source``
        (raw body of the captured response, or ``""`` if no matching packet
        arrived within 5 seconds).

    Raises:
        HTTPException: status 500 if any browser operation fails.
    """
    url = request.url
    # Strip the query parameters, fragment, and scheme from the URL.
    listen_url = _listen_target(url)

    try:
        browser.listen.start(listen_url)
        try:
            browser.get(url)

            # Wait for the matching network packet to come back.
            packet = browser.listen.wait(timeout=5, count=1)

            # Page screenshot (base64 format).
            screenshot = browser.get_screenshot(as_base64=True)

            # Raw page content.
            source = packet.response.raw_body if packet else ""
        finally:
            # Always stop listening, even when navigation or the screenshot
            # failed, so the listener is not left active for the next request.
            browser.listen.stop()
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error fetching the page: {e}"
        ) from e

    return {
        "jpg": screenshot,
        "source": source,
    }


if __name__ == "__main__":
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)