# get_mhtml / main.py
# ttttdiva's picture
# Update main.py
# 193553d verified
import os
from fastapi import FastAPI
from playwright.sync_api import sync_playwright
# ↑ The sync API may fail with the error
# "It looks like you are using Playwright Sync API inside the asyncio loop".
# If that happens, use the async API below and declare the endpoints with async def.
# from playwright.async_api import async_playwright
# Application instance; the @app.get decorators below register their routes on it.
app = FastAPI()
@app.get("/")
def read_root():
    """Return a usage hint that points the caller at the /save_mhtml endpoint."""
    hint = "Visit /save_mhtml to capture MHTML. Then check the dev mode to see if the file is there."
    return {"message": hint}
@app.get("/save_mhtml")
def save_mhtml():
    """Capture a fixed external page as MHTML and save it to disk.

    The page is loaded only when /save_mhtml is requested, so a failure
    here has no effect on application startup, which lowers the risk of
    Dev mode going down.

    Returns:
        A dict with "status" ("success" or "error") and "message" (the
        absolute path of the saved file, or the text of the exception).
    """
    url = "https://civitai.com/models/1055452/akashicpulse"
    output_path = "akashicpulse.mhtml"
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle", timeout=60000)
                cdp_session = page.context.new_cdp_session(page)
                snapshot = cdp_session.send("Page.captureSnapshot", {"format": "mhtml"})
            finally:
                # Close the browser even when navigation or the CDP call fails.
                browser.close()

        # The CDP reply is a dict; the serialized MHTML text is under "data".
        mhtml = snapshot["data"]

        # newline="" disables newline translation so the MHTML's own line
        # endings are written to disk unchanged on every platform.
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            f.write(mhtml)

        return {
            "status": "success",
            "message": f"MHTML saved to {os.path.abspath(output_path)}"
        }
    except Exception as e:
        # Endpoint boundary: report the failure in the response body instead
        # of letting it propagate.
        return {
            "status": "error",
            "message": str(e)
        }