# Page header captured with this file: site-get / app.py · Arkm20 · "Update app.py" · efeb730 (verified)
import io
import os
from urllib.parse import urlsplit

from fastapi import FastAPI, Response
from playwright.sync_api import sync_playwright
# FastAPI application instance: the @app.get routes in this file register on
# it, and the __main__ block hands it to uvicorn.
app = FastAPI()
def get_custom_headers(url: str):
    """Build the extra HTTP headers to attach to a browser page for *url*.

    Every value is fixed except ``Referer``, which is set to *url* itself.
    No request is made here; the function only assembles and returns a dict.
    """
    return {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": url,
    }
@app.get("/fetch")
def fetch_html(url: str):
    """Load *url* in headless Chromium and return the rendered page HTML.

    Args:
        url: Absolute ``http``/``https`` URL of the page to load.

    Returns:
        ``{"html": <markup>}`` on success, or ``{"error": <message>}`` when
        the URL is rejected or the browser raises. Failures are reported in
        the response body rather than raised.
    """
    try:
        # The URL comes straight from the query string. Chromium will also
        # open file://, chrome:// and data: URLs, which would expose content
        # local to the server, so only web schemes are accepted.
        # NOTE(review): internal/private hosts remain reachable (SSRF); add a
        # host allow-list if this service is exposed publicly.
        if urlsplit(url).scheme not in ("http", "https"):
            return {"error": "Only http and https URLs are supported"}

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # Headers must be installed before navigation to apply to it.
                page.set_extra_http_headers(get_custom_headers(url))
                # Give up after 15 s; wait for the network to go idle so
                # content loaded after the initial document is included.
                page.goto(url, timeout=15000, wait_until="networkidle")
                html = page.content()
            finally:
                # Close the browser even when navigation fails or times out.
                browser.close()
        return {"html": html}
    except Exception as e:
        return {"error": str(e)}
@app.get("/download")
def download_image(url: str):
    """Return a PNG screenshot of the first ``<img>`` element on *url*.

    The image is captured by screenshotting the rendered element, not by
    downloading the original image file.

    Args:
        url: Absolute ``http``/``https`` URL of the page to load.

    Returns:
        A ``Response`` with ``image/png`` content on success, or
        ``{"error": <message>}`` when the URL is rejected, the page has no
        ``<img>`` element, or the browser raises.
    """
    try:
        # The URL comes straight from the query string. Chromium will also
        # open file://, chrome:// and data: URLs, which would expose content
        # local to the server, so only web schemes are accepted.
        # NOTE(review): internal/private hosts remain reachable (SSRF); add a
        # host allow-list if this service is exposed publicly.
        if urlsplit(url).scheme not in ("http", "https"):
            return {"error": "Only http and https URLs are supported"}

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # Headers must be installed before navigation to apply to it.
                page.set_extra_http_headers(get_custom_headers(url))
                # Give up after 15 s; wait for the network to go idle so
                # late-loading images are present in the DOM.
                page.goto(url, timeout=15000, wait_until="networkidle")

                # query_selector returns the first match, or None if absent.
                img_element = page.query_selector("img")
                if img_element is None:
                    return {"error": "No image found on the page"}
                screenshot = img_element.screenshot()
            finally:
                # Single cleanup point for success, "no image" and failures.
                browser.close()
        return Response(content=screenshot, media_type="image/png")
    except Exception as e:
        return {"error": str(e)}
if __name__ == "__main__":
    # Script entry point: install Playwright's system dependencies and the
    # Chromium build, then serve the app on all interfaces at port 7860.
    import uvicorn

    # Setup commands run in order; their exit statuses are not checked.
    for setup_command in ("playwright install-deps", "playwright install chromium"):
        os.system(setup_command)

    uvicorn.run(app, host="0.0.0.0", port=7860)