"""Small FastAPI service that renders pages with headless Chromium.

Endpoints:
    GET /fetch?url=...     -> {"html": "<rendered page source>"}
    GET /download?url=...  -> PNG screenshot of the first <img> on the page

On failure both endpoints answer with ``{"error": "<message>"}``.
"""

import contextlib
import io
import logging
import os
from urllib.parse import urlsplit

from fastapi import FastAPI, Response
from playwright.sync_api import sync_playwright

logger = logging.getLogger(__name__)

app = FastAPI()

# Upper bound, in milliseconds, for a single page navigation.
NAVIGATION_TIMEOUT_MS = 15000

# The URL comes straight from the query string, so only web schemes are
# accepted; otherwise Chromium would happily open e.g. file:// URLs and the
# service would hand local files back to the caller.
_ALLOWED_SCHEMES = frozenset({"http", "https"})


def get_custom_headers(url: str) -> dict:
    """Build the extra HTTP headers sent with every request of a page.

    No request is made here; the result is a static browser-like header set
    whose ``Referer`` is the target URL itself.

    Args:
        url: The URL that is about to be visited.

    Returns:
        A new dict mapping header names to header values.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": url,
    }
    return headers


def _require_web_url(url: str) -> None:
    """Raise ``ValueError`` unless *url* uses the http or https scheme."""
    if urlsplit(url).scheme not in _ALLOWED_SCHEMES:
        raise ValueError("Only http and https URLs are supported")


@contextlib.contextmanager
def _open_page(url: str):
    """Yield a Playwright page that has finished navigating to *url*.

    Launches headless Chromium, applies the custom headers, and waits for the
    network to go idle. The browser is closed when the ``with`` block exits,
    whether it exits normally or through an exception.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            # Headers must be set before navigating so they apply to the
            # very first request as well.
            page.set_extra_http_headers(get_custom_headers(url))
            page.goto(
                url,
                timeout=NAVIGATION_TIMEOUT_MS,
                wait_until="networkidle",
            )
            yield page
        finally:
            browser.close()


# NOTE: the endpoint functions deliberately carry no return annotation;
# FastAPI would interpret one as the response model.
@app.get("/fetch")
def fetch_html(url: str):
    """Return the rendered HTML of *url*.

    Args:
        url: Absolute http(s) URL of the page to render.

    Returns:
        ``{"html": <page source>}`` on success, ``{"error": <message>}`` if
        the URL is rejected or the browser fails.
    """
    try:
        _require_web_url(url)
        with _open_page(url) as page:
            return {"html": page.content()}
    except Exception as e:
        # Endpoint boundary: keep the established error payload, but leave a
        # trace in the logs instead of swallowing the failure silently.
        logger.exception("Failed to fetch HTML from %s", url)
        return {"error": str(e)}


@app.get("/download")
def download_image(url: str):
    """Return a PNG screenshot of the first ``<img>`` element on *url*.

    Args:
        url: Absolute http(s) URL of the page to render.

    Returns:
        A ``Response`` with media type ``image/png`` on success;
        ``{"error": <message>}`` if the page has no image, the URL is
        rejected, or the browser fails.
    """
    try:
        _require_web_url(url)
        with _open_page(url) as page:
            img_element = page.query_selector("img")
            if img_element is None:
                return {"error": "No image found on the page"}
            screenshot = img_element.screenshot()
        return Response(content=screenshot, media_type="image/png")
    except Exception as e:
        logger.exception("Failed to capture image from %s", url)
        return {"error": str(e)}


if __name__ == "__main__":
    import uvicorn

    # Make sure the browser and its system dependencies are present before
    # the server starts accepting requests.
    os.system("playwright install-deps")
    os.system("playwright install chromium")
    uvicorn.run(app, host="0.0.0.0", port=7860)