File size: 2,512 Bytes
e4114fe
90218db
 
e4114fe
09f5fa1
 
90218db
1dd555c
 
 
 
 
 
 
 
 
 
09f5fa1
 
 
90218db
 
 
1dd555c
 
 
 
 
9a8dd4c
b25d2cd
e4114fe
 
90218db
 
 
 
000eb85
3d94f74
bd5ae68
3b74c54
bd5ae68
 
 
 
efeb730
bd5ae68
 
 
efeb730
bd5ae68
 
efeb730
 
 
 
 
 
 
3b74c54
 
efeb730
 
3b74c54
 
efeb730
bd5ae68
 
 
3d94f74
 
9a8dd4c
90218db
1dd555c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from fastapi import FastAPI, Response
from playwright.sync_api import sync_playwright
import os
import io

app = FastAPI()

# Build the browser-like HTTP headers attached to each page load (no request is made here)
def get_custom_headers(url: str):
    """Return the extra HTTP headers to send when loading *url*.

    The User-Agent, Accept-Language and Connection values are constants;
    the Referer is the only field that varies and is set to *url* itself.
    """
    chrome_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    static_fields = {
        "User-Agent": chrome_user_agent,
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
    }
    # Referer goes last so the key order matches the original literal.
    return {**static_fields, "Referer": url}

@app.get("/fetch")
def fetch_html(url: str):
    """Render *url* in headless Chromium and return the resulting HTML.

    Args:
        url: Address of the page to load. Only ``http://`` and ``https://``
            URLs are accepted.

    Returns:
        ``{"html": <markup>}`` on success, or ``{"error": <message>}`` when
        the URL is rejected or any step of the browser session raises
        (for example a navigation timeout).
    """
    # The URL comes straight from the query string. Refuse non-web schemes so
    # the browser cannot be pointed at file:// paths or other local resources.
    # NOTE(review): http(s) URLs that resolve to internal hosts are still
    # reachable; add host filtering if this service is publicly exposed.
    if not url.lower().startswith(("http://", "https://")):
        return {"error": "Only http:// and https:// URLs are supported"}

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Headers must be installed before navigation to take effect
                # on the initial document request.
                page.set_extra_http_headers(get_custom_headers(url))

                # timeout is in milliseconds; "networkidle" waits for the
                # page's network activity to settle before returning.
                page.goto(url, timeout=15000, wait_until="networkidle")

                html = page.content()
            finally:
                # Close the browser on failure paths too, not only on success.
                browser.close()
        return {"html": html}
    except Exception as e:
        # Failures are reported in the response body instead of being raised.
        return {"error": str(e)}

@app.get("/download")
def download_image(url: str):
    """Load *url* in headless Chromium and return its first image as a PNG.

    The image is obtained by screenshotting the first ``<img>`` element
    found on the rendered page, not by downloading the image file itself.

    Args:
        url: Address of the page to load. Only ``http://`` and ``https://``
            URLs are accepted.

    Returns:
        A ``Response`` with media type ``image/png`` containing the
        screenshot, or ``{"error": <message>}`` when the URL is rejected,
        the page has no ``<img>`` element, or the browser session raises.
    """
    # The URL comes straight from the query string. Refuse non-web schemes so
    # the browser cannot be pointed at file:// paths or other local resources.
    # NOTE(review): http(s) URLs that resolve to internal hosts are still
    # reachable; add host filtering if this service is publicly exposed.
    if not url.lower().startswith(("http://", "https://")):
        return {"error": "Only http:// and https:// URLs are supported"}

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Headers must be installed before navigation to take effect
                # on the initial document request.
                page.set_extra_http_headers(get_custom_headers(url))

                # timeout is in milliseconds; "networkidle" waits for the
                # page's network activity to settle before returning.
                page.goto(url, timeout=15000, wait_until="networkidle")

                # query_selector returns None when nothing matches.
                img_element = page.query_selector("img")
                screenshot = img_element.screenshot() if img_element else None
            finally:
                # Single cleanup point: runs for the image, no-image and
                # exception paths alike.
                browser.close()
    except Exception as e:
        # Failures are reported in the response body instead of being raised.
        return {"error": str(e)}

    if screenshot is None:
        return {"error": "No image found on the page"}
    return Response(content=screenshot, media_type="image/png")

if __name__ == "__main__":
    import uvicorn

    # Best-effort browser provisioning before the server starts: system
    # packages first, then the Chromium build. Exit codes are not checked,
    # so the server starts regardless of whether these succeed.
    for setup_command in ("playwright install-deps", "playwright install chromium"):
        os.system(setup_command)

    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)