Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,30 @@ import os
|
|
4 |
|
5 |
app = FastAPI()
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
@app.get("/fetch")
|
8 |
def fetch_html(url: str):
|
9 |
try:
|
10 |
with sync_playwright() as p:
|
11 |
browser = p.chromium.launch(headless=True)
|
12 |
page = browser.new_page()
|
|
|
|
|
|
|
|
|
|
|
13 |
page.goto(url, timeout=15000)
|
14 |
html = page.content()
|
15 |
browser.close()
|
@@ -20,6 +38,5 @@ def fetch_html(url: str):
|
|
20 |
|
21 |
if __name__ == "__main__":
|
22 |
import uvicorn
|
23 |
-
os.system("playwright install-deps")
|
24 |
os.system("playwright install chromium")
|
25 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
4 |
|
5 |
app = FastAPI()
|
6 |
|
7 |
+
# Build the extra HTTP headers that are attached to the Playwright page before navigation
|
8 |
+
def get_custom_headers(url: str) -> dict:
    """Return the extra HTTP headers to send when fetching *url*.

    The result always carries a desktop-Chrome ``User-Agent``, an
    ``Accept-Language``, ``Connection: keep-alive`` and a ``Referer`` equal
    to *url* itself.  When the URL's host is ``jannyai.com`` (or one of its
    subdomains) a ``Host: jannyai.com`` header is added as well.

    Args:
        url: The address that is about to be requested.

    Returns:
        A new ``dict`` mapping header names to values; callers may mutate it.
    """
    # Local import keeps this block self-contained; the file's import
    # section is not visible from here.
    from urllib.parse import urlsplit

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": url,
    }

    # Decide on the *hostname*, not on a substring of the whole URL: a plain
    # `"jannyai.com" in url` also matches query strings, paths and lookalike
    # domains, which would send a jannyai.com Host header to another server.
    try:
        host = urlsplit(url).hostname
        if host is None:
            # Scheme-less input such as "jannyai.com/path" has no netloc;
            # re-parse it as a network-path reference to recover the host.
            host = urlsplit(f"//{url}").hostname
    except ValueError:
        # Malformed authority (e.g. an unterminated IPv6 literal).
        host = None

    if host is not None and (host == "jannyai.com" or host.endswith(".jannyai.com")):
        # NOTE(review): subdomains also get the apex Host value, as before —
        # confirm that is what the site expects.
        headers["Host"] = "jannyai.com"
    return headers
|
19 |
+
|
20 |
@app.get("/fetch")
|
21 |
def fetch_html(url: str):
|
22 |
try:
|
23 |
with sync_playwright() as p:
|
24 |
browser = p.chromium.launch(headless=True)
|
25 |
page = browser.new_page()
|
26 |
+
|
27 |
+
# Set custom headers before navigating to the page
|
28 |
+
headers = get_custom_headers(url)
|
29 |
+
page.set_extra_http_headers(headers)
|
30 |
+
|
31 |
page.goto(url, timeout=15000)
|
32 |
html = page.content()
|
33 |
browser.close()
|
|
|
38 |
|
39 |
if __name__ == "__main__":
    import subprocess
    import sys

    import uvicorn

    # Make sure the Chromium build that Playwright drives is present before
    # serving.  An argv list avoids going through a shell, and the exit
    # status is checked so a failed download is reported instead of being
    # silently ignored.  This stays best-effort: the server still starts.
    try:
        install = subprocess.run(["playwright", "install", "chromium"], check=False)
    except OSError as exc:
        # The `playwright` executable is missing or not runnable.
        print(f"warning: could not run 'playwright install chromium': {exc}", file=sys.stderr)
    else:
        if install.returncode != 0:
            print(
                f"warning: 'playwright install chromium' exited with status {install.returncode}",
                file=sys.stderr,
            )

    # Listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|