Arkm20 committed on
Commit
1dd555c
·
verified ·
1 Parent(s): abed200

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -4,12 +4,30 @@ import os
4
 
5
  app = FastAPI()
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @app.get("/fetch")
8
  def fetch_html(url: str):
9
  try:
10
  with sync_playwright() as p:
11
  browser = p.chromium.launch(headless=True)
12
  page = browser.new_page()
 
 
 
 
 
13
  page.goto(url, timeout=15000)
14
  html = page.content()
15
  browser.close()
@@ -20,6 +38,5 @@ def fetch_html(url: str):
20
 
21
  if __name__ == "__main__":
22
  import uvicorn
23
- os.system("playwright install-deps")
24
  os.system("playwright install chromium")
25
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
4
 
5
  app = FastAPI()
6
 
7
def get_custom_headers(url: str):
    """Build the extra HTTP headers to send when fetching *url*.

    The returned dict always contains a desktop Chrome ``User-Agent``,
    ``Accept-Language``, ``Connection`` and a ``Referer`` equal to *url*.
    A ``Host: jannyai.com`` header is added only when the URL's actual
    hostname is ``jannyai.com`` or one of its subdomains.

    The hostname is parsed rather than substring-matched so that an
    unrelated URL that merely mentions the domain in its path or query
    (e.g. ``https://example.com/?next=jannyai.com``) does not get a
    mismatched ``Host`` header.

    Args:
        url: Absolute URL that is about to be requested.

    Returns:
        A new ``dict[str, str]`` of header names to values.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from here, so keep this block self-contained.
    from urllib.parse import urlsplit

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": url,
    }

    try:
        # ``hostname`` is lower-cased by urlsplit and is None when the URL
        # has no network location (for example, a scheme-less string).
        hostname = urlsplit(url).hostname
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): treat as "no host"
        # instead of raising, since this helper never raised before.
        hostname = None

    if hostname is not None and (
        hostname == "jannyai.com" or hostname.endswith(".jannyai.com")
    ):
        headers["Host"] = "jannyai.com"

    return headers
19
+
20
  @app.get("/fetch")
21
  def fetch_html(url: str):
22
  try:
23
  with sync_playwright() as p:
24
  browser = p.chromium.launch(headless=True)
25
  page = browser.new_page()
26
+
27
+ # Set custom headers before navigating to the page
28
+ headers = get_custom_headers(url)
29
+ page.set_extra_http_headers(headers)
30
+
31
  page.goto(url, timeout=15000)
32
  html = page.content()
33
  browser.close()
 
38
 
39
  if __name__ == "__main__":
40
  import uvicorn
 
41
  os.system("playwright install chromium")
42
+ uvicorn.run(app, host="0.0.0.0", port=7860)