Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -134,7 +134,11 @@ def extract_background_from_url(url: str):
|
|
134 |
@app.get("/extract-page-text/")
|
135 |
def extract_page_content(url: str, max_char: int = 5000):
|
136 |
try:
|
137 |
-
|
|
|
|
|
|
|
|
|
138 |
response.raise_for_status()
|
139 |
|
140 |
soup = BeautifulSoup(response.text, 'html.parser')
|
|
|
134 |
@app.get("/extract-page-text/")
|
135 |
def extract_page_content(url: str, max_char: int = 5000):
|
136 |
try:
|
137 |
+
headers = {
|
138 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
139 |
+
"Accept": "application/pdf"
|
140 |
+
}
|
141 |
+
response = requests.get(url, headers=headers, timeout=20, verify=False)
|
142 |
response.raise_for_status()
|
143 |
|
144 |
soup = BeautifulSoup(response.text, 'html.parser')
|