Spaces:
Running
Running
Commit
·
a871aff
1
Parent(s):
9183d8e
Update extract_text.py to include User-Agent header in PDF download requests
Browse files: extract_text.py (+3 -3)
extract_text.py
CHANGED
@@ -6,10 +6,10 @@ def download_pdf(url, id):
|
|
6 |
id = id.replace("/", "-")
|
7 |
directory = "downloads"
|
8 |
os.makedirs(directory, exist_ok=True)
|
9 |
-
file_path = os.path.join(directory, f"{id}.pdf")
|
10 |
try:
|
11 |
-
response = requests.get(url)
|
12 |
-
response.raise_for_status()
|
13 |
with open(file_path, "wb") as file:
|
14 |
file.write(response.content)
|
15 |
except Exception as e:
|
|
|
6 |
id = id.replace("/", "-")
|
7 |
directory = "downloads"
|
8 |
os.makedirs(directory, exist_ok=True)
|
9 |
+
file_path = os.path.join(directory, f"{id}.pdf")
|
10 |
try:
|
11 |
+
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"})
|
12 |
+
response.raise_for_status()
|
13 |
with open(file_path, "wb") as file:
|
14 |
file.write(response.content)
|
15 |
except Exception as e:
|