Spaces:
Running
Running
:boom: [Fix] WebpageFetcher: raise timeout when requests.get hangs
Browse files- networks/webpage_fetcher.py +16 -7
networks/webpage_fetcher.py
CHANGED
|
@@ -22,18 +22,27 @@ class WebpageFetcher:
|
|
| 22 |
return False
|
| 23 |
|
| 24 |
def send_request(self):
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
def save_response(self):
|
| 32 |
if not self.output_path.exists():
|
| 33 |
self.output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 34 |
logger.success(f"Saving to: [{self.output_path}]")
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
def fetch(self, url, overwrite=False, output_parent=None):
|
| 39 |
self.url = url
|
|
|
|
| 22 |
return False
|
| 23 |
|
| 24 |
def send_request(self, timeout=15):
    """Fetch ``self.url`` and store the result on ``self.request_response``.

    The request is sent with ``REQUESTS_HEADERS`` and the proxies taken
    from ``self.enver.requests_proxies``.

    Args:
        timeout: Value forwarded to ``requests.get(timeout=...)`` so that a
            hanging request is abandoned instead of blocking forever.
            Defaults to 15, the value that was previously hard-coded.

    Returns:
        None. On success ``self.request_response`` holds the response
        object; on any failure a warning is logged and
        ``self.request_response`` is set to ``None``. Nothing is raised
        for ordinary errors.
    """
    try:
        self.request_response = requests.get(
            url=self.url,
            headers=REQUESTS_HEADERS,
            proxies=self.enver.requests_proxies,
            timeout=timeout,
        )
    except Exception:
        # Still best-effort: any ordinary error is downgraded to a warning.
        # `except Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate so the process can
        # still be interrupted while a fetch is in progress.
        logger.warn(f"Failed to fetch: [{self.url}]")
        self.request_response = None
|
| 35 |
|
| 36 |
def save_response(self):
    """Write the fetched response body to ``self.output_path``.

    If ``self.request_response`` is ``None`` (the value ``send_request``
    stores after a failed fetch) nothing is written and nothing is logged.
    Otherwise the parent directory is created when the output path does not
    exist yet, a "Saving to" message is logged, and the raw
    ``request_response.content`` bytes are written in binary mode.
    """
    if self.request_response is None:
        # Return before logging so a failed fetch is never reported as
        # "Saving to: ..." when no file is actually written.
        return

    if not self.output_path.exists():
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
    logger.success(f"Saving to: [{self.output_path}]")

    with open(self.output_path, "wb") as wf:
        wf.write(self.request_response.content)
|
| 46 |
|
| 47 |
def fetch(self, url, overwrite=False, output_parent=None):
|
| 48 |
self.url = url
|