Parse URL authentication (#3424)
* Parse URL authentication
* urllib.parse.unquote()
* improved error handling
* remove %3F
* update check_file()
- utils/general.py +3 -1
- utils/google_utils.py +10 -7
utils/general.py
CHANGED
@@ -9,6 +9,7 @@ import random
|
|
9 |
import re
|
10 |
import subprocess
|
11 |
import time
|
|
|
12 |
from itertools import repeat
|
13 |
from multiprocessing.pool import ThreadPool
|
14 |
from pathlib import Path
|
@@ -183,7 +184,8 @@ def check_file(file):
|
|
183 |
if Path(file).is_file() or file == '': # exists
|
184 |
return file
|
185 |
elif file.startswith(('http://', 'https://')): # download
|
186 |
-
url, file = file, Path(file).name
|
|
|
187 |
print(f'Downloading {url} to {file}...')
|
188 |
torch.hub.download_url_to_file(url, file)
|
189 |
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
|
|
|
9 |
import re
|
10 |
import subprocess
|
11 |
import time
|
12 |
+
import urllib
|
13 |
from itertools import repeat
|
14 |
from multiprocessing.pool import ThreadPool
|
15 |
from pathlib import Path
|
|
|
184 |
if Path(file).is_file() or file == '': # exists
|
185 |
return file
|
186 |
elif file.startswith(('http://', 'https://')): # download
|
187 |
+
url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
|
188 |
+
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
|
189 |
print(f'Downloading {url} to {file}...')
|
190 |
torch.hub.download_url_to_file(url, file)
|
191 |
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
|
utils/google_utils.py
CHANGED
@@ -4,6 +4,7 @@ import os
|
|
4 |
import platform
|
5 |
import subprocess
|
6 |
import time
|
|
|
7 |
from pathlib import Path
|
8 |
|
9 |
import requests
|
@@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
|
|
19 |
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
|
20 |
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
|
21 |
file = Path(file)
|
22 |
-
|
|
|
23 |
print(f'Downloading {url} to {file}...')
|
24 |
torch.hub.download_url_to_file(url, str(file))
|
25 |
-
assert file.exists() and file.stat().st_size > min_bytes # check
|
26 |
-
except Exception as e: #
|
27 |
file.unlink(missing_ok=True) # remove partial downloads
|
28 |
-
print(f'…')  [NOTE: old line truncated in this diff capture — presumably the pre-change re-attempt message; original text not recoverable from this rendering]
|
29 |
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
|
30 |
finally:
|
31 |
if not file.exists() or file.stat().st_size < min_bytes: # check
|
32 |
file.unlink(missing_ok=True) # remove partial downloads
|
33 |
-
print(f'…')  [NOTE: old line truncated in this diff capture — presumably the pre-change download-failure message; original text not recoverable from this rendering]
|
34 |
print('')
|
35 |
|
36 |
|
37 |
-
def attempt_download(file, repo='ultralytics/yolov5'):
|
38 |
# Attempt file download if does not exist
|
39 |
file = Path(str(file).strip().replace("'", ''))
|
40 |
|
41 |
if not file.exists():
|
42 |
# URL specified
|
43 |
-
name = file.name
|
44 |
if str(file).startswith(('http:/', 'https:/')): # download
|
45 |
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
|
|
|
46 |
safe_download(file=name, url=url, min_bytes=1E5)
|
47 |
return name
|
48 |
|
|
|
4 |
import platform
|
5 |
import subprocess
|
6 |
import time
|
7 |
+
import urllib
|
8 |
from pathlib import Path
|
9 |
|
10 |
import requests
|
|
|
20 |
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
|
21 |
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
|
22 |
file = Path(file)
|
23 |
+
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
|
24 |
+
try: # url1
|
25 |
print(f'Downloading {url} to {file}...')
|
26 |
torch.hub.download_url_to_file(url, str(file))
|
27 |
+
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
|
28 |
+
except Exception as e: # url2
|
29 |
file.unlink(missing_ok=True) # remove partial downloads
|
30 |
+
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
|
31 |
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
|
32 |
finally:
|
33 |
if not file.exists() or file.stat().st_size < min_bytes: # check
|
34 |
file.unlink(missing_ok=True) # remove partial downloads
|
35 |
+
print(f"ERROR: {assert_msg}\n{error_msg}")
|
36 |
print('')
|
37 |
|
38 |
|
39 |
+
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
|
40 |
# Attempt file download if does not exist
|
41 |
file = Path(str(file).strip().replace("'", ''))
|
42 |
|
43 |
if not file.exists():
|
44 |
# URL specified
|
45 |
+
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
|
46 |
if str(file).startswith(('http:/', 'https:/')): # download
|
47 |
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
|
48 |
+
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
|
49 |
safe_download(file=name, url=url, min_bytes=1E5)
|
50 |
return name
|
51 |
|