glenn-jocher committed
Commit f8651c3 (unverified) · 1 Parent(s): 3cb9ad4

Parse URL authentication (#3424)


* Parse URL authentication

* urllib.parse.unquote()

* improved error handling

* improved error handling

* remove %3F

* update check_file()

Files changed (2)
  1. utils/general.py +3 -1
  2. utils/google_utils.py +10 -7
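
The parsing step that both files gain is the same two-liner: percent-decode the URL with urllib.parse.unquote() and cut the query string (the authentication part) off the filename. A minimal standalone sketch of that logic follows; the helper name and the auth token are illustrative only, not part of the repo:

    from pathlib import Path
    from urllib.parse import unquote

    def url_to_filename(url):  # hypothetical helper, mirrors the parsing added in this commit
        # Percent-decode ('%2F' -> '/', '%3F' -> '?', etc.), take the last path
        # component, then drop everything after '?' (the auth/query portion)
        name = Path(unquote(url)).name
        return name.split('?')[0]

    print(url_to_filename('https://url.com/file.txt?auth=abc123'))  # -> 'file.txt'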
utils/general.py CHANGED
@@ -9,6 +9,7 @@ import random
 import re
 import subprocess
 import time
+import urllib
 from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@@ -183,7 +184,8 @@ def check_file(file):
     if Path(file).is_file() or file == '':  # exists
         return file
     elif file.startswith(('http://', 'https://')):  # download
-        url, file = file, Path(file).name
+        url, file = file, Path(urllib.parse.unquote(str(file))).name  # url, file (decode '%2F' to '/' etc.)
+        file = file.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
         print(f'Downloading {url} to {file}...')
         torch.hub.download_url_to_file(url, file)
         assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
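
Note that check_file() keeps the original URL, query string and all, for the actual download and only strips the query from the local filename, so signed URLs still authenticate while the saved file gets a clean name. A hedged usage sketch, assuming it is run from inside the yolov5 repo; the signed URL below is a placeholder:

    from utils.general import check_file

    # The download uses the full signed URL; the local file is the cleaned name
    local = check_file('https://url.com/coco128.zip?auth=token')  # placeholder URL
    print(local)  # expected: 'coco128.zip'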
utils/google_utils.py CHANGED
@@ -4,6 +4,7 @@ import os
 import platform
 import subprocess
 import time
+import urllib
 from pathlib import Path
 
 import requests
@@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
 def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
     # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
     file = Path(file)
-    try:  # GitHub
+    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
+    try:  # url1
         print(f'Downloading {url} to {file}...')
         torch.hub.download_url_to_file(url, str(file))
-        assert file.exists() and file.stat().st_size > min_bytes  # check
-    except Exception as e:  # GCP
+        assert file.exists() and file.stat().st_size > min_bytes, assert_msg  # check
+    except Exception as e:  # url2
         file.unlink(missing_ok=True)  # remove partial downloads
-        print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
+        print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
         os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -")  # curl download, retry and resume on fail
     finally:
         if not file.exists() or file.stat().st_size < min_bytes:  # check
             file.unlink(missing_ok=True)  # remove partial downloads
-            print(f'ERROR: Download failure: {error_msg or url}')
+            print(f"ERROR: {assert_msg}\n{error_msg}")
         print('')
 
 
-def attempt_download(file, repo='ultralytics/yolov5'):
+def attempt_download(file, repo='ultralytics/yolov5'):  # from utils.google_utils import *; attempt_download()
     # Attempt file download if does not exist
     file = Path(str(file).strip().replace("'", ''))
 
     if not file.exists():
         # URL specified
-        name = file.name
+        name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
         if str(file).startswith(('http:/', 'https:/')):  # download
             url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
+            name = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
             safe_download(file=name, url=url, min_bytes=1E5)
             return name
 
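
safe_download() now follows a try / except / finally pattern: attempt the primary URL via torch.hub, fall back to curl against url2 (or retry url), and in all cases delete anything smaller than min_bytes. attempt_download() feeds it the cleaned name as the local destination while keeping the authenticated URL for the request. A hedged usage sketch; the URLs, mirror, and token below are placeholders, not real endpoints:

    from utils.google_utils import attempt_download, safe_download

    # Saves as 'yolov5s.pt' locally while downloading from the signed URL (placeholder)
    attempt_download('https://url.com/weights/yolov5s.pt?auth=token')

    # Direct call with an explicit fallback mirror (placeholder) and a size sanity check
    safe_download(file='yolov5s.pt',
                  url='https://url.com/yolov5s.pt?auth=token',
                  url2='https://example.com/mirror/yolov5s.pt',
                  min_bytes=1E5,
                  error_msg='Download failed, please retrieve the file manually.')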