Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` (#4919)
* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`
* Cleanup
- utils/datasets.py +3 -2
- utils/downloads.py +3 -2
- utils/general.py +10 -8
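For context, the same pattern is applied in all three files: a shelled-out `unzip` call is replaced with Python's standard-library `zipfile` module, which works on platforms without an `unzip` binary (e.g. Windows) and raises an exception on a bad archive instead of returning an easily ignored exit code. A minimal sketch of the before/after, using a hypothetical `datasets/coco128.zip` path that is not part of the commit:

```python
from pathlib import Path
from zipfile import ZipFile

path = Path('datasets/coco128.zip')  # hypothetical archive, for illustration

# Before: os.system(f'unzip -q {path} -d {path.parent}')  # needs `unzip`, exit code easy to ignore
# After: pure-Python, cross-platform, raises BadZipFile/FileNotFoundError on failure
if path.is_file():
    ZipFile(path).extractall(path=path.parent)  # unzip next to the archive
    path.unlink()  # remove zip
```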
utils/datasets.py
CHANGED
```diff
@@ -15,6 +15,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool, Pool
 from pathlib import Path
 from threading import Thread
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -928,8 +929,8 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False):
         # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
         if str(path).endswith('.zip'):  # path is data.zip
             assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-            os.system(f'unzip -q {path} -d {path.parent}')  # unzip
-            dir = path.with_suffix('')  # dataset directory
+            ZipFile(path).extractall(path=path.parent)  # unzip
+            dir = path.with_suffix('')  # dataset directory == zip name
             return True, str(dir), next(dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
         else:  # path is data.yaml
             return False, None, path
```
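The TODO constraint in this hunk is what makes the new code correct: `dir` is derived purely from the archive name via `path.with_suffix('')`, so `path/to/abc.zip` must extract to `path/to/abc/` for `dir.rglob('*.yaml')` to find the dataset YAML. A quick sketch of the path arithmetic (names hypothetical):

```python
from pathlib import Path

path = Path('path/to/abc.zip')  # hypothetical dataset archive
dir = path.with_suffix('')      # Path('path/to/abc'), the expected dataset directory
# extractall(path=path.parent) unzips into 'path/to/', so the archive must contain
# a top-level 'abc/' folder; otherwise next(dir.rglob('*.yaml')) raises StopIteration.
print(dir)  # path/to/abc
```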
utils/downloads.py
CHANGED
```diff
@@ -9,6 +9,7 @@ import subprocess
 import time
 import urllib
 from pathlib import Path
+from zipfile import ZipFile
 
 import requests
 import torch
@@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
     # Unzip if archive
     if file.suffix == '.zip':
         print('unzipping... ', end='')
-        os.system(f'unzip -q {file}')  # unzip
-        file.unlink()  # remove zip
+        ZipFile(file).extractall(path=file.parent)  # unzip
+        file.unlink()  # remove zip
 
     print(f'Done ({time.time() - t:.1f}s)')
     return r
```
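The extract-then-delete order here is the important detail: `extractall()` raises on a truncated or corrupt download, so `file.unlink()` only runs after a successful extraction. A minimal sketch of the sequence, reusing the function's default `tmp.zip` filename:

```python
from pathlib import Path
from zipfile import ZipFile

file = Path('tmp.zip')  # default filename from gdrive_download() above
if file.suffix == '.zip' and file.is_file():
    ZipFile(file).extractall(path=file.parent)  # raises zipfile.BadZipFile on a corrupt archive
    file.unlink()  # remove zip only after extraction succeeded
```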
utils/general.py
CHANGED
```diff
@@ -18,6 +18,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from subprocess import check_output
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
         if s and autodownload:  # download script
             if s.startswith('http') and s.endswith('.zip'):  # URL
                 f = Path(s).name  # filename
-                print(f'Downloading {s} ...')
+                print(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                 Path(root).mkdir(parents=True, exist_ok=True)  # create root
-                r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
+                ZipFile(f).extractall(path=root)  # unzip
+                Path(f).unlink()  # remove zip
+                r = None  # success
             elif s.startswith('bash '):  # bash script
                 print(f'Running {s} ...')
                 r = os.system(s)
             else:  # python script
                 r = exec(s, {'yaml': data})  # return None
-            print(f"Dataset autodownload {f'success' if r in (0, None) else 'failure'}")
+            print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
         else:
             raise Exception('Dataset not found.')
 
@@ -393,12 +396,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                s = f'unzip -qo {f} -d {dir}'  # unzip
+                ZipFile(f).extractall(path=dir)  # unzip
             elif f.suffix == '.gz':
-                s = f'tar xfz {f} --directory {f.parent}'  # unzip
-            if delete:
-                s += f' && rm {f}'
-            os.system(s)
+                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
+            if delete:
+                f.unlink()  # remove zip
 
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
```