Skip to content

Commit c3ae4e4

Browse files
authored
Multi-threaded VisDrone and VOC downloads (#7108)
* Multi-threaded VOC download
* Update VOC.yaml
* Update
* Update general.py
* Update general.py
1 parent ecc2c7b commit c3ae4e4

File tree

7 files changed: 13 additions and 6 deletions.

data/GlobalWheat2020.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
3434
download: |
3535
from utils.general import download, Path
3636
37+
3738
# Download
3839
dir = Path(yaml['path']) # dataset root dir
3940
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',

data/Objects365.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ download: |
6565
6666
from utils.general import Path, download, np, xyxy2xywhn
6767
68+
6869
# Make Directories
6970
dir = Path(yaml['path']) # dataset root dir
7071
for p in 'images', 'labels':

data/SKU-110K.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ download: |
2424
from tqdm import tqdm
2525
from utils.general import np, pd, Path, download, xyxy2xywh
2626
27+
2728
# Download
2829
dir = Path(yaml['path']) # dataset root dir
2930
parent = Path(dir.parent) # download dir

data/VOC.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ download: |
6262
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
6363
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
6464
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
65-
download(urls, dir=dir / 'images', delete=False)
65+
download(urls, dir=dir / 'images', delete=False, threads=3)
6666
6767
# Convert
6868
path = dir / f'images/VOCdevkit'

data/VisDrone.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ download: |
5454
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
5555
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
5656
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
57-
download(urls, dir=dir)
57+
download(urls, dir=dir, threads=4)
5858
5959
# Convert
6060
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':

data/coco.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
3030
download: |
3131
from utils.general import download, Path
3232
33+
3334
# Download labels
3435
segments = False # segment or box labels
3536
dir = Path(yaml['path']) # dataset root dir

utils/general.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
449449
if val:
450450
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
451451
if not all(x.exists() for x in val):
452-
LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
452+
LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
453453
if s and autodownload: # download script
454+
t = time.time()
454455
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
455456
if s.startswith('http') and s.endswith('.zip'): # URL
456457
f = Path(s).name # filename
@@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
465466
r = os.system(s)
466467
else: # python script
467468
r = exec(s, {'yaml': data}) # return None
468-
LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
469+
dt = f'({round(time.time() - t, 1)}s)'
470+
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
471+
LOGGER.info(emojis(f"Dataset download {s}"))
469472
else:
470-
raise Exception('Dataset not found.')
473+
raise Exception(emojis('Dataset not found ❌'))
471474

472475
return data # dictionary
473476

@@ -491,7 +494,7 @@ def download_one(url, dir):
491494
if curl:
492495
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
493496
else:
494-
torch.hub.download_url_to_file(url, f, progress=True) # torch download
497+
torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
495498
if unzip and f.suffix in ('.zip', '.gz'):
496499
LOGGER.info(f'Unzipping {f}...')
497500
if f.suffix == '.zip':

0 commit comments

Comments
 (0)