update expt name comment and folder parsing for training #978
```diff
@@ -6,6 +6,7 @@
 import shutil
 import time
 from pathlib import Path
+from pprint import pprint

 import numpy as np
 import torch.distributed as dist
```
```diff
@@ -207,7 +208,8 @@ def train(hyp, opt, device, tb_writer=None):
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
     scheduler.last_epoch = start_epoch - 1  # do not move
     scaler = amp.GradScaler(enabled=cuda)
-    logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
+    logger.info('Image sizes %g train, %g test\n'
+                'Using %g dataloader workers\nLogging results to %s\n'
                 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
         model.train()
```
```diff
@@ -393,7 +395,7 @@ def train(hyp, opt, device, tb_writer=None):
     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
     parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
-    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
+    parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
```
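The updated `--name` help text describes the run-folder scheme the PR title refers to: numbered `exp{N}` directories that gain a `_{name}` suffix when one is supplied. Below is a minimal sketch of how such folder parsing can work; the helper name `next_experiment_dir` and its exact matching rules are hypothetical, not code from this PR.

```python
import re
from pathlib import Path


def next_experiment_dir(logdir='runs', name=''):
    # Hypothetical helper: scan logdir for folders named exp{N} or
    # exp{N}_{suffix}, find the highest index, and return the next free
    # directory, appending _{name} when one is supplied.
    root = Path(logdir)
    indices = []
    if root.is_dir():
        for d in root.iterdir():
            m = re.match(r'exp(\d+)', d.name)
            if m:
                indices.append(int(m.group(1)))
    n = max(indices) + 1 if indices else 0
    return root / (f'exp{n}_{name}' if name else f'exp{n}')


# e.g. with runs/exp0 and runs/exp1_baseline present:
# next_experiment_dir('runs', 'tuned') -> runs/exp2_tuned
```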
```diff
@@ -440,15 +442,15 @@ def train(hyp, opt, device, tb_writer=None):
         assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
         opt.batch_size = opt.total_batch_size // opt.world_size

-    logger.info(opt)
+    pprint(vars(opt))
     with open(opt.hyp) as f:
         hyp = yaml.load(f, Loader=yaml.FullLoader)  # load hyps

     # Train
     if not opt.evolve:
         tb_writer = None
         if opt.global_rank in [-1, 0]:
-            logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir)
+            logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/')
             tb_writer = SummaryWriter(log_dir=log_dir)  # runs/exp0

         train(hyp, opt, device, tb_writer)
```
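For context on the `pprint(vars(opt))` change: `logger.info(opt)` prints the one-line `Namespace(...)` repr, while `pprint` of the underlying dict wraps to one key per line once it exceeds the line width. A small standalone illustration with made-up option values:

```python
from argparse import Namespace
from pprint import pprint

opt = Namespace(batch_size=16, device='0', epochs=300, img_size=[640, 640], name='tuned')

print(opt)  # Namespace(batch_size=16, device='0', epochs=300, img_size=[640, 640], name='tuned')
pprint(vars(opt))
# {'batch_size': 16,
#  'device': '0',
#  'epochs': 300,
#  'img_size': [640, 640],
#  'name': 'tuned'}
```

One trade-off worth noting: `pprint` writes to stdout directly, so this output bypasses any handlers attached to `logger`.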
```diff
@@ -470,7 +472,8 @@ def train(hyp, opt, device, tb_writer=None):
             'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
             'iou_t': (0, 0.1, 0.7),  # IoU training threshold
             'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
-            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            # temp fix for https://github.com/ultralytics/yolov5/issues/607#issuecomment-692589883
+            # 'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
             'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
             'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
             'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
```
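Each tuple in this evolution metadata dict is (mutation gain, lower limit, upper limit). A hedged sketch of how tuples like these are typically consumed when mutating hyperparameters; the mutation formula below is illustrative, not lifted from the PR:

```python
import random

meta = {'iou_t': (0, 0.1, 0.7), 'anchor_t': (1, 2.0, 8.0), 'fl_gamma': (0, 0.0, 2.0)}
hyp = {'iou_t': 0.2, 'anchor_t': 4.0, 'fl_gamma': 0.0}

for k, (gain, lo, hi) in meta.items():
    if gain:  # a gain of 0 freezes the hyperparameter, as the commented-out 'anchors' entry now is
        hyp[k] *= 1 + random.gauss(0, 1) * 0.2 * gain  # perturb, scaled by the gain
        hyp[k] = max(lo, min(hyp[k], hi))  # clip to the stated bounds
```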
```diff
@@ -488,11 +491,11 @@ def train(hyp, opt, device, tb_writer=None):
         assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
         opt.notest, opt.nosave = True, True  # only test/save final epoch
         # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
-        yaml_file = Path('runs/evolve/hyp_evolved.yaml')  # save best result here
+        yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml'  # save best result here

         if opt.bucket:
             os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

-        for _ in range(300):  # generations to evolve
+        for _ in tqdm(range(300), desc='perform evolve >>'):  # generations to evolve
```
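The `tqdm` wrapper adds a progress bar over the 300 evolution generations, with `desc` as the bar's prefix. A tiny self-contained illustration, where the sleep stands in for one train-and-evaluate generation:

```python
import time

from tqdm import tqdm

for _ in tqdm(range(3), desc='perform evolve >>'):
    time.sleep(0.1)  # stand-in for one evolution generation
# perform evolve >>: 100%|##########| 3/3 [00:00<00:00, ...]
```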
```
# Hyperparameter Evolution Results
# Generations: 306
#                P        R    mAP.5  mAP.5:.95      box      obj      cls
# Metrics:     0.6    0.936    0.896      0.684   0.0115  0.00805  0.00146
```
This method also makes it easy to track distributed evolution progress, following the example in #607: multiple single-GPU processes can evolve against the same central evolve.txt and hyperparameter file.
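A hedged sketch of that pattern, assuming a shared GCS bucket; the bucket name and the loop body are placeholders, and the `gsutil` calls mirror the one visible in the diff above:

```python
import os

bucket = 'my-bucket'  # assumed shared bucket; every worker points at the same one
for _ in range(300):  # generations, per single-GPU worker
    os.system(f'gsutil cp gs://{bucket}/evolve.txt .')  # pull the shared history, if it exists
    # ... select a parent from evolve.txt, mutate hyp, train and test one run ...
    os.system(f'gsutil cp evolve.txt gs://{bucket}')  # push the updated history back
```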