Commit
•
00917a6
1
Parent(s):
0ada058
update expt name comment and folder parsing for training (#978)
Browse files
* comment
* fix parsing
* fix evolve
* folder
* tqdm
* Update train.py
* Update train.py
* reinstate anchors into meta dict
anchor evolution is working correctly now
* reinstate logger
Prefer the single-line readout for concise logging, which helps simplify notebooks, tutorials, etc.
Co-authored-by: Glenn Jocher <[email protected]>
- train.py +7 -6
- utils/general.py +7 -3
train.py
CHANGED
@@ -207,7 +207,8 @@ def train(hyp, opt, device, tb_writer=None):
|
|
207 |
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
|
208 |
scheduler.last_epoch = start_epoch - 1 # do not move
|
209 |
scaler = amp.GradScaler(enabled=cuda)
|
210 |
-
logger.info('Image sizes %g train, %g test\
|
|
|
211 |
'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
|
212 |
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
|
213 |
model.train()
|
@@ -393,7 +394,7 @@ if __name__ == '__main__':
|
|
393 |
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
|
394 |
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
|
395 |
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
|
396 |
-
parser.add_argument('--name', default='', help='renames
|
397 |
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
398 |
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
|
399 |
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
|
@@ -448,7 +449,7 @@ if __name__ == '__main__':
|
|
448 |
if not opt.evolve:
|
449 |
tb_writer = None
|
450 |
if opt.global_rank in [-1, 0]:
|
451 |
-
logger.info('Start Tensorboard with "tensorboard --logdir
|
452 |
tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
|
453 |
|
454 |
train(hyp, opt, device, tb_writer)
|
@@ -488,7 +489,7 @@ if __name__ == '__main__':
|
|
488 |
assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
|
489 |
opt.notest, opt.nosave = True, True # only test/save final epoch
|
490 |
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
|
491 |
-
yaml_file = Path('
|
492 |
if opt.bucket:
|
493 |
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
|
494 |
|
@@ -532,5 +533,5 @@ if __name__ == '__main__':
|
|
532 |
|
533 |
# Plot results
|
534 |
plot_evolution(yaml_file)
|
535 |
-
print('Hyperparameter evolution complete. Best results saved as:
|
536 |
-
'hyperparameters: $ python train.py --hyp
|
|
|
207 |
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
|
208 |
scheduler.last_epoch = start_epoch - 1 # do not move
|
209 |
scaler = amp.GradScaler(enabled=cuda)
|
210 |
+
logger.info('Image sizes %g train, %g test\n'
|
211 |
+
'Using %g dataloader workers\nLogging results to %s\n'
|
212 |
'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
|
213 |
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
|
214 |
model.train()
|
|
|
394 |
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
|
395 |
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
|
396 |
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
|
397 |
+
parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied')
|
398 |
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
399 |
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
|
400 |
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
|
|
|
449 |
if not opt.evolve:
|
450 |
tb_writer = None
|
451 |
if opt.global_rank in [-1, 0]:
|
452 |
+
logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/')
|
453 |
tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
|
454 |
|
455 |
train(hyp, opt, device, tb_writer)
|
|
|
489 |
assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
|
490 |
opt.notest, opt.nosave = True, True # only test/save final epoch
|
491 |
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
|
492 |
+
yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here
|
493 |
if opt.bucket:
|
494 |
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
|
495 |
|
|
|
533 |
|
534 |
# Plot results
|
535 |
plot_evolution(yaml_file)
|
536 |
+
print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
|
537 |
+
f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
|
utils/general.py
CHANGED
@@ -7,6 +7,7 @@ import random
|
|
7 |
import shutil
|
8 |
import subprocess
|
9 |
import time
|
|
|
10 |
from contextlib import contextmanager
|
11 |
from copy import copy
|
12 |
from pathlib import Path
|
@@ -952,9 +953,12 @@ def increment_dir(dir, comment=''):
|
|
952 |
# Increments a directory runs/exp1 --> runs/exp2_comment
|
953 |
n = 0 # number
|
954 |
dir = str(Path(dir)) # os-agnostic
|
955 |
-
|
956 |
-
if
|
957 |
-
|
|
|
|
|
|
|
958 |
return dir + str(n) + ('_' + comment if comment else '')
|
959 |
|
960 |
|
|
|
7 |
import shutil
|
8 |
import subprocess
|
9 |
import time
|
10 |
+
import re
|
11 |
from contextlib import contextmanager
|
12 |
from copy import copy
|
13 |
from pathlib import Path
|
|
|
953 |
# Increments a directory runs/exp1 --> runs/exp2_comment
|
954 |
n = 0 # number
|
955 |
dir = str(Path(dir)) # os-agnostic
|
956 |
+
dirs = sorted(glob.glob(dir + '*')) # directories
|
957 |
+
if dirs:
|
958 |
+
matches = [re.search(r"exp(\d+)", d) for d in dirs]
|
959 |
+
idxs = [int(m.groups()[0]) for m in matches if m]
|
960 |
+
if idxs:
|
961 |
+
n = max(idxs) + 1 # increment
|
962 |
return dir + str(n) + ('_' + comment if comment else '')
|
963 |
|
964 |
|