Jirka Borovec glenn-jocher committed on
Commit
00917a6
1 Parent(s): 0ada058

update expt name comment and folder parsing for training (#978)

Browse files

* comment

* fix parsing

* fix evolve

* folder

* tqdm

* Update train.py

* Update train.py

* reinstate anchors into meta dict

anchor evolution is working correctly now

* reinstate logger

prefer the single-line readout for concise logging, which helps simplify notebooks, tutorials, etc.

Co-authored-by: Glenn Jocher <[email protected]>

Files changed (2) hide show
  1. train.py +7 -6
  2. utils/general.py +7 -3
train.py CHANGED
@@ -207,7 +207,8 @@ def train(hyp, opt, device, tb_writer=None):
207
  results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
208
  scheduler.last_epoch = start_epoch - 1 # do not move
209
  scaler = amp.GradScaler(enabled=cuda)
210
- logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
 
211
  'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
212
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
213
  model.train()
@@ -393,7 +394,7 @@ if __name__ == '__main__':
393
  parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
394
  parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
395
  parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
396
- parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
397
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
398
  parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
399
  parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
@@ -448,7 +449,7 @@ if __name__ == '__main__':
448
  if not opt.evolve:
449
  tb_writer = None
450
  if opt.global_rank in [-1, 0]:
451
- logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opt.logdir)
452
  tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
453
 
454
  train(hyp, opt, device, tb_writer)
@@ -488,7 +489,7 @@ if __name__ == '__main__':
488
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
489
  opt.notest, opt.nosave = True, True # only test/save final epoch
490
  # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
491
- yaml_file = Path('runs/evolve/hyp_evolved.yaml') # save best result here
492
  if opt.bucket:
493
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
494
 
@@ -532,5 +533,5 @@ if __name__ == '__main__':
532
 
533
  # Plot results
534
  plot_evolution(yaml_file)
535
- print('Hyperparameter evolution complete. Best results saved as: %s\nCommand to train a new model with these '
536
- 'hyperparameters: $ python train.py --hyp %s' % (yaml_file, yaml_file))
 
207
  results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
208
  scheduler.last_epoch = start_epoch - 1 # do not move
209
  scaler = amp.GradScaler(enabled=cuda)
210
+ logger.info('Image sizes %g train, %g test\n'
211
+ 'Using %g dataloader workers\nLogging results to %s\n'
212
  'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
213
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
214
  model.train()
 
394
  parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
395
  parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
396
  parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
397
+ parser.add_argument('--name', default='', help='renames experiment folder exp{N} to exp{N}_{name} if supplied')
398
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
399
  parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
400
  parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
 
449
  if not opt.evolve:
450
  tb_writer = None
451
  if opt.global_rank in [-1, 0]:
452
+ logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/')
453
  tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0
454
 
455
  train(hyp, opt, device, tb_writer)
 
489
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
490
  opt.notest, opt.nosave = True, True # only test/save final epoch
491
  # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
492
+ yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here
493
  if opt.bucket:
494
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
495
 
 
533
 
534
  # Plot results
535
  plot_evolution(yaml_file)
536
+ print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
537
+ f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
utils/general.py CHANGED
@@ -7,6 +7,7 @@ import random
7
  import shutil
8
  import subprocess
9
  import time
 
10
  from contextlib import contextmanager
11
  from copy import copy
12
  from pathlib import Path
@@ -952,9 +953,12 @@ def increment_dir(dir, comment=''):
952
  # Increments a directory runs/exp1 --> runs/exp2_comment
953
  n = 0 # number
954
  dir = str(Path(dir)) # os-agnostic
955
- d = sorted(glob.glob(dir + '*')) # directories
956
- if len(d):
957
- n = max([int(x[len(dir):x.rfind('_') if '_' in Path(x).name else None]) for x in d]) + 1 # increment
 
 
 
958
  return dir + str(n) + ('_' + comment if comment else '')
959
 
960
 
 
7
  import shutil
8
  import subprocess
9
  import time
10
+ import re
11
  from contextlib import contextmanager
12
  from copy import copy
13
  from pathlib import Path
 
953
  # Increments a directory runs/exp1 --> runs/exp2_comment
954
  n = 0 # number
955
  dir = str(Path(dir)) # os-agnostic
956
+ dirs = sorted(glob.glob(dir + '*')) # directories
957
+ if dirs:
958
+ matches = [re.search(r"exp(\d+)", d) for d in dirs]
959
+ idxs = [int(m.groups()[0]) for m in matches if m]
960
+ if idxs:
961
+ n = max(idxs) + 1 # increment
962
  return dir + str(n) + ('_' + comment if comment else '')
963
 
964