stefan34567 glenn-jocher committed on
Commit
187f7c2
1 Parent(s): 96fcde4

Change optimizer parameters group method (#1239)

Browse files

* Change optimizer parameters group method

* Add torch nn

* Change isinstance method (torch.Tensor to nn.Parameter)

* parameter freeze fix, PEP8 reformat

* freeze bug fix

Co-authored-by: Glenn Jocher <[email protected]>

Files changed (1) hide show
  1. train.py +14 -14
train.py CHANGED
@@ -10,6 +10,7 @@ from warnings import warn
10
  import math
11
  import numpy as np
12
  import torch.distributed as dist
 
13
  import torch.nn.functional as F
14
  import torch.optim as optim
15
  import torch.optim.lr_scheduler as lr_scheduler
@@ -80,12 +81,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
80
  model = Model(opt.cfg, ch=3, nc=nc).to(device) # create
81
 
82
  # Freeze
83
- freeze = ['', ] # parameter names to freeze (full or partial)
84
- if any(freeze):
85
- for k, v in model.named_parameters():
86
- if any(x in k for x in freeze):
87
- print('freezing %s' % k)
88
- v.requires_grad = False
89
 
90
  # Optimizer
91
  nbs = 64 # nominal batch size
@@ -93,14 +94,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
93
  hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay
94
 
95
  pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
96
- for k, v in model.named_parameters():
97
- v.requires_grad = True
98
- if '.bias' in k:
99
- pg2.append(v) # biases
100
- elif '.weight' in k and '.bn' not in k:
101
- pg1.append(v) # apply weight decay
102
- else:
103
- pg0.append(v) # all else
104
 
105
  if opt.adam:
106
  optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
 
10
  import math
11
  import numpy as np
12
  import torch.distributed as dist
13
+ import torch.nn as nn
14
  import torch.nn.functional as F
15
  import torch.optim as optim
16
  import torch.optim.lr_scheduler as lr_scheduler
 
81
  model = Model(opt.cfg, ch=3, nc=nc).to(device) # create
82
 
83
  # Freeze
84
+ freeze = [] # parameter names to freeze (full or partial)
85
+ for k, v in model.named_parameters():
86
+ v.requires_grad = True # train all layers
87
+ if any(x in k for x in freeze):
88
+ print('freezing %s' % k)
89
+ v.requires_grad = False
90
 
91
  # Optimizer
92
  nbs = 64 # nominal batch size
 
94
  hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay
95
 
96
  pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
97
+ for k, v in model.named_modules():
98
+ if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
99
+ pg2.append(v.bias) # biases
100
+ if isinstance(v, nn.BatchNorm2d):
101
+ pg0.append(v.weight) # no decay
102
+ elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
103
+ pg1.append(v.weight) # apply decay
 
104
 
105
  if opt.adam:
106
  optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum