Commit 187f7c2 · 1 parent: 96fcde4

Change optimizer parameters group method (#1239)

* Change optimizer parameters group method
* Add torch nn
* Change isinstance method (torch.Tensor to nn.Parameter)
* parameter freeze fix, PEP8 reformat
* freeze bug fix

Co-authored-by: Glenn Jocher <[email protected]>
train.py
CHANGED
@@ -10,6 +10,7 @@ from warnings import warn
 import math
 import numpy as np
 import torch.distributed as dist
+import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
@@ -80,12 +81,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

     # Freeze
-    freeze = ['', ]  # parameter names to freeze (full or partial)
-    if any(freeze):
-        for k, v in model.named_parameters():
-            if any(x in k for x in freeze):
-                print('freezing %s' % k)
-                v.requires_grad = False
+    freeze = []  # parameter names to freeze (full or partial)
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            print('freezing %s' % k)
+            v.requires_grad = False

     # Optimizer
     nbs = 64  # nominal batch size
@@ -93,14 +94,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay

     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-    for k, v in model.named_parameters():
-        v.requires_grad = True
-        if '.bias' in k:
-            pg2.append(v)  # biases
-        elif '.weight' in k and '.bn' not in k:
-            pg1.append(v)  # apply weight decay
-        else:
-            pg0.append(v)  # all else
+    for k, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            pg2.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            pg0.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            pg1.append(v.weight)  # apply decay

     if opt.adam:
         optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum