Spaces:
Runtime error
Runtime error
# Copyright (c) Microsoft Corporation. | |
# Licensed under the MIT License. | |
import time | |
from collections import OrderedDict | |
from options.train_options import TrainOptions | |
from data.data_loader import CreateDataLoader | |
from models.mapping_model import Pix2PixHDModel_Mapping | |
import util.util as util | |
from util.visualizer import Visualizer | |
import os | |
import numpy as np | |
import torch | |
import torchvision.utils as vutils | |
from torch.autograd import Variable | |
import datetime | |
import random | |
opt = TrainOptions().parse() | |
visualizer = Visualizer(opt) | |
iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt') | |
if opt.continue_train: | |
try: | |
start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int) | |
except: | |
start_epoch, epoch_iter = 1, 0 | |
visualizer.print_save('Resuming from epoch %d at iteration %d' % (start_epoch-1, epoch_iter)) | |
else: | |
start_epoch, epoch_iter = 1, 0 | |
if opt.which_epoch != "latest": | |
start_epoch=int(opt.which_epoch) | |
visualizer.print_save('Notice : Resuming from epoch %d at iteration %d' % (start_epoch - 1, epoch_iter)) | |
opt.start_epoch=start_epoch | |
### temp for continue train unfixed decoder | |
data_loader = CreateDataLoader(opt) | |
dataset = data_loader.load_data() | |
dataset_size = len(dataset) * opt.batchSize | |
print('#training images = %d' % dataset_size) | |
model = Pix2PixHDModel_Mapping() | |
model.initialize(opt) | |
path = os.path.join(opt.checkpoints_dir, opt.name, 'model.txt') | |
fd = open(path, 'w') | |
if opt.use_skip_model: | |
fd.write(str(model.mapping_net)) | |
fd.close() | |
else: | |
fd.write(str(model.netG_A)) | |
fd.write(str(model.mapping_net)) | |
fd.close() | |
if opt.isTrain and len(opt.gpu_ids) > 1: | |
model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids) | |
total_steps = (start_epoch-1) * dataset_size + epoch_iter | |
display_delta = total_steps % opt.display_freq | |
print_delta = total_steps % opt.print_freq | |
save_delta = total_steps % opt.save_latest_freq | |
### used for recovering training | |
for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1): | |
epoch_s_t=datetime.datetime.now() | |
epoch_start_time = time.time() | |
if epoch != start_epoch: | |
epoch_iter = epoch_iter % dataset_size | |
for i, data in enumerate(dataset, start=epoch_iter): | |
iter_start_time = time.time() | |
total_steps += opt.batchSize | |
epoch_iter += opt.batchSize | |
# whether to collect output images | |
save_fake = total_steps % opt.display_freq == display_delta | |
############## Forward Pass ###################### | |
#print(pair) | |
losses, generated = model(Variable(data['label']), Variable(data['inst']), | |
Variable(data['image']), Variable(data['feat']), infer=save_fake) | |
# sum per device losses | |
losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ] | |
loss_dict = dict(zip(model.module.loss_names, losses)) | |
# calculate final loss scalar | |
loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5 | |
loss_G = loss_dict['G_GAN'] + loss_dict.get('G_GAN_Feat',0) + loss_dict.get('G_VGG',0) + loss_dict.get('G_Feat_L2', 0) +loss_dict.get('Smooth_L1', 0)+loss_dict.get('G_Feat_L2_Stage_1',0) | |
#loss_G = loss_dict['G_Feat_L2'] | |
############### Backward Pass #################### | |
# update generator weights | |
model.module.optimizer_mapping.zero_grad() | |
loss_G.backward() | |
model.module.optimizer_mapping.step() | |
# update discriminator weights | |
model.module.optimizer_D.zero_grad() | |
loss_D.backward() | |
model.module.optimizer_D.step() | |
############## Display results and errors ########## | |
### print out errors | |
if i == 0 or total_steps % opt.print_freq == print_delta: | |
errors = {k: v.data if not isinstance(v, int) else v for k, v in loss_dict.items()} | |
t = (time.time() - iter_start_time) / opt.batchSize | |
visualizer.print_current_errors(epoch, epoch_iter, errors, t,model.module.old_lr) | |
visualizer.plot_current_errors(errors, total_steps) | |
### display output images | |
if save_fake: | |
if not os.path.exists(opt.outputs_dir + opt.name): | |
os.makedirs(opt.outputs_dir + opt.name) | |
imgs_num = 5 | |
if opt.NL_use_mask: | |
mask=data['inst'][:imgs_num] | |
mask=mask.repeat(1,3,1,1) | |
imgs = torch.cat((data['label'][:imgs_num], mask,generated.data.cpu()[:imgs_num], data['image'][:imgs_num]), 0) | |
else: | |
imgs = torch.cat((data['label'][:imgs_num], generated.data.cpu()[:imgs_num], data['image'][:imgs_num]), 0) | |
imgs=(imgs+1.)/2.0 ## de-normalize | |
try: | |
image_grid = vutils.save_image(imgs, opt.outputs_dir + opt.name + '/' + str(epoch) + '_' + str(total_steps) + '.png', | |
nrow=imgs_num, padding=0, normalize=True) | |
except OSError as err: | |
print(err) | |
if epoch_iter >= dataset_size: | |
break | |
# end of epoch | |
epoch_e_t=datetime.datetime.now() | |
iter_end_time = time.time() | |
print('End of epoch %d / %d \t Time Taken: %s' % | |
(epoch, opt.niter + opt.niter_decay, str(epoch_e_t-epoch_s_t))) | |
### save model for this epoch | |
if epoch % opt.save_epoch_freq == 0: | |
print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps)) | |
model.module.save('latest') | |
model.module.save(epoch) | |
np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d') | |
### instead of only training the local enhancer, train the entire network after certain iterations | |
if (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global): | |
model.module.update_fixed_params() | |
### linearly decay learning rate after certain iterations | |
if epoch > opt.niter: | |
model.module.update_learning_rate() |