From 92c56e470f4aaa96e698c6d5c7aef4687965facc Mon Sep 17 00:00:00 2001
From: kkshmz
Date: Mon, 21 Oct 2019 17:46:08 +0900
Subject: [PATCH 01/14] change scipy to PIL: scipy.misc.toimage is deprecated
 (removed in SciPy 1.2), and recent versions normalize saved images so that
 min(data) becomes black and max(data) becomes white, which would require
 extra workaround code

---
 util/visualizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/visualizer.py b/util/visualizer.py
index 584ac45d..87f2fde2 100755
--- a/util/visualizer.py
+++ b/util/visualizer.py
@@ -4,7 +4,7 @@
 import time
 from . import util
 from . import html
-import scipy.misc
+from PIL import Image
 try:
     from StringIO import StringIO # Python 2.7
 except ImportError:
@@ -43,7 +43,7 @@ def display_current_results(self, visuals, epoch, step):
                     s = StringIO()
                 except:
                     s = BytesIO()
-                scipy.misc.toimage(image_numpy).save(s, format="jpeg")
+                Image.fromarray(image_numpy).save(s, format="jpeg")
                 # Create an Image object
                 img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1])
                 # Create a Summary value

From 1414723838842bbcdc04d057a077e1fd08a89aa4 Mon Sep 17 00:00:00 2001
From: Simon Hessner
Date: Thu, 26 Aug 2021 14:28:53 +0100
Subject: [PATCH 02/14] Use math instead of fractions in order to be
 compatible with Python 3.9 (fractions.gcd was removed in 3.9; math.gcd has
 been available since 3.5)

---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index acedac25..8d2655b2 100755
--- a/train.py
+++ b/train.py
@@ -5,8 +5,8 @@
 from torch.autograd import Variable
 from collections import OrderedDict
 from subprocess import call
-import fractions
-def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0
+import math
+def lcm(a,b): return abs(a * b)//math.gcd(a,b) if a and b else 0

 from options.train_options import TrainOptions
 from data.data_loader import CreateDataLoader
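
To see what the two fixes above amount to in isolation, here is a minimal self-contained sketch (the function names and the sample array are illustrative, not code from this repo). `Image.fromarray` writes uint8 pixel values through unchanged, whereas the removed `scipy.misc.toimage` call rescaled min..max to the full 0..255 range, and integer floor division keeps `lcm` an int under Python 3:

```python
import math
from io import BytesIO

import numpy as np
from PIL import Image

def encode_jpeg(image_numpy):
    # PIL replacement for scipy.misc.toimage(...).save(...): no implicit
    # rescaling of the pixel values happens here.
    buf = BytesIO()
    Image.fromarray(image_numpy).save(buf, format="jpeg")
    return buf.getvalue()

def lcm(a, b):
    # math.gcd returns an int; '//' keeps the result an int as well,
    # while plain '/' would promote it to float under Python 3.
    return abs(a * b) // math.gcd(a, b) if a and b else 0

# A random HxWx3 uint8 image survives the round trip without rescaling.
img = (np.random.rand(64, 128, 3) * 255).astype(np.uint8)
jpeg_bytes = encode_jpeg(img)
assert lcm(100, 8) == 200
```
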
From 67dd8592bb96a072ef5c44ab0ca161bcaeb6015c Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Sun, 20 Feb 2022 15:12:38 +0100
Subject: [PATCH 03/14] fix exceptions, no printing

---
 data/custom_dataset_data_loader.py | 2 +-
 data/data_loader.py                | 2 +-
 models/base_model.py               | 2 +-
 models/networks.py                 | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/data/custom_dataset_data_loader.py b/data/custom_dataset_data_loader.py
index 0b982541..b73cdc55 100755
--- a/data/custom_dataset_data_loader.py
+++ b/data/custom_dataset_data_loader.py
@@ -7,7 +7,7 @@ def CreateDataset(opt):
     from data.aligned_dataset import AlignedDataset
     dataset = AlignedDataset()

-    print("dataset [%s] was created" % (dataset.name()))
+    #print("dataset [%s] was created" % (dataset.name()))
     dataset.initialize(opt)
     return dataset

diff --git a/data/data_loader.py b/data/data_loader.py
index 2a4433a2..0217311e 100755
--- a/data/data_loader.py
+++ b/data/data_loader.py
@@ -2,6 +2,6 @@
 def CreateDataLoader(opt):
     from data.custom_dataset_data_loader import CustomDatasetDataLoader
     data_loader = CustomDatasetDataLoader()
-    print(data_loader.name())
+    #print(data_loader.name())
     data_loader.initialize(opt)
     return data_loader

diff --git a/models/base_model.py b/models/base_model.py
index f3f6b535..31a77244 100755
--- a/models/base_model.py
+++ b/models/base_model.py
@@ -55,7 +55,7 @@ def load_network(self, network, network_label, epoch_label, save_dir=''):
         if not os.path.isfile(save_path):
             print('%s not exists yet!' % save_path)
             if network_label == 'G':
-                raise('Generator must exist!')
+                raise(Exception('Generator must exist!'))
         else:
             #network.load_state_dict(torch.load(save_path))
             try:

diff --git a/models/networks.py b/models/networks.py
index ee05d85d..b66acd8b 100755
--- a/models/networks.py
+++ b/models/networks.py
@@ -35,8 +35,8 @@ def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_glo
     elif netG == 'encoder':
         netG = Encoder(input_nc, output_nc, ngf, n_downsample_global, norm_layer)
     else:
-        raise('generator not implemented!')
-    print(netG)
+        raise(Exception('generator not implemented!'))
+    #print(netG)
     if len(gpu_ids) > 0:
         assert(torch.cuda.is_available())
         netG.cuda(gpu_ids[0])
@@ -46,7 +46,7 @@
 def define_D(input_nc, ndf, n_layers_D, norm='instance', use_sigmoid=False, num_D=1, getIntermFeat=False, gpu_ids=[]):
     norm_layer = get_norm_layer(norm_type=norm)
     netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, norm_layer, use_sigmoid, num_D, getIntermFeat)
-    print(netD)
+    #print(netD)
     if len(gpu_ids) > 0:
         assert(torch.cuda.is_available())
         netD.cuda(gpu_ids[0])

From 7b03ebed620ef135d440e4fe00d5d5ad2a403117 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Sun, 20 Feb 2022 15:14:03 +0100
Subject: [PATCH 04/14] BaseOptions: allow passing args and silent mode

---
 options/base_options.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/options/base_options.py b/options/base_options.py
index 0d5e769e..4e0450d6 100755
--- a/options/base_options.py
+++ b/options/base_options.py
@@ -62,10 +62,10 @@ def initialize(self):
         self.initialized = True

-    def parse(self, save=True):
+    def parse(self, args=None, save=True, silent=False):
         if not self.initialized:
             self.initialize()
-        self.opt = self.parser.parse_args()
+        self.opt = self.parser.parse_args(args=args)
         self.opt.isTrain = self.isTrain   # train or test

         str_ids = self.opt.gpu_ids.split(',')
@@ -81,10 +81,11 @@ def parse(self, save=True):

         args = vars(self.opt)

-        print('------------ Options -------------')
-        for k, v in sorted(args.items()):
-            print('%s: %s' % (str(k), str(v)))
-        print('-------------- End ----------------')
+        if not silent:
+            print('------------ Options -------------')
+            for k, v in sorted(args.items()):
+                print('%s: %s' % (str(k), str(v)))
+            print('-------------- End ----------------')

         # save to the disk
         expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)

From f7a88fd812d7f91e533672ee288f3334f6f64e47 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Sun, 20 Feb 2022 15:17:18 +0100
Subject: [PATCH 05/14] update to PyTorch >1.0 (no 'Variable'), allow CPU-only

---
 models/pix2pixHD_model.py | 49 +++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/models/pix2pixHD_model.py b/models/pix2pixHD_model.py
index fafdec0b..20a1e253 100755
--- a/models/pix2pixHD_model.py
+++ b/models/pix2pixHD_model.py
@@ -1,7 +1,6 @@
 import numpy as np
 import torch
 import os
-from torch.autograd import Variable
 from util.image_pool import ImagePool
 from .base_model import BaseModel
 from . 
import networks @@ -108,35 +107,36 @@ def initialize(self, opt): params = list(self.netD.parameters()) self.optimizer_D = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999)) - def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None, infer=False): + def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None): if self.opt.label_nc == 0: - input_label = label_map.data.cuda() + input_label = label_map else: # create one-hot vector for label map size = label_map.size() oneHot_size = (size[0], self.opt.label_nc, size[2], size[3]) - input_label = torch.cuda.FloatTensor(torch.Size(oneHot_size)).zero_() - input_label = input_label.scatter_(1, label_map.data.long().cuda(), 1.0) + input_label = self.Tensor(torch.Size(oneHot_size)).zero_() + input_label = input_label.scatter_(1, label_map.long(), 1.0) if self.opt.data_type == 16: input_label = input_label.half() + if len(self.gpu_ids): + input_label = input_label.cuda() + inst_map = inst_map.cuda() # get edges from instance map if not self.opt.no_instance: - inst_map = inst_map.data.cuda() edge_map = self.get_edges(inst_map) - input_label = torch.cat((input_label, edge_map), dim=1) - input_label = Variable(input_label, volatile=infer) + input_label = torch.cat((input_label, edge_map), dim=1) # real images for training - if real_image is not None: - real_image = Variable(real_image.data.cuda()) + if real_image is not None and len(self.gpu_ids): + real_image = real_image.cuda() # instance map for feature encoding if self.use_features: # get precomputed feature maps - if self.opt.load_features: - feat_map = Variable(feat_map.data.cuda()) - if self.opt.label_feat: + if self.opt.load_features and len(self.gpu_ids): + feat_map = feat_map.cuda() + if self.opt.label_feat and len(self.gpu_ids): inst_map = label_map.cuda() return input_label, inst_map, real_image, feat_map @@ -194,8 +194,8 @@ def forward(self, label, inst, image, feat, infer=False): def inference(self, label, inst, image=None): # Encode Inputs - image = Variable(image) if image is not None else None - input_label, inst_map, real_image, _ = self.encode_input(Variable(label), Variable(inst), image, infer=True) + image = image if image is not None else None + input_label, inst_map, real_image, _ = self.encode_input(label, inst, image) # Fake Generation if self.use_features: @@ -209,10 +209,7 @@ def inference(self, label, inst, image=None): else: input_concat = input_label - if torch.__version__.startswith('0.4'): - with torch.no_grad(): - fake_image = self.netG.forward(input_concat) - else: + with torch.no_grad(): fake_image = self.netG.forward(input_concat) return fake_image @@ -238,11 +235,14 @@ def sample_features(self, inst): return feat_map def encode_features(self, image, inst): - image = Variable(image.cuda(), volatile=True) + if len(self.gpu_ids): + image = image.cuda() + inst = inst.cuda() + with torch.no_grad(): + feat_map = self.netE.forward(image, inst).cpu() feat_num = self.opt.feat_num h, w = inst.size()[2], inst.size()[3] block_num = 32 - feat_map = self.netE.forward(image, inst.cuda()) inst_np = inst.cpu().numpy().astype(int) feature = {} for i in range(self.opt.label_nc): @@ -254,13 +254,16 @@ def encode_features(self, image, inst): idx = idx[num//2,:] val = np.zeros((1, feat_num+1)) for k in range(feat_num): - val[0, k] = feat_map[idx[0], idx[1] + k, idx[2], idx[3]].data[0] + val[0, k] = feat_map[idx[0], idx[1] + k, idx[2], idx[3]].item() val[0, feat_num] = float(num) / (h * w // block_num) feature[label] = 
np.append(feature[label], val, axis=0) return feature def get_edges(self, t): - edge = torch.cuda.ByteTensor(t.size()).zero_() + edge = torch.ByteTensor(t.size()) + if len(self.gpu_ids): + edge = edge.cuda() + edge = edge.zero_() edge[:,:,:,1:] = edge[:,:,:,1:] | (t[:,:,:,1:] != t[:,:,:,:-1]) edge[:,:,:,:-1] = edge[:,:,:,:-1] | (t[:,:,:,1:] != t[:,:,:,:-1]) edge[:,:,1:,:] = edge[:,:,1:,:] | (t[:,:,1:,:] != t[:,:,:-1,:]) From e524de235b251adddee6ca2bcbd31115a834077c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 20 Feb 2022 15:18:06 +0100 Subject: [PATCH 06/14] update to TorchVision --- data/base_dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data/base_dataset.py b/data/base_dataset.py index ece8813d..f47cba5b 100755 --- a/data/base_dataset.py +++ b/data/base_dataset.py @@ -34,7 +34,9 @@ def get_transform(opt, params, method=Image.BICUBIC, normalize=True): transform_list = [] if 'resize' in opt.resize_or_crop: osize = [opt.loadSize, opt.loadSize] - transform_list.append(transforms.Scale(osize, method)) + # torchvision says we should use transforms.InterpolationMode, but does not export this properly + # so we have to rely on the automatic conversion from PIL.Image enum, with warning + transform_list.append(transforms.Resize(osize, interpolation=method)) elif 'scale_width' in opt.resize_or_crop: transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.loadSize, method))) From 8b0af0c948abdb2b577c2b1a370f5915b20115a9 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 24 Mar 2022 00:03:46 +0100 Subject: [PATCH 07/14] forgot to move label_map to GPU, too --- models/pix2pixHD_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/models/pix2pixHD_model.py b/models/pix2pixHD_model.py index 20a1e253..29a708e2 100755 --- a/models/pix2pixHD_model.py +++ b/models/pix2pixHD_model.py @@ -111,7 +111,9 @@ def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None) if self.opt.label_nc == 0: input_label = label_map else: - # create one-hot vector for label map + if len(self.gpu_ids): + label_map = label_map.cuda() + # create one-hot vector for label map size = label_map.size() oneHot_size = (size[0], self.opt.label_nc, size[2], size[3]) input_label = self.Tensor(torch.Size(oneHot_size)).zero_() From 407e2eb655ba1d5edc4138d897467fab22e1db89 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 23 Mar 2022 20:31:10 +0100 Subject: [PATCH 08/14] mv code to pkg pix2pixhd --- {data => pix2pixhd/data}/__init__.py | 0 {data => pix2pixhd/data}/aligned_dataset.py | 0 {data => pix2pixhd/data}/base_data_loader.py | 0 {data => pix2pixhd/data}/base_dataset.py | 0 {data => pix2pixhd/data}/custom_dataset_data_loader.py | 0 {data => pix2pixhd/data}/data_loader.py | 0 {data => pix2pixhd/data}/image_folder.py | 0 encode_features.py => pix2pixhd/encode_features.py | 0 {models => pix2pixhd/models}/__init__.py | 0 {models => pix2pixhd/models}/base_model.py | 0 {models => pix2pixhd/models}/models.py | 0 {models => pix2pixhd/models}/networks.py | 0 {models => pix2pixhd/models}/pix2pixHD_model.py | 0 {models => pix2pixhd/models}/ui_model.py | 0 {options => pix2pixhd/options}/__init__.py | 0 {options => pix2pixhd/options}/base_options.py | 0 {options => pix2pixhd/options}/test_options.py | 0 {options => pix2pixhd/options}/train_options.py | 0 .../precompute_feature_maps.py | 0 run_engine.py => pix2pixhd/run_engine.py | 0 test.py => pix2pixhd/test.py | 0 train.py => pix2pixhd/train.py | 0 {util => 
pix2pixhd/util}/__init__.py | 0 {util => pix2pixhd/util}/html.py | 0 {util => pix2pixhd/util}/image_pool.py | 0 {util => pix2pixhd/util}/util.py | 0 {util => pix2pixhd/util}/visualizer.py | 0 27 files changed, 0 insertions(+), 0 deletions(-) rename {data => pix2pixhd/data}/__init__.py (100%) rename {data => pix2pixhd/data}/aligned_dataset.py (100%) rename {data => pix2pixhd/data}/base_data_loader.py (100%) rename {data => pix2pixhd/data}/base_dataset.py (100%) rename {data => pix2pixhd/data}/custom_dataset_data_loader.py (100%) rename {data => pix2pixhd/data}/data_loader.py (100%) rename {data => pix2pixhd/data}/image_folder.py (100%) rename encode_features.py => pix2pixhd/encode_features.py (100%) rename {models => pix2pixhd/models}/__init__.py (100%) rename {models => pix2pixhd/models}/base_model.py (100%) rename {models => pix2pixhd/models}/models.py (100%) rename {models => pix2pixhd/models}/networks.py (100%) rename {models => pix2pixhd/models}/pix2pixHD_model.py (100%) rename {models => pix2pixhd/models}/ui_model.py (100%) rename {options => pix2pixhd/options}/__init__.py (100%) rename {options => pix2pixhd/options}/base_options.py (100%) rename {options => pix2pixhd/options}/test_options.py (100%) rename {options => pix2pixhd/options}/train_options.py (100%) rename precompute_feature_maps.py => pix2pixhd/precompute_feature_maps.py (100%) rename run_engine.py => pix2pixhd/run_engine.py (100%) rename test.py => pix2pixhd/test.py (100%) rename train.py => pix2pixhd/train.py (100%) rename {util => pix2pixhd/util}/__init__.py (100%) rename {util => pix2pixhd/util}/html.py (100%) rename {util => pix2pixhd/util}/image_pool.py (100%) rename {util => pix2pixhd/util}/util.py (100%) rename {util => pix2pixhd/util}/visualizer.py (100%) diff --git a/data/__init__.py b/pix2pixhd/data/__init__.py similarity index 100% rename from data/__init__.py rename to pix2pixhd/data/__init__.py diff --git a/data/aligned_dataset.py b/pix2pixhd/data/aligned_dataset.py similarity index 100% rename from data/aligned_dataset.py rename to pix2pixhd/data/aligned_dataset.py diff --git a/data/base_data_loader.py b/pix2pixhd/data/base_data_loader.py similarity index 100% rename from data/base_data_loader.py rename to pix2pixhd/data/base_data_loader.py diff --git a/data/base_dataset.py b/pix2pixhd/data/base_dataset.py similarity index 100% rename from data/base_dataset.py rename to pix2pixhd/data/base_dataset.py diff --git a/data/custom_dataset_data_loader.py b/pix2pixhd/data/custom_dataset_data_loader.py similarity index 100% rename from data/custom_dataset_data_loader.py rename to pix2pixhd/data/custom_dataset_data_loader.py diff --git a/data/data_loader.py b/pix2pixhd/data/data_loader.py similarity index 100% rename from data/data_loader.py rename to pix2pixhd/data/data_loader.py diff --git a/data/image_folder.py b/pix2pixhd/data/image_folder.py similarity index 100% rename from data/image_folder.py rename to pix2pixhd/data/image_folder.py diff --git a/encode_features.py b/pix2pixhd/encode_features.py similarity index 100% rename from encode_features.py rename to pix2pixhd/encode_features.py diff --git a/models/__init__.py b/pix2pixhd/models/__init__.py similarity index 100% rename from models/__init__.py rename to pix2pixhd/models/__init__.py diff --git a/models/base_model.py b/pix2pixhd/models/base_model.py similarity index 100% rename from models/base_model.py rename to pix2pixhd/models/base_model.py diff --git a/models/models.py b/pix2pixhd/models/models.py similarity index 100% rename from models/models.py 
rename to pix2pixhd/models/models.py diff --git a/models/networks.py b/pix2pixhd/models/networks.py similarity index 100% rename from models/networks.py rename to pix2pixhd/models/networks.py diff --git a/models/pix2pixHD_model.py b/pix2pixhd/models/pix2pixHD_model.py similarity index 100% rename from models/pix2pixHD_model.py rename to pix2pixhd/models/pix2pixHD_model.py diff --git a/models/ui_model.py b/pix2pixhd/models/ui_model.py similarity index 100% rename from models/ui_model.py rename to pix2pixhd/models/ui_model.py diff --git a/options/__init__.py b/pix2pixhd/options/__init__.py similarity index 100% rename from options/__init__.py rename to pix2pixhd/options/__init__.py diff --git a/options/base_options.py b/pix2pixhd/options/base_options.py similarity index 100% rename from options/base_options.py rename to pix2pixhd/options/base_options.py diff --git a/options/test_options.py b/pix2pixhd/options/test_options.py similarity index 100% rename from options/test_options.py rename to pix2pixhd/options/test_options.py diff --git a/options/train_options.py b/pix2pixhd/options/train_options.py similarity index 100% rename from options/train_options.py rename to pix2pixhd/options/train_options.py diff --git a/precompute_feature_maps.py b/pix2pixhd/precompute_feature_maps.py similarity index 100% rename from precompute_feature_maps.py rename to pix2pixhd/precompute_feature_maps.py diff --git a/run_engine.py b/pix2pixhd/run_engine.py similarity index 100% rename from run_engine.py rename to pix2pixhd/run_engine.py diff --git a/test.py b/pix2pixhd/test.py similarity index 100% rename from test.py rename to pix2pixhd/test.py diff --git a/train.py b/pix2pixhd/train.py similarity index 100% rename from train.py rename to pix2pixhd/train.py diff --git a/util/__init__.py b/pix2pixhd/util/__init__.py similarity index 100% rename from util/__init__.py rename to pix2pixhd/util/__init__.py diff --git a/util/html.py b/pix2pixhd/util/html.py similarity index 100% rename from util/html.py rename to pix2pixhd/util/html.py diff --git a/util/image_pool.py b/pix2pixhd/util/image_pool.py similarity index 100% rename from util/image_pool.py rename to pix2pixhd/util/image_pool.py diff --git a/util/util.py b/pix2pixhd/util/util.py similarity index 100% rename from util/util.py rename to pix2pixhd/util/util.py diff --git a/util/visualizer.py b/pix2pixhd/util/visualizer.py similarity index 100% rename from util/visualizer.py rename to pix2pixhd/util/visualizer.py From 048c24f70c5a127dc696a40bc70b3cb0892e4e94 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 23 Mar 2022 23:33:19 +0100 Subject: [PATCH 09/14] =?UTF-8?q?provide=20setuptools=20packaging=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - rewrite all local imports to relative paths - reorder imports correctly - add setup.py with CLI entry_points for top-level scripts - rewrite shell scripts to use CLIs - update README --- README.md | 34 +++++++++++--------- pix2pixhd/__init__.py | 0 pix2pixhd/data/aligned_dataset.py | 6 ++-- pix2pixhd/data/base_data_loader.py | 2 +- pix2pixhd/data/base_dataset.py | 6 ++-- pix2pixhd/data/custom_dataset_data_loader.py | 4 +-- pix2pixhd/data/data_loader.py | 2 +- pix2pixhd/data/image_folder.py | 4 +-- pix2pixhd/encode_features.py | 16 ++++++--- pix2pixhd/models/base_model.py | 4 +-- pix2pixhd/models/networks.py | 8 ++--- pix2pixhd/models/pix2pixHD_model.py | 4 +-- pix2pixhd/models/ui_model.py | 11 +++---- pix2pixhd/options/base_options.py | 2 +- 
pix2pixhd/precompute_feature_maps.py | 16 ++++++--- pix2pixhd/run_engine.py | 2 -- pix2pixhd/test.py | 22 ++++++++----- pix2pixhd/train.py | 25 ++++++++------ pix2pixhd/util/visualizer.py | 4 +-- requirements.txt | 8 +++++ scripts/test_1024p.sh | 2 +- scripts/test_1024p_feat.sh | 4 +-- scripts/test_512p.sh | 3 +- scripts/test_512p_feat.sh | 5 +-- scripts/train_1024p_12G.sh | 3 +- scripts/train_1024p_24G.sh | 3 +- scripts/train_1024p_feat_12G.sh | 5 +-- scripts/train_1024p_feat_24G.sh | 5 +-- scripts/train_512p.sh | 3 +- scripts/train_512p_feat.sh | 3 +- scripts/train_512p_fp16.sh | 3 +- scripts/train_512p_fp16_multigpu.sh | 3 +- scripts/train_512p_multigpu.sh | 3 +- setup.py | 34 ++++++++++++++++++++ 34 files changed, 168 insertions(+), 91 deletions(-) create mode 100644 pix2pixhd/__init__.py create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/README.md b/README.md index 7c3315cd..2e2c8948 100755 --- a/README.md +++ b/README.md @@ -52,20 +52,16 @@ Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photor ## Prerequisites - Linux or macOS - Python 2 or 3 -- NVIDIA GPU (11G memory or larger) + CUDA cuDNN +- [optionally:] NVIDIA GPU (11G memory or larger) + CUDA cuDNN ## Getting Started ### Installation -- Install PyTorch and dependencies from http://pytorch.org -- Install python libraries [dominate](https://github.com/Knio/dominate). -```bash -pip install dominate -``` +- Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/), and activate it - Clone this repo: -```bash -git clone https://github.com/NVIDIA/pix2pixHD -cd pix2pixHD -``` + + git clone https://github.com/NVIDIA/pix2pixHD + pip install pix2pixHD + ### Testing @@ -74,12 +70,16 @@ cd pix2pixHD - Test the model (`bash ./scripts/test_1024p.sh`): ```bash #!./scripts/test_1024p.sh -python test.py --name label2city_1024p --netG local --ngf 32 --resize_or_crop none +pix2pixhd-test --name label2city_1024p --netG local --ngf 32 --resize_or_crop none ``` The test results will be saved to a html file here: `./results/label2city_1024p/test_latest/index.html`. More example scripts can be found in the `scripts` directory. +For other options, cf. + + pix2pixhd-test --help + ### Dataset - We use the Cityscapes dataset. To train a model on the full dataset, please download it from the [official website](https://www.cityscapes-dataset.com/) (registration required). @@ -90,16 +90,20 @@ After downloading, please put it under the `datasets` folder in the same way the - Train a model at 1024 x 512 resolution (`bash ./scripts/train_512p.sh`): ```bash #!./scripts/train_512p.sh -python train.py --name label2city_512p +pix2pixhd-train --name label2city_512p ``` - To view training results, please checkout intermediate results in `./checkpoints/label2city_512p/web/index.html`. If you have tensorflow installed, you can see tensorboard logs in `./checkpoints/label2city_512p/logs` by adding `--tf_log` to the training scripts. +For other options, cf. + + pix2pixhd-train --help + ### Multi-GPU training - Train a model using multiple GPUs (`bash ./scripts/train_512p_multigpu.sh`): ```bash #!./scripts/train_512p_multigpu.sh -python train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 +pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 ``` Note: this is not tested and we trained our model using single GPU only. Please use at your own discretion. 
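
A note on the console scripts this patch introduces (commentary, not part of the patch): `setup.py` below maps e.g. `pix2pixhd-train` to `pix2pixhd.train:main`, while the modules keep their logic at module level and only gain a `def main(): pass` stub, so the entry points rely on import side effects. A more conventional shape, sketched here with `argparse` standing in for `TrainOptions` so the example stays self-contained, moves the work into `main()`; running modules as `python -m pix2pixhd.train` rather than `python pix2pixhd/train.py` also keeps the new package-relative imports working:

```python
import argparse

def run_training(opt):
    # Stand-in for the module-level training loop in pix2pixhd/train.py.
    print("would start training run:", opt.name)

def main(args=None):
    # In the real package, TrainOptions().parse(args=args) would take this
    # role; parsing happens here instead of at import time, so the console
    # script only does work when actually invoked.
    parser = argparse.ArgumentParser(prog="pix2pixhd-train")
    parser.add_argument("--name", default="label2city_512p")
    run_training(parser.parse_args(args))

if __name__ == "__main__":
    main()
```
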
@@ -108,7 +112,7 @@ Note: this is not tested and we trained our model using single GPU only. Please - You can then train the model by adding `--fp16`. For example, ```bash #!./scripts/train_512p_fp16.sh -python -m torch.distributed.launch train.py --name label2city_512p --fp16 +python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16 ``` In our test case, it trains about 80% faster with AMP on a Volta machine. @@ -131,7 +135,7 @@ In our test case, it trains about 80% faster with AMP on a Volta machine. If you find this useful for your research, please use the following. -``` +```bibtex @inproceedings{wang2018pix2pixHD, title={High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs}, author={Ting-Chun Wang and Ming-Yu Liu and Jun-Yan Zhu and Andrew Tao and Jan Kautz and Bryan Catanzaro}, diff --git a/pix2pixhd/__init__.py b/pix2pixhd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pix2pixhd/data/aligned_dataset.py b/pix2pixhd/data/aligned_dataset.py index 29785c19..63ff6d35 100755 --- a/pix2pixhd/data/aligned_dataset.py +++ b/pix2pixhd/data/aligned_dataset.py @@ -1,7 +1,7 @@ import os.path -from data.base_dataset import BaseDataset, get_params, get_transform, normalize -from data.image_folder import make_dataset from PIL import Image +from .base_dataset import BaseDataset, get_params, get_transform, normalize +from .image_folder import make_dataset class AlignedDataset(BaseDataset): def initialize(self, opt): @@ -73,4 +73,4 @@ def __len__(self): return len(self.A_paths) // self.opt.batchSize * self.opt.batchSize def name(self): - return 'AlignedDataset' \ No newline at end of file + return 'AlignedDataset' diff --git a/pix2pixhd/data/base_data_loader.py b/pix2pixhd/data/base_data_loader.py index 0e1deb55..b94e40e2 100755 --- a/pix2pixhd/data/base_data_loader.py +++ b/pix2pixhd/data/base_data_loader.py @@ -7,7 +7,7 @@ def initialize(self, opt): self.opt = opt pass - def load_data(): + def load_data(self): return None diff --git a/pix2pixhd/data/base_dataset.py b/pix2pixhd/data/base_dataset.py index f47cba5b..ee08b499 100755 --- a/pix2pixhd/data/base_dataset.py +++ b/pix2pixhd/data/base_dataset.py @@ -1,8 +1,8 @@ -import torch.utils.data as data +import random from PIL import Image -import torchvision.transforms as transforms import numpy as np -import random +import torch.utils.data as data +import torchvision.transforms as transforms class BaseDataset(data.Dataset): def __init__(self): diff --git a/pix2pixhd/data/custom_dataset_data_loader.py b/pix2pixhd/data/custom_dataset_data_loader.py index b73cdc55..6f05b346 100755 --- a/pix2pixhd/data/custom_dataset_data_loader.py +++ b/pix2pixhd/data/custom_dataset_data_loader.py @@ -1,10 +1,10 @@ import torch.utils.data -from data.base_data_loader import BaseDataLoader +from .base_data_loader import BaseDataLoader def CreateDataset(opt): dataset = None - from data.aligned_dataset import AlignedDataset + from .aligned_dataset import AlignedDataset dataset = AlignedDataset() #print("dataset [%s] was created" % (dataset.name())) diff --git a/pix2pixhd/data/data_loader.py b/pix2pixhd/data/data_loader.py index 0217311e..d77b69e0 100755 --- a/pix2pixhd/data/data_loader.py +++ b/pix2pixhd/data/data_loader.py @@ -1,6 +1,6 @@ def CreateDataLoader(opt): - from data.custom_dataset_data_loader import CustomDatasetDataLoader + from .custom_dataset_data_loader import CustomDatasetDataLoader data_loader = CustomDatasetDataLoader() #print(data_loader.name()) data_loader.initialize(opt) diff 
--git a/pix2pixhd/data/image_folder.py b/pix2pixhd/data/image_folder.py index df0141f1..d217657e 100755 --- a/pix2pixhd/data/image_folder.py +++ b/pix2pixhd/data/image_folder.py @@ -4,9 +4,9 @@ # Modified the original code so that it also loads images from the current # directory as well as the subdirectories ############################################################################### -import torch.utils.data as data -from PIL import Image import os +from PIL import Image +import torch.utils.data as data IMG_EXTENSIONS = [ '.jpg', '.JPG', '.jpeg', '.JPEG', diff --git a/pix2pixhd/encode_features.py b/pix2pixhd/encode_features.py index 158c85a0..8558f494 100755 --- a/pix2pixhd/encode_features.py +++ b/pix2pixhd/encode_features.py @@ -1,8 +1,8 @@ -from options.train_options import TrainOptions -from data.data_loader import CreateDataLoader -from models.models import create_model -import numpy as np import os +import numpy as np +from .options.train_options import TrainOptions +from .data.data_loader import CreateDataLoader +from .models.models import create_model opt = TrainOptions().parse() opt.nThreads = 1 @@ -51,4 +51,10 @@ centers[label] = kmeans.cluster_centers_ save_name = os.path.join(save_path, name + '_clustered_%03d.npy' % opt.n_clusters) np.save(save_name, centers) -print('saving to %s' % save_name) \ No newline at end of file +print('saving to %s' % save_name) + +def main(): + pass + +if __name__ == "__main__": + main() diff --git a/pix2pixhd/models/base_model.py b/pix2pixhd/models/base_model.py index 31a77244..486a9155 100755 --- a/pix2pixhd/models/base_model.py +++ b/pix2pixhd/models/base_model.py @@ -1,6 +1,6 @@ import os -import torch import sys +import torch class BaseModel(torch.nn.Module): def name(self): @@ -55,7 +55,7 @@ def load_network(self, network, network_label, epoch_label, save_dir=''): if not os.path.isfile(save_path): print('%s not exists yet!' % save_path) if network_label == 'G': - raise(Exception('Generator must exist!')) + raise(NotImplementedError('Generator must exist!')) else: #network.load_state_dict(torch.load(save_path)) try: diff --git a/pix2pixhd/models/networks.py b/pix2pixhd/models/networks.py index b66acd8b..512c8eac 100755 --- a/pix2pixhd/models/networks.py +++ b/pix2pixhd/models/networks.py @@ -1,8 +1,9 @@ +import functools +import numpy as np import torch import torch.nn as nn -import functools from torch.autograd import Variable -import numpy as np +from torchvision import models ############################################################################### # Functions @@ -35,7 +36,7 @@ def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_glo elif netG == 'encoder': netG = Encoder(input_nc, output_nc, ngf, n_downsample_global, norm_layer) else: - raise(Exception('generator not implemented!')) + raise(NotImplementedError('generator [%s] not implemented!' 
% netG)) #print(netG) if len(gpu_ids) > 0: assert(torch.cuda.is_available()) @@ -382,7 +383,6 @@ def forward(self, input): else: return self.model(input) -from torchvision import models class Vgg19(torch.nn.Module): def __init__(self, requires_grad=False): super(Vgg19, self).__init__() diff --git a/pix2pixhd/models/pix2pixHD_model.py b/pix2pixhd/models/pix2pixHD_model.py index 29a708e2..af5fef52 100755 --- a/pix2pixhd/models/pix2pixHD_model.py +++ b/pix2pixhd/models/pix2pixHD_model.py @@ -1,7 +1,7 @@ +import os import numpy as np import torch -import os -from util.image_pool import ImagePool +from ..util.image_pool import ImagePool from .base_model import BaseModel from . import networks diff --git a/pix2pixhd/models/ui_model.py b/pix2pixhd/models/ui_model.py index c5b34335..1a5537cb 100755 --- a/pix2pixhd/models/ui_model.py +++ b/pix2pixhd/models/ui_model.py @@ -1,10 +1,9 @@ -import torch -from torch.autograd import Variable from collections import OrderedDict -import numpy as np -import os from PIL import Image -import util.util as util +import numpy as np +import torch +from torch.autograd import Variable +from ..util import util from .base_model import BaseModel from . import networks @@ -344,4 +343,4 @@ def get_current_visuals(self, getLabel=False): label = util.tensor2label(self.net_input.data[0], self.opt.label_nc) dict_list += [('label', label)] - return OrderedDict(dict_list) \ No newline at end of file + return OrderedDict(dict_list) diff --git a/pix2pixhd/options/base_options.py b/pix2pixhd/options/base_options.py index 4e0450d6..b0316792 100755 --- a/pix2pixhd/options/base_options.py +++ b/pix2pixhd/options/base_options.py @@ -1,7 +1,7 @@ import argparse import os -from util import util import torch +from ..util import util class BaseOptions(): def __init__(self): diff --git a/pix2pixhd/precompute_feature_maps.py b/pix2pixhd/precompute_feature_maps.py index 8836ea2c..170c504f 100755 --- a/pix2pixhd/precompute_feature_maps.py +++ b/pix2pixhd/precompute_feature_maps.py @@ -1,10 +1,10 @@ -from options.train_options import TrainOptions -from data.data_loader import CreateDataLoader -from models.models import create_model import os -import util.util as util from torch.autograd import Variable import torch.nn as nn +from .options.train_options import TrainOptions +from .data.data_loader import CreateDataLoader +from .models.models import create_model +from .util import util opt = TrainOptions().parse() opt.nThreads = 1 @@ -30,4 +30,10 @@ feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) image_numpy = util.tensor2im(feat_map.data[0]) save_path = data['path'][0].replace('/train_label/', '/train_feat/') - util.save_image(image_numpy, save_path) \ No newline at end of file + util.save_image(image_numpy, save_path) + +def main(): + pass + +if __name__ == "__main__": + main() diff --git a/pix2pixhd/run_engine.py b/pix2pixhd/run_engine.py index 700494d2..bf1c180e 100644 --- a/pix2pixhd/run_engine.py +++ b/pix2pixhd/run_engine.py @@ -1,8 +1,6 @@ import os import sys -from random import randint import numpy as np -import tensorrt try: from PIL import Image diff --git a/pix2pixhd/test.py b/pix2pixhd/test.py index e0b1ec33..8e188fa9 100755 --- a/pix2pixhd/test.py +++ b/pix2pixhd/test.py @@ -1,13 +1,13 @@ import os from collections import OrderedDict -from torch.autograd import Variable -from options.test_options import TestOptions -from data.data_loader import CreateDataLoader -from models.models import create_model -import util.util as util -from util.visualizer import 
Visualizer
-from util import html
 import torch
+from torch.autograd import Variable
+from .options.test_options import TestOptions
+from .data.data_loader import CreateDataLoader
+from .models.models import create_model
+from .util import util
+from .util.visualizer import Visualizer
+from .util import html

 opt = TestOptions().parse(save=False)
 opt.nThreads = 1   # test code only supports nThreads = 1
@@ -33,7 +33,7 @@
 if opt.verbose:
     print(model)
 else:
-    from run_engine import run_trt_engine, run_onnx
+    from .run_engine import run_trt_engine, run_onnx

 for i, data in enumerate(dataset):
     if i >= opt.how_many:
         break
@@ -65,3 +65,9 @@
     visualizer.save_images(webpage, visuals, img_path)

 webpage.save()
+
+def main():
+    pass
+
+if __name__ == "__main__":
+    main()

diff --git a/pix2pixhd/train.py b/pix2pixhd/train.py
index acedac25..789f3b2b 100755
--- a/pix2pixhd/train.py
+++ b/pix2pixhd/train.py
@@ -1,18 +1,19 @@
 import time
 import os
 import numpy as np
-import torch
-from torch.autograd import Variable
+import math
 from collections import OrderedDict
 from subprocess import call
-import fractions
-def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0
+import torch
+from torch.autograd import Variable
+
+from .options.train_options import TrainOptions
+from .data.data_loader import CreateDataLoader
+from .models.models import create_model
+from .util import util
+from .util.visualizer import Visualizer

-from options.train_options import TrainOptions
-from data.data_loader import CreateDataLoader
-from models.models import create_model
-import util.util as util
-from util.visualizer import Visualizer
+def lcm(a,b): return abs(a * b)//math.gcd(a,b) if a and b else 0

 opt = TrainOptions().parse()
 iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
@@ -139,3 +140,9 @@
     ### linearly decay learning rate after certain iterations
     if epoch > opt.niter:
         model.module.update_learning_rate()
+
+def main():
+    pass
+
+if __name__ == "__main__":
+    main()

diff --git a/pix2pixhd/util/visualizer.py b/pix2pixhd/util/visualizer.py
index 584ac45d..2d12fd5b 100755
--- a/pix2pixhd/util/visualizer.py
+++ b/pix2pixhd/util/visualizer.py
@@ -1,6 +1,4 @@
-import numpy as np
 import os
-import ntpath
 import time
 from . import util
 from . 
import html
@@ -112,7 +110,7 @@
     # save image to the disk
     def save_images(self, webpage, visuals, image_path):
         image_dir = webpage.get_image_dir()
-        short_path = ntpath.basename(image_path[0])
+        short_path = os.path.basename(image_path[0])
         name = os.path.splitext(short_path)[0]

         webpage.add_header(name)

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..b9f728ea
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+numpy
+scipy
+scikit-learn
+pillow
+tensorrt
+torch>=1.4.0
+torchvision>=0.5.0
+dominate>=2.4.0

diff --git a/scripts/test_1024p.sh b/scripts/test_1024p.sh
index 319803ca..b834e41d 100755
--- a/scripts/test_1024p.sh
+++ b/scripts/test_1024p.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 ################################ Testing ################################
 # labels only
-python test.py --name label2city_1024p --netG local --ngf 32 --resize_or_crop none $@
+pix2pixhd-test --name label2city_1024p --netG local --ngf 32 --resize_or_crop none "$@"

diff --git a/scripts/test_1024p_feat.sh b/scripts/test_1024p_feat.sh
index 2f4ba171..278bfb23 100755
--- a/scripts/test_1024p_feat.sh
+++ b/scripts/test_1024p_feat.sh
@@ -1,5 +1,5 @@
 ################################ Testing ################################
 # first precompute and cluster all features
-python encode_features.py --name label2city_1024p_feat --netG local --ngf 32 --resize_or_crop none;
+pix2pixhd-encode-features --name label2city_1024p_feat --netG local --ngf 32 --resize_or_crop none;
 # use instance-wise features
-python test.py --name label2city_1024p_feat ---netG local --ngf 32 --resize_or_crop none --instance_feat
\ No newline at end of file
+pix2pixhd-test --name label2city_1024p_feat --netG local --ngf 32 --resize_or_crop none --instance_feat

diff --git a/scripts/test_512p.sh b/scripts/test_512p.sh
index 3131043d..695fac17 100755
--- a/scripts/test_512p.sh
+++ b/scripts/test_512p.sh
@@ -1,3 +1,4 @@
+#!/bin/sh
 ################################ Testing ################################
 # labels only
-python test.py --name label2city_512p
\ No newline at end of file
+pix2pixhd-test --name label2city_512p

diff --git a/scripts/test_512p_feat.sh b/scripts/test_512p_feat.sh
index 8f25e9cd..35117114 100755
--- a/scripts/test_512p_feat.sh
+++ b/scripts/test_512p_feat.sh
@@ -1,5 +1,6 @@
+#!/bin/sh
 ################################ Testing ################################
 # first precompute and cluster all features
-python encode_features.py --name label2city_512p_feat;
+pix2pixhd-encode-features --name label2city_512p_feat;
 # use instance-wise features
-python test.py --name label2city_512p_feat --instance_feat
\ No newline at end of file
+pix2pixhd-test --name label2city_512p_feat --instance_feat

diff --git a/scripts/train_1024p_12G.sh b/scripts/train_1024p_12G.sh
index d5ea7d70..21cce766 100755
--- a/scripts/train_1024p_12G.sh
+++ b/scripts/train_1024p_12G.sh
@@ -1,4 +1,5 @@
+#!/bin/sh
 ############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models #############
 ##### Using GPUs with 12G memory (not tested)
 # Using labels only
-python train.py --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter_fix_global 20 --resize_or_crop crop --fineSize 1024
\ No newline at end of file
+pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter_fix_global 20 --resize_or_crop crop --fineSize 1024

diff --git 
a/scripts/train_1024p_24G.sh b/scripts/train_1024p_24G.sh index 88e58f75..8667f112 100755 --- a/scripts/train_1024p_24G.sh +++ b/scripts/train_1024p_24G.sh @@ -1,4 +1,5 @@ +#!/bin/sh ############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# ######## Using GPUs with 24G memory # Using labels only -python train.py --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none \ No newline at end of file +pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none diff --git a/scripts/train_1024p_feat_12G.sh b/scripts/train_1024p_feat_12G.sh index f8e3d618..6ac3401a 100755 --- a/scripts/train_1024p_feat_12G.sh +++ b/scripts/train_1024p_feat_12G.sh @@ -1,6 +1,7 @@ +#!/bin/sh ############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# ##### Using GPUs with 12G memory (not tested) # First precompute feature maps and save them -python precompute_feature_maps.py --name label2city_512p_feat; +pix2pixhd-precompute-feature-maps --name label2city_512p_feat; # Adding instances and encoded features -python train.py --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter_fix_global 20 --resize_or_crop crop --fineSize 896 --instance_feat --load_features \ No newline at end of file +pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter_fix_global 20 --resize_or_crop crop --fineSize 896 --instance_feat --load_features diff --git a/scripts/train_1024p_feat_24G.sh b/scripts/train_1024p_feat_24G.sh index 399d7205..2666cedb 100755 --- a/scripts/train_1024p_feat_24G.sh +++ b/scripts/train_1024p_feat_24G.sh @@ -1,6 +1,7 @@ +#!/bin/sh ############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# ######## Using GPUs with 24G memory # First precompute feature maps and save them -python precompute_feature_maps.py --name label2city_512p_feat; +pix2pixhd-precompute-feature-maps --name label2city_512p_feat; # Adding instances and encoded features -python train.py --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none --instance_feat --load_features \ No newline at end of file +pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none --instance_feat --load_features diff --git a/scripts/train_512p.sh b/scripts/train_512p.sh index 222c348d..7b0570fd 100755 --- a/scripts/train_512p.sh +++ b/scripts/train_512p.sh @@ -1,2 +1,3 @@ +#!/bin/sh ### Using labels only -python train.py --name label2city_512p \ No newline at end of file +pix2pixhd-train --name label2city_512p diff --git a/scripts/train_512p_feat.sh b/scripts/train_512p_feat.sh index 9d4859c8..d6197826 100755 --- a/scripts/train_512p_feat.sh +++ b/scripts/train_512p_feat.sh @@ -1,2 +1,3 @@ +#!/bin/sh ### Adding instances and encoded features -python train.py --name label2city_512p_feat --instance_feat \ No newline at end of file +pix2pixhd-train --name 
label2city_512p_feat --instance_feat diff --git a/scripts/train_512p_fp16.sh b/scripts/train_512p_fp16.sh index 2bd5e070..d11d145b 100755 --- a/scripts/train_512p_fp16.sh +++ b/scripts/train_512p_fp16.sh @@ -1,2 +1,3 @@ +#!/bin/sh ### Using labels only - python -m torch.distributed.launch train.py --name label2city_512p --fp16 \ No newline at end of file +python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16 diff --git a/scripts/train_512p_fp16_multigpu.sh b/scripts/train_512p_fp16_multigpu.sh index 0d9686c8..a7f52181 100755 --- a/scripts/train_512p_fp16_multigpu.sh +++ b/scripts/train_512p_fp16_multigpu.sh @@ -1,2 +1,3 @@ +#!/bin/sh ######## Multi-GPU training example ####### -python -m torch.distributed.launch train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 --fp16 \ No newline at end of file +python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 --fp16 diff --git a/scripts/train_512p_multigpu.sh b/scripts/train_512p_multigpu.sh index 16f0a1a8..50c2a870 100755 --- a/scripts/train_512p_multigpu.sh +++ b/scripts/train_512p_multigpu.sh @@ -1,2 +1,3 @@ +#!/bin/sh ######## Multi-GPU training example ####### -python train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 \ No newline at end of file +pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..ca93778e --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +""" +Installs scripts: + - pix2pixhd-train + - pix2pixhd-test + - pix2pixhd-encode-features + - pix2pixhd-precompute-feature-maps +""" +import codecs +from setuptools import setup, find_packages + +with codecs.open('README.md', encoding='utf-8') as f: + README = f.read() + +setup( + name='pix2pixhd', + version='1.0', + description='Synthesizing and manipulating 2048x1024 images with conditional GANs', + long_description=README, + long_description_content_type='text/markdown', + author='Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu, Andrew Tao, Jan Kautz, Bryan Catanzaro', + author_email='tingchunw@nvidia.com, https://mingyuliu.net, jan@jankautz.com, junyanz@cs.cmu.edu, bcatanzaro@acm.org', + url='https://github.com/NVIDIA/pix2pixHD', + license='BSD', + packages=find_packages(), + install_requires=open('requirements.txt').read().split('\n'), + entry_points={ + 'console_scripts': [ + 'pix2pixhd-train=pix2pixhd.train:main', + 'pix2pixhd-test=pix2pixhd.test:main', + 'pix2pixhd-encode-features=pix2pixhd.encode_features:main', + 'pix2pixhd-precompute-feature-maps=pix2pixhd.precompute_feature_maps:main', + ] + }, +) From 77c57b49effe8a88c98756a5f321fda9f9f3f72d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 23 Mar 2022 23:41:43 +0100 Subject: [PATCH 10/14] convert EOL from DOS to UNIX --- README.md | 296 +++++++++++++-------------- pix2pixhd/encode_features.py | 120 +++++------ pix2pixhd/precompute_feature_maps.py | 78 +++---- scripts/test_1024p_feat.sh | 10 +- scripts/test_512p.sh | 8 +- scripts/test_512p_feat.sh | 12 +- scripts/train_1024p_12G.sh | 10 +- scripts/train_1024p_24G.sh | 10 +- scripts/train_1024p_feat_12G.sh | 14 +- scripts/train_1024p_feat_24G.sh | 14 +- scripts/train_512p.sh | 6 +- scripts/train_512p_feat.sh | 6 +- scripts/train_512p_fp16.sh | 6 +- scripts/train_512p_fp16_multigpu.sh | 6 +- scripts/train_512p_multigpu.sh | 6 +- 15 files changed, 301 insertions(+), 301 deletions(-) diff --git a/README.md b/README.md index 2e2c8948..2c94a574 100755 --- 
a/README.md +++ b/README.md @@ -1,148 +1,148 @@ - - -



- -# pix2pixHD -### [Project](https://tcwang0509.github.io/pix2pixHD/) | [Youtube](https://youtu.be/3AIpPlzM_qs) | [Paper](https://arxiv.org/pdf/1711.11585.pdf)
-Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic image-to-image translation. It can be used for turning semantic label maps into photo-realistic images or synthesizing portraits from face label maps.

-[High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs](https://tcwang0509.github.io/pix2pixHD/) - [Ting-Chun Wang](https://tcwang0509.github.io/)1, [Ming-Yu Liu](http://mingyuliu.net/)1, [Jun-Yan Zhu](http://people.eecs.berkeley.edu/~junyanz/)2, Andrew Tao1, [Jan Kautz](http://jankautz.com/)1, [Bryan Catanzaro](http://catanzaro.name/)1 - 1NVIDIA Corporation, 2UC Berkeley - In CVPR 2018. - -## Image-to-image translation at 2k/1k resolution -- Our label-to-streetview results -

-- Interactive editing results -

-- Additional streetview results -

- -- Label-to-face and interactive editing results -

- -- Our editing interface -

- -## Prerequisites -- Linux or macOS -- Python 2 or 3 -- [optionally:] NVIDIA GPU (11G memory or larger) + CUDA cuDNN - -## Getting Started -### Installation -- Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/), and activate it -- Clone this repo: - - git clone https://github.com/NVIDIA/pix2pixHD - pip install pix2pixHD - - - -### Testing -- A few example Cityscapes test images are included in the `datasets` folder. -- Please download the pre-trained Cityscapes model from [here](https://drive.google.com/file/d/1h9SykUnuZul7J3Nbms2QGH1wa85nbN2-/view?usp=sharing) (google drive link), and put it under `./checkpoints/label2city_1024p/` -- Test the model (`bash ./scripts/test_1024p.sh`): -```bash -#!./scripts/test_1024p.sh -pix2pixhd-test --name label2city_1024p --netG local --ngf 32 --resize_or_crop none -``` -The test results will be saved to a html file here: `./results/label2city_1024p/test_latest/index.html`. - -More example scripts can be found in the `scripts` directory. - -For other options, cf. - - pix2pixhd-test --help - - -### Dataset -- We use the Cityscapes dataset. To train a model on the full dataset, please download it from the [official website](https://www.cityscapes-dataset.com/) (registration required). -After downloading, please put it under the `datasets` folder in the same way the example images are provided. - - -### Training -- Train a model at 1024 x 512 resolution (`bash ./scripts/train_512p.sh`): -```bash -#!./scripts/train_512p.sh -pix2pixhd-train --name label2city_512p -``` -- To view training results, please checkout intermediate results in `./checkpoints/label2city_512p/web/index.html`. -If you have tensorflow installed, you can see tensorboard logs in `./checkpoints/label2city_512p/logs` by adding `--tf_log` to the training scripts. - -For other options, cf. - - pix2pixhd-train --help - -### Multi-GPU training -- Train a model using multiple GPUs (`bash ./scripts/train_512p_multigpu.sh`): -```bash -#!./scripts/train_512p_multigpu.sh -pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 -``` -Note: this is not tested and we trained our model using single GPU only. Please use at your own discretion. - -### Training with Automatic Mixed Precision (AMP) for faster speed -- To train with mixed precision support, please first install apex from: https://github.com/NVIDIA/apex -- You can then train the model by adding `--fp16`. For example, -```bash -#!./scripts/train_512p_fp16.sh -python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16 -``` -In our test case, it trains about 80% faster with AMP on a Volta machine. - -### Training at full resolution -- To train the images at full resolution (2048 x 1024) requires a GPU with 24G memory (`bash ./scripts/train_1024p_24G.sh`), or 16G memory if using mixed precision (AMP). -- If only GPUs with 12G memory are available, please use the 12G script (`bash ./scripts/train_1024p_12G.sh`), which will crop the images during training. Performance is not guaranteed using this script. - -### Training with your own dataset -- If you want to train with your own dataset, please generate label maps which are one-channel whose pixel values correspond to the object labels (i.e. 0,1,...,N-1, where N is the number of labels). This is because we need to generate one-hot vectors from the label maps. Please also specity `--label_nc N` during both training and testing. 
-- If your input is not a label map, please just specify `--label_nc 0` which will directly use the RGB colors as input. The folders should then be named `train_A`, `train_B` instead of `train_label`, `train_img`, where the goal is to translate images from A to B. -- If you don't have instance maps or don't want to use them, please specify `--no_instance`. -- The default setting for preprocessing is `scale_width`, which will scale the width of all training images to `opt.loadSize` (1024) while keeping the aspect ratio. If you want a different setting, please change it by using the `--resize_or_crop` option. For example, `scale_width_and_crop` first resizes the image to have width `opt.loadSize` and then does random cropping of size `(opt.fineSize, opt.fineSize)`. `crop` skips the resizing step and only performs random cropping. If you don't want any preprocessing, please specify `none`, which will do nothing other than making sure the image is divisible by 32. - -## More Training/Test Details -- Flags: see `options/train_options.py` and `options/base_options.py` for all the training flags; see `options/test_options.py` and `options/base_options.py` for all the test flags. -- Instance map: we take in both label maps and instance maps as input. If you don't want to use instance maps, please specify the flag `--no_instance`. - - -## Citation - -If you find this useful for your research, please use the following. - -```bibtex -@inproceedings{wang2018pix2pixHD, - title={High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs}, - author={Ting-Chun Wang and Ming-Yu Liu and Jun-Yan Zhu and Andrew Tao and Jan Kautz and Bryan Catanzaro}, - booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, - year={2018} -} -``` - -## Acknowledgments -This code borrows heavily from [pytorch-CycleGAN-and-pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix). + + +



+ +# pix2pixHD +### [Project](https://tcwang0509.github.io/pix2pixHD/) | [Youtube](https://youtu.be/3AIpPlzM_qs) | [Paper](https://arxiv.org/pdf/1711.11585.pdf)
+Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photorealistic image-to-image translation. It can be used for turning semantic label maps into photo-realistic images or synthesizing portraits from face label maps.

+[High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs](https://tcwang0509.github.io/pix2pixHD/) + [Ting-Chun Wang](https://tcwang0509.github.io/)1, [Ming-Yu Liu](http://mingyuliu.net/)1, [Jun-Yan Zhu](http://people.eecs.berkeley.edu/~junyanz/)2, Andrew Tao1, [Jan Kautz](http://jankautz.com/)1, [Bryan Catanzaro](http://catanzaro.name/)1 + 1NVIDIA Corporation, 2UC Berkeley + In CVPR 2018. + +## Image-to-image translation at 2k/1k resolution +- Our label-to-streetview results +

+- Interactive editing results +

+- Additional streetview results +

+ +- Label-to-face and interactive editing results +

+ +- Our editing interface +

+## Prerequisites
+- Linux or macOS
+- Python 2 or 3
+- [optionally:] NVIDIA GPU (11G memory or larger) + CUDA cuDNN
+
+## Getting Started
+### Installation
+- Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/), and activate it
+- Clone this repo:
+
+    git clone https://github.com/NVIDIA/pix2pixHD
+    pip install pix2pixHD
+
+### Testing
+- A few example Cityscapes test images are included in the `datasets` folder.
+- Please download the pre-trained Cityscapes model from [here](https://drive.google.com/file/d/1h9SykUnuZul7J3Nbms2QGH1wa85nbN2-/view?usp=sharing) (google drive link), and put it under `./checkpoints/label2city_1024p/`
+- Test the model (`bash ./scripts/test_1024p.sh`):
+```bash
+#!./scripts/test_1024p.sh
+pix2pixhd-test --name label2city_1024p --netG local --ngf 32 --resize_or_crop none
+```
+The test results will be saved to a html file here: `./results/label2city_1024p/test_latest/index.html`.
+
+More example scripts can be found in the `scripts` directory.
+
+For other options, cf.
+
+    pix2pixhd-test --help
+
+### Dataset
+- We use the Cityscapes dataset. To train a model on the full dataset, please download it from the [official website](https://www.cityscapes-dataset.com/) (registration required).
+After downloading, please put it under the `datasets` folder in the same way the example images are provided.
+
+### Training
+- Train a model at 1024 x 512 resolution (`bash ./scripts/train_512p.sh`):
+```bash
+#!./scripts/train_512p.sh
+pix2pixhd-train --name label2city_512p
+```
+- To view training results, please checkout intermediate results in `./checkpoints/label2city_512p/web/index.html`.
+If you have tensorflow installed, you can see tensorboard logs in `./checkpoints/label2city_512p/logs` by adding `--tf_log` to the training scripts.
+
+For other options, cf.
+
+    pix2pixhd-train --help
+
+### Multi-GPU training
+- Train a model using multiple GPUs (`bash ./scripts/train_512p_multigpu.sh`):
+```bash
+#!./scripts/train_512p_multigpu.sh
+pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7
+```
+Note: this is not tested and we trained our model using single GPU only. Please use at your own discretion.
+
+### Training with Automatic Mixed Precision (AMP) for faster speed
+- To train with mixed precision support, please first install apex from: https://github.com/NVIDIA/apex
+- You can then train the model by adding `--fp16`. For example,
+```bash
+#!./scripts/train_512p_fp16.sh
+python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16
+```
+In our test case, it trains about 80% faster with AMP on a Volta machine.
+
+### Training at full resolution
+- To train the images at full resolution (2048 x 1024) requires a GPU with 24G memory (`bash ./scripts/train_1024p_24G.sh`), or 16G memory if using mixed precision (AMP).
+- If only GPUs with 12G memory are available, please use the 12G script (`bash ./scripts/train_1024p_12G.sh`), which will crop the images during training. Performance is not guaranteed using this script.
+
+### Training with your own dataset
+- If you want to train with your own dataset, please generate label maps which are one-channel whose pixel values correspond to the object labels (i.e. 0,1,...,N-1, where N is the number of labels). This is because we need to generate one-hot vectors from the label maps. Please also specify `--label_nc N` during both training and testing.
+
+## More Training/Test Details
+- Flags: see `options/train_options.py` and `options/base_options.py` for all the training flags; see `options/test_options.py` and `options/base_options.py` for all the test flags.
+- Instance map: we take in both label maps and instance maps as input. If you don't want to use instance maps, please specify the flag `--no_instance`.
+
+
+## Citation
+
+If you find this useful for your research, please cite the following.
+
+```bibtex
+@inproceedings{wang2018pix2pixHD,
+  title={High-Resolution Image Synthesis and Semantic Manipulation with Conditional GANs},
+  author={Ting-Chun Wang and Ming-Yu Liu and Jun-Yan Zhu and Andrew Tao and Jan Kautz and Bryan Catanzaro},
+  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+  year={2018}
+}
+```
+
+## Acknowledgments
+This code borrows heavily from [pytorch-CycleGAN-and-pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix).
diff --git a/pix2pixhd/encode_features.py b/pix2pixhd/encode_features.py index 8558f494..5791ee1f 100755 --- a/pix2pixhd/encode_features.py +++ b/pix2pixhd/encode_features.py @@ -1,60 +1,60 @@ -import os -import numpy as np -from .options.train_options import TrainOptions -from .data.data_loader import CreateDataLoader -from .models.models import create_model - -opt = TrainOptions().parse() -opt.nThreads = 1 -opt.batchSize = 1 -opt.serial_batches = True -opt.no_flip = True -opt.instance_feat = True -opt.continue_train = True - -name = 'features' -save_path = os.path.join(opt.checkpoints_dir, opt.name) - -############ Initialize ######### -data_loader = CreateDataLoader(opt) -dataset = data_loader.load_data() -dataset_size = len(data_loader) -model = create_model(opt) - -########### Encode features ########### -reencode = True -if reencode: - features = {} - for label in range(opt.label_nc): - features[label] = np.zeros((0, opt.feat_num+1)) - for i, data in enumerate(dataset): - feat = model.module.encode_features(data['image'], data['inst']) - for label in range(opt.label_nc): - features[label] = np.append(features[label], feat[label], axis=0) - - print('%d / %d images' % (i+1, dataset_size)) - save_name = os.path.join(save_path, name + '.npy') - np.save(save_name, features) - -############## Clustering ########### -n_clusters = opt.n_clusters -load_name = os.path.join(save_path, name + '.npy') -features = np.load(load_name).item() -from sklearn.cluster import KMeans -centers = {} -for label in range(opt.label_nc): - feat = features[label] - feat = feat[feat[:,-1] > 0.5, :-1] - if feat.shape[0]: - n_clusters = min(feat.shape[0], opt.n_clusters) - kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat) - centers[label] = kmeans.cluster_centers_ -save_name = os.path.join(save_path, name + '_clustered_%03d.npy' % opt.n_clusters) -np.save(save_name, centers) -print('saving to %s' % save_name) - -def main(): - pass - -if __name__ == "__main__": - main() +import os +import numpy as np +from .options.train_options import TrainOptions +from .data.data_loader import CreateDataLoader +from .models.models import create_model + +opt = TrainOptions().parse() +opt.nThreads = 1 +opt.batchSize = 1 +opt.serial_batches = True +opt.no_flip = True +opt.instance_feat = True +opt.continue_train = True + +name = 'features' +save_path = os.path.join(opt.checkpoints_dir, opt.name) + +############ Initialize ######### +data_loader = CreateDataLoader(opt) +dataset = data_loader.load_data() +dataset_size = len(data_loader) +model = create_model(opt) + +########### Encode features ########### +reencode = True +if reencode: + features = {} + for label in range(opt.label_nc): + features[label] = np.zeros((0, opt.feat_num+1)) + for i, data in enumerate(dataset): + feat = model.module.encode_features(data['image'], data['inst']) + for label in range(opt.label_nc): + features[label] = np.append(features[label], feat[label], axis=0) + + print('%d / %d images' % (i+1, dataset_size)) + save_name = os.path.join(save_path, name + '.npy') + np.save(save_name, features) + +############## Clustering ########### +n_clusters = opt.n_clusters +load_name = os.path.join(save_path, name + '.npy') +features = np.load(load_name).item() +from sklearn.cluster import KMeans +centers = {} +for label in range(opt.label_nc): + feat = features[label] + feat = feat[feat[:,-1] > 0.5, :-1] + if feat.shape[0]: + n_clusters = min(feat.shape[0], opt.n_clusters) + kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat) + 
centers[label] = kmeans.cluster_centers_ +save_name = os.path.join(save_path, name + '_clustered_%03d.npy' % opt.n_clusters) +np.save(save_name, centers) +print('saving to %s' % save_name) + +def main(): + pass + +if __name__ == "__main__": + main() diff --git a/pix2pixhd/precompute_feature_maps.py b/pix2pixhd/precompute_feature_maps.py index 170c504f..10205385 100755 --- a/pix2pixhd/precompute_feature_maps.py +++ b/pix2pixhd/precompute_feature_maps.py @@ -1,39 +1,39 @@ -import os -from torch.autograd import Variable -import torch.nn as nn -from .options.train_options import TrainOptions -from .data.data_loader import CreateDataLoader -from .models.models import create_model -from .util import util - -opt = TrainOptions().parse() -opt.nThreads = 1 -opt.batchSize = 1 -opt.serial_batches = True -opt.no_flip = True -opt.instance_feat = True - -name = 'features' -save_path = os.path.join(opt.checkpoints_dir, opt.name) - -############ Initialize ######### -data_loader = CreateDataLoader(opt) -dataset = data_loader.load_data() -dataset_size = len(data_loader) -model = create_model(opt) -util.mkdirs(os.path.join(opt.dataroot, opt.phase + '_feat')) - -######## Save precomputed feature maps for 1024p training ####### -for i, data in enumerate(dataset): - print('%d / %d images' % (i+1, dataset_size)) - feat_map = model.module.netE.forward(Variable(data['image'].cuda(), volatile=True), data['inst'].cuda()) - feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) - image_numpy = util.tensor2im(feat_map.data[0]) - save_path = data['path'][0].replace('/train_label/', '/train_feat/') - util.save_image(image_numpy, save_path) - -def main(): - pass - -if __name__ == "__main__": - main() +import os +from torch.autograd import Variable +import torch.nn as nn +from .options.train_options import TrainOptions +from .data.data_loader import CreateDataLoader +from .models.models import create_model +from .util import util + +opt = TrainOptions().parse() +opt.nThreads = 1 +opt.batchSize = 1 +opt.serial_batches = True +opt.no_flip = True +opt.instance_feat = True + +name = 'features' +save_path = os.path.join(opt.checkpoints_dir, opt.name) + +############ Initialize ######### +data_loader = CreateDataLoader(opt) +dataset = data_loader.load_data() +dataset_size = len(data_loader) +model = create_model(opt) +util.mkdirs(os.path.join(opt.dataroot, opt.phase + '_feat')) + +######## Save precomputed feature maps for 1024p training ####### +for i, data in enumerate(dataset): + print('%d / %d images' % (i+1, dataset_size)) + feat_map = model.module.netE.forward(Variable(data['image'].cuda(), volatile=True), data['inst'].cuda()) + feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) + image_numpy = util.tensor2im(feat_map.data[0]) + save_path = data['path'][0].replace('/train_label/', '/train_feat/') + util.save_image(image_numpy, save_path) + +def main(): + pass + +if __name__ == "__main__": + main() diff --git a/scripts/test_1024p_feat.sh b/scripts/test_1024p_feat.sh index 278bfb23..3cbfb22e 100755 --- a/scripts/test_1024p_feat.sh +++ b/scripts/test_1024p_feat.sh @@ -1,5 +1,5 @@ -################################ Testing ################################ -# first precompute and cluster all features -pix2pix-encode-features --name label2city_1024p_feat --netG local --ngf 32 --resize_or_crop none; -# use instance-wise features -pix2pixhd-test --name label2city_1024p_feat ---netG local --ngf 32 --resize_or_crop none --instance_feat +################################ Testing 
################################ +# first precompute and cluster all features +pix2pix-encode-features --name label2city_1024p_feat --netG local --ngf 32 --resize_or_crop none; +# use instance-wise features +pix2pixhd-test --name label2city_1024p_feat ---netG local --ngf 32 --resize_or_crop none --instance_feat diff --git a/scripts/test_512p.sh b/scripts/test_512p.sh index 695fac17..25525fb6 100755 --- a/scripts/test_512p.sh +++ b/scripts/test_512p.sh @@ -1,4 +1,4 @@ -#!/bin/sh -################################ Testing ################################ -# labels only -pix2pixhd-test --name label2city_512p +#!/bin/sh +################################ Testing ################################ +# labels only +pix2pixhd-test --name label2city_512p diff --git a/scripts/test_512p_feat.sh b/scripts/test_512p_feat.sh index 35117114..909a83d1 100755 --- a/scripts/test_512p_feat.sh +++ b/scripts/test_512p_feat.sh @@ -1,6 +1,6 @@ -#!/bin/sh -################################ Testing ################################ -# first precompute and cluster all features -pix2pixhd-encode-features --name label2city_512p_feat; -# use instance-wise features -pix2pixhd-test --name label2city_512p_feat --instance_feat +#!/bin/sh +################################ Testing ################################ +# first precompute and cluster all features +pix2pixhd-encode-features --name label2city_512p_feat; +# use instance-wise features +pix2pixhd-test --name label2city_512p_feat --instance_feat diff --git a/scripts/train_1024p_12G.sh b/scripts/train_1024p_12G.sh index 21cce766..4bf30bfc 100755 --- a/scripts/train_1024p_12G.sh +++ b/scripts/train_1024p_12G.sh @@ -1,5 +1,5 @@ -#!/bin/sh -############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# -##### Using GPUs with 12G memory (not tested) -# Using labels only -pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter_fix_global 20 --resize_or_crop crop --fineSize 1024 +#!/bin/sh +############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# +##### Using GPUs with 12G memory (not tested) +# Using labels only +pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter_fix_global 20 --resize_or_crop crop --fineSize 1024 diff --git a/scripts/train_1024p_24G.sh b/scripts/train_1024p_24G.sh index 8667f112..07e6bdb0 100755 --- a/scripts/train_1024p_24G.sh +++ b/scripts/train_1024p_24G.sh @@ -1,5 +1,5 @@ -#!/bin/sh -############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# -######## Using GPUs with 24G memory -# Using labels only -pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none +#!/bin/sh +############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# +######## Using GPUs with 24G memory +# Using labels only +pix2pixhd-train --name label2city_1024p --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none diff --git a/scripts/train_1024p_feat_12G.sh b/scripts/train_1024p_feat_12G.sh index 6ac3401a..24f16da9 100755 --- a/scripts/train_1024p_feat_12G.sh +++ b/scripts/train_1024p_feat_12G.sh @@ -1,7 +1,7 @@ 
-#!/bin/sh -############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# -##### Using GPUs with 12G memory (not tested) -# First precompute feature maps and save them -pix2pixhd-precompute-feature-maps --name label2city_512p_feat; -# Adding instances and encoded features -pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter_fix_global 20 --resize_or_crop crop --fineSize 896 --instance_feat --load_features +#!/bin/sh +############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# +##### Using GPUs with 12G memory (not tested) +# First precompute feature maps and save them +pix2pixhd-precompute-feature-maps --name label2city_512p_feat; +# Adding instances and encoded features +pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter_fix_global 20 --resize_or_crop crop --fineSize 896 --instance_feat --load_features diff --git a/scripts/train_1024p_feat_24G.sh b/scripts/train_1024p_feat_24G.sh index 2666cedb..f6c1f26c 100755 --- a/scripts/train_1024p_feat_24G.sh +++ b/scripts/train_1024p_feat_24G.sh @@ -1,7 +1,7 @@ -#!/bin/sh -############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# -######## Using GPUs with 24G memory -# First precompute feature maps and save them -pix2pixhd-precompute-feature-maps --name label2city_512p_feat; -# Adding instances and encoded features -pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none --instance_feat --load_features +#!/bin/sh +############## To train images at 2048 x 1024 resolution after training 1024 x 512 resolution models ############# +######## Using GPUs with 24G memory +# First precompute feature maps and save them +pix2pixhd-precompute-feature-maps --name label2city_512p_feat; +# Adding instances and encoded features +pix2pixhd-train --name label2city_1024p_feat --netG local --ngf 32 --num_D 3 --load_pretrain checkpoints/label2city_512p_feat/ --niter 50 --niter_decay 50 --niter_fix_global 10 --resize_or_crop none --instance_feat --load_features diff --git a/scripts/train_512p.sh b/scripts/train_512p.sh index 7b0570fd..6d750b3e 100755 --- a/scripts/train_512p.sh +++ b/scripts/train_512p.sh @@ -1,3 +1,3 @@ -#!/bin/sh -### Using labels only -pix2pixhd-train --name label2city_512p +#!/bin/sh +### Using labels only +pix2pixhd-train --name label2city_512p diff --git a/scripts/train_512p_feat.sh b/scripts/train_512p_feat.sh index d6197826..03ebd83a 100755 --- a/scripts/train_512p_feat.sh +++ b/scripts/train_512p_feat.sh @@ -1,3 +1,3 @@ -#!/bin/sh -### Adding instances and encoded features -pix2pixhd-train --name label2city_512p_feat --instance_feat +#!/bin/sh +### Adding instances and encoded features +pix2pixhd-train --name label2city_512p_feat --instance_feat diff --git a/scripts/train_512p_fp16.sh b/scripts/train_512p_fp16.sh index d11d145b..8b024575 100755 --- a/scripts/train_512p_fp16.sh +++ b/scripts/train_512p_fp16.sh @@ -1,3 +1,3 @@ -#!/bin/sh -### Using labels only -python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16 +#!/bin/sh +### Using labels only +python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --fp16 diff --git 
a/scripts/train_512p_fp16_multigpu.sh b/scripts/train_512p_fp16_multigpu.sh index a7f52181..a807eb3d 100755 --- a/scripts/train_512p_fp16_multigpu.sh +++ b/scripts/train_512p_fp16_multigpu.sh @@ -1,3 +1,3 @@ -#!/bin/sh -######## Multi-GPU training example ####### -python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 --fp16 +#!/bin/sh +######## Multi-GPU training example ####### +python -m torch.distributed.launch pix2pixhd/train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 --fp16 diff --git a/scripts/train_512p_multigpu.sh b/scripts/train_512p_multigpu.sh index 50c2a870..1530eba7 100755 --- a/scripts/train_512p_multigpu.sh +++ b/scripts/train_512p_multigpu.sh @@ -1,3 +1,3 @@ -#!/bin/sh -######## Multi-GPU training example ####### -pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 +#!/bin/sh +######## Multi-GPU training example ####### +pix2pixhd-train --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7 From 5804159023fff156949c267e69e53cd300d5cfea Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 23 Mar 2022 23:50:11 +0100 Subject: [PATCH 11/14] remove trailing whitespace --- pix2pixhd/data/aligned_dataset.py | 28 ++-- pix2pixhd/data/base_data_loader.py | 5 +- pix2pixhd/data/base_dataset.py | 16 +-- pix2pixhd/encode_features.py | 14 +- pix2pixhd/models/base_model.py | 18 +-- pix2pixhd/models/networks.py | 138 +++++++++---------- pix2pixhd/models/pix2pixHD_model.py | 118 ++++++++--------- pix2pixhd/models/ui_model.py | 190 +++++++++++++-------------- pix2pixhd/options/base_options.py | 38 +++--- pix2pixhd/options/test_options.py | 4 +- pix2pixhd/options/train_options.py | 10 +- pix2pixhd/precompute_feature_maps.py | 6 +- pix2pixhd/run_engine.py | 14 +- pix2pixhd/test.py | 10 +- pix2pixhd/train.py | 38 +++--- pix2pixhd/util/util.py | 8 +- 16 files changed, 327 insertions(+), 328 deletions(-) diff --git a/pix2pixhd/data/aligned_dataset.py b/pix2pixhd/data/aligned_dataset.py index 63ff6d35..756dc0ad 100755 --- a/pix2pixhd/data/aligned_dataset.py +++ b/pix2pixhd/data/aligned_dataset.py @@ -6,7 +6,7 @@ class AlignedDataset(BaseDataset): def initialize(self, opt): self.opt = opt - self.root = opt.dataroot + self.root = opt.dataroot ### input A (label maps) dir_A = '_A' if self.opt.label_nc == 0 else '_label' @@ -16,7 +16,7 @@ def initialize(self, opt): ### input B (real images) if opt.isTrain or opt.use_encoded_image: dir_B = '_B' if self.opt.label_nc == 0 else '_img' - self.dir_B = os.path.join(opt.dataroot, opt.phase + dir_B) + self.dir_B = os.path.join(opt.dataroot, opt.phase + dir_B) self.B_paths = sorted(make_dataset(self.dir_B)) ### instance maps @@ -25,17 +25,17 @@ def initialize(self, opt): self.inst_paths = sorted(make_dataset(self.dir_inst)) ### load precomputed instance-wise encoded features - if opt.load_features: + if opt.load_features: self.dir_feat = os.path.join(opt.dataroot, opt.phase + '_feat') print('----------- loading features from %s ----------' % self.dir_feat) self.feat_paths = sorted(make_dataset(self.dir_feat)) - self.dataset_size = len(self.A_paths) - - def __getitem__(self, index): + self.dataset_size = len(self.A_paths) + + def __getitem__(self, index): ### input A (label maps) - A_path = self.A_paths[index] - A = Image.open(A_path) + A_path = self.A_paths[index] + A = Image.open(A_path) params = get_params(self.opt, A.size) if self.opt.label_nc == 0: transform_A = get_transform(self.opt, params) @@ -47,24 +47,24 @@ def 
__getitem__(self, index): B_tensor = inst_tensor = feat_tensor = 0 ### input B (real images) if self.opt.isTrain or self.opt.use_encoded_image: - B_path = self.B_paths[index] + B_path = self.B_paths[index] B = Image.open(B_path).convert('RGB') - transform_B = get_transform(self.opt, params) + transform_B = get_transform(self.opt, params) B_tensor = transform_B(B) - ### if using instance maps + ### if using instance maps if not self.opt.no_instance: inst_path = self.inst_paths[index] inst = Image.open(inst_path) inst_tensor = transform_A(inst) if self.opt.load_features: - feat_path = self.feat_paths[index] + feat_path = self.feat_paths[index] feat = Image.open(feat_path).convert('RGB') norm = normalize() - feat_tensor = norm(transform_A(feat)) + feat_tensor = norm(transform_A(feat)) - input_dict = {'label': A_tensor, 'inst': inst_tensor, 'image': B_tensor, + input_dict = {'label': A_tensor, 'inst': inst_tensor, 'image': B_tensor, 'feat': feat_tensor, 'path': A_path} return input_dict diff --git a/pix2pixhd/data/base_data_loader.py b/pix2pixhd/data/base_data_loader.py index b94e40e2..931e9ccb 100755 --- a/pix2pixhd/data/base_data_loader.py +++ b/pix2pixhd/data/base_data_loader.py @@ -2,7 +2,7 @@ class BaseDataLoader(): def __init__(self): pass - + def initialize(self, opt): self.opt = opt pass @@ -10,5 +10,4 @@ def initialize(self, opt): def load_data(self): return None - - + diff --git a/pix2pixhd/data/base_dataset.py b/pix2pixhd/data/base_dataset.py index ee08b499..3cd0feb2 100755 --- a/pix2pixhd/data/base_dataset.py +++ b/pix2pixhd/data/base_dataset.py @@ -19,14 +19,14 @@ def get_params(opt, size): new_h = h new_w = w if opt.resize_or_crop == 'resize_and_crop': - new_h = new_w = opt.loadSize + new_h = new_w = opt.loadSize elif opt.resize_or_crop == 'scale_width_and_crop': new_w = opt.loadSize new_h = opt.loadSize * h // w x = random.randint(0, np.maximum(0, new_w - opt.fineSize)) y = random.randint(0, np.maximum(0, new_h - opt.fineSize)) - + flip = random.random() > 0.5 return {'crop_pos': (x, y), 'flip': flip} @@ -39,7 +39,7 @@ def get_transform(opt, params, method=Image.BICUBIC, normalize=True): transform_list.append(transforms.Resize(osize, interpolation=method)) elif 'scale_width' in opt.resize_or_crop: transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.loadSize, method))) - + if 'crop' in opt.resize_or_crop: transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.fineSize))) @@ -59,11 +59,11 @@ def get_transform(opt, params, method=Image.BICUBIC, normalize=True): (0.5, 0.5, 0.5))] return transforms.Compose(transform_list) -def normalize(): +def normalize(): return transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) def __make_power_2(img, base, method=Image.BICUBIC): - ow, oh = img.size + ow, oh = img.size h = int(round(oh / base) * base) w = int(round(ow / base) * base) if (h == oh) and (w == ow): @@ -73,16 +73,16 @@ def __make_power_2(img, base, method=Image.BICUBIC): def __scale_width(img, target_width, method=Image.BICUBIC): ow, oh = img.size if (ow == target_width): - return img + return img w = target_width - h = int(target_width * oh / ow) + h = int(target_width * oh / ow) return img.resize((w, h), method) def __crop(img, pos, size): ow, oh = img.size x1, y1 = pos tw = th = size - if (ow > tw or oh > th): + if (ow > tw or oh > th): return img.crop((x1, y1, x1 + tw, y1 + th)) return img diff --git a/pix2pixhd/encode_features.py b/pix2pixhd/encode_features.py index 5791ee1f..e9d51810 100755 --- 
a/pix2pixhd/encode_features.py +++ b/pix2pixhd/encode_features.py @@ -6,8 +6,8 @@ opt = TrainOptions().parse() opt.nThreads = 1 -opt.batchSize = 1 -opt.serial_batches = True +opt.batchSize = 1 +opt.serial_batches = True opt.no_flip = True opt.instance_feat = True opt.continue_train = True @@ -27,12 +27,12 @@ features = {} for label in range(opt.label_nc): features[label] = np.zeros((0, opt.feat_num+1)) - for i, data in enumerate(dataset): + for i, data in enumerate(dataset): feat = model.module.encode_features(data['image'], data['inst']) for label in range(opt.label_nc): - features[label] = np.append(features[label], feat[label], axis=0) - - print('%d / %d images' % (i+1, dataset_size)) + features[label] = np.append(features[label], feat[label], axis=0) + + print('%d / %d images' % (i+1, dataset_size)) save_name = os.path.join(save_path, name + '.npy') np.save(save_name, features) @@ -44,7 +44,7 @@ centers = {} for label in range(opt.label_nc): feat = features[label] - feat = feat[feat[:,-1] > 0.5, :-1] + feat = feat[feat[:,-1] > 0.5, :-1] if feat.shape[0]: n_clusters = min(feat.shape[0], opt.n_clusters) kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat) diff --git a/pix2pixhd/models/base_model.py b/pix2pixhd/models/base_model.py index 486a9155..27bec097 100755 --- a/pix2pixhd/models/base_model.py +++ b/pix2pixhd/models/base_model.py @@ -47,11 +47,11 @@ def save_network(self, network, network_label, epoch_label, gpu_ids): network.cuda() # helper loading function that can be used by subclasses - def load_network(self, network, network_label, epoch_label, save_dir=''): + def load_network(self, network, network_label, epoch_label, save_dir=''): save_filename = '%s_net_%s.pth' % (epoch_label, network_label) if not save_dir: save_dir = self.save_dir - save_path = os.path.join(save_dir, save_filename) + save_path = os.path.join(save_dir, save_filename) if not os.path.isfile(save_path): print('%s not exists yet!' 
% save_path) if network_label == 'G': @@ -60,17 +60,17 @@ def load_network(self, network, network_label, epoch_label, save_dir=''): #network.load_state_dict(torch.load(save_path)) try: network.load_state_dict(torch.load(save_path)) - except: - pretrained_dict = torch.load(save_path) + except: + pretrained_dict = torch.load(save_path) model_dict = network.state_dict() try: - pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} network.load_state_dict(pretrained_dict) if self.opt.verbose: print('Pretrained network %s has excessive layers; Only loading layers that are used' % network_label) except: print('Pretrained network %s has fewer layers; The following are not initialized:' % network_label) - for k, v in pretrained_dict.items(): + for k, v in pretrained_dict.items(): if v.size() == model_dict[k].size(): model_dict[k] = v @@ -78,14 +78,14 @@ def load_network(self, network, network_label, epoch_label, save_dir=''): not_initialized = set() else: from sets import Set - not_initialized = Set() + not_initialized = Set() for k, v in model_dict.items(): if k not in pretrained_dict or v.size() != pretrained_dict[k].size(): not_initialized.add(k.split('.')[0]) - + print(sorted(not_initialized)) - network.load_state_dict(model_dict) + network.load_state_dict(model_dict) def update_learning_rate(): pass diff --git a/pix2pixhd/models/networks.py b/pix2pixhd/models/networks.py index 512c8eac..e80205ad 100755 --- a/pix2pixhd/models/networks.py +++ b/pix2pixhd/models/networks.py @@ -25,13 +25,13 @@ def get_norm_layer(norm_type='instance'): raise NotImplementedError('normalization layer [%s] is not found' % norm_type) return norm_layer -def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_global=9, n_local_enhancers=1, - n_blocks_local=3, norm='instance', gpu_ids=[]): - norm_layer = get_norm_layer(norm_type=norm) - if netG == 'global': - netG = GlobalGenerator(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, norm_layer) - elif netG == 'local': - netG = LocalEnhancer(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, +def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_global=9, n_local_enhancers=1, + n_blocks_local=3, norm='instance', gpu_ids=[]): + norm_layer = get_norm_layer(norm_type=norm) + if netG == 'global': + netG = GlobalGenerator(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, norm_layer) + elif netG == 'local': + netG = LocalEnhancer(input_nc, output_nc, ngf, n_downsample_global, n_blocks_global, n_local_enhancers, n_blocks_local, norm_layer) elif netG == 'encoder': netG = Encoder(input_nc, output_nc, ngf, n_downsample_global, norm_layer) @@ -39,14 +39,14 @@ def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_glo raise(NotImplementedError('generator [%s] not implemented!' 
% netG)) #print(netG) if len(gpu_ids) > 0: - assert(torch.cuda.is_available()) + assert(torch.cuda.is_available()) netG.cuda(gpu_ids[0]) netG.apply(weights_init) return netG -def define_D(input_nc, ndf, n_layers_D, norm='instance', use_sigmoid=False, num_D=1, getIntermFeat=False, gpu_ids=[]): - norm_layer = get_norm_layer(norm_type=norm) - netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, norm_layer, use_sigmoid, num_D, getIntermFeat) +def define_D(input_nc, ndf, n_layers_D, norm='instance', use_sigmoid=False, num_D=1, getIntermFeat=False, gpu_ids=[]): + norm_layer = get_norm_layer(norm_type=norm) + netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, norm_layer, use_sigmoid, num_D, getIntermFeat) #print(netD) if len(gpu_ids) > 0: assert(torch.cuda.is_available()) @@ -106,46 +106,46 @@ def __call__(self, input, target_is_real): target_tensor = self.get_target_tensor(pred, target_is_real) loss += self.loss(pred, target_tensor) return loss - else: + else: target_tensor = self.get_target_tensor(input[-1], target_is_real) return self.loss(input[-1], target_tensor) class VGGLoss(nn.Module): def __init__(self, gpu_ids): - super(VGGLoss, self).__init__() + super(VGGLoss, self).__init__() self.vgg = Vgg19().cuda() self.criterion = nn.L1Loss() - self.weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0] + self.weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0] - def forward(self, x, y): + def forward(self, x, y): x_vgg, y_vgg = self.vgg(x), self.vgg(y) loss = 0 for i in range(len(x_vgg)): - loss += self.weights[i] * self.criterion(x_vgg[i], y_vgg[i].detach()) + loss += self.weights[i] * self.criterion(x_vgg[i], y_vgg[i].detach()) return loss ############################################################################## # Generator ############################################################################## class LocalEnhancer(nn.Module): - def __init__(self, input_nc, output_nc, ngf=32, n_downsample_global=3, n_blocks_global=9, - n_local_enhancers=1, n_blocks_local=3, norm_layer=nn.BatchNorm2d, padding_type='reflect'): + def __init__(self, input_nc, output_nc, ngf=32, n_downsample_global=3, n_blocks_global=9, + n_local_enhancers=1, n_blocks_local=3, norm_layer=nn.BatchNorm2d, padding_type='reflect'): super(LocalEnhancer, self).__init__() self.n_local_enhancers = n_local_enhancers - - ###### global generator model ##### + + ###### global generator model ##### ngf_global = ngf * (2**n_local_enhancers) - model_global = GlobalGenerator(input_nc, output_nc, ngf_global, n_downsample_global, n_blocks_global, norm_layer).model - model_global = [model_global[i] for i in range(len(model_global)-3)] # get rid of final convolution layers - self.model = nn.Sequential(*model_global) + model_global = GlobalGenerator(input_nc, output_nc, ngf_global, n_downsample_global, n_blocks_global, norm_layer).model + model_global = [model_global[i] for i in range(len(model_global)-3)] # get rid of final convolution layers + self.model = nn.Sequential(*model_global) ###### local enhancer layers ##### for n in range(1, n_local_enhancers+1): - ### downsample + ### downsample ngf_global = ngf * (2**(n_local_enhancers-n)) - model_downsample = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf_global, kernel_size=7, padding=0), + model_downsample = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf_global, kernel_size=7, padding=0), norm_layer(ngf_global), nn.ReLU(True), - nn.Conv2d(ngf_global, ngf_global * 2, kernel_size=3, stride=2, padding=1), + nn.Conv2d(ngf_global, ngf_global * 2, kernel_size=3, stride=2, padding=1), 
norm_layer(ngf_global * 2), nn.ReLU(True)] ### residual blocks model_upsample = [] @@ -153,40 +153,40 @@ def __init__(self, input_nc, output_nc, ngf=32, n_downsample_global=3, n_blocks_ model_upsample += [ResnetBlock(ngf_global * 2, padding_type=padding_type, norm_layer=norm_layer)] ### upsample - model_upsample += [nn.ConvTranspose2d(ngf_global * 2, ngf_global, kernel_size=3, stride=2, padding=1, output_padding=1), - norm_layer(ngf_global), nn.ReLU(True)] + model_upsample += [nn.ConvTranspose2d(ngf_global * 2, ngf_global, kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(ngf_global), nn.ReLU(True)] ### final convolution - if n == n_local_enhancers: - model_upsample += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] - + if n == n_local_enhancers: + model_upsample += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] + setattr(self, 'model'+str(n)+'_1', nn.Sequential(*model_downsample)) - setattr(self, 'model'+str(n)+'_2', nn.Sequential(*model_upsample)) - + setattr(self, 'model'+str(n)+'_2', nn.Sequential(*model_upsample)) + self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) - def forward(self, input): + def forward(self, input): ### create input pyramid input_downsampled = [input] for i in range(self.n_local_enhancers): input_downsampled.append(self.downsample(input_downsampled[-1])) ### output at coarest level - output_prev = self.model(input_downsampled[-1]) + output_prev = self.model(input_downsampled[-1]) ### build up one layer at a time for n_local_enhancers in range(1, self.n_local_enhancers+1): model_downsample = getattr(self, 'model'+str(n_local_enhancers)+'_1') - model_upsample = getattr(self, 'model'+str(n_local_enhancers)+'_2') - input_i = input_downsampled[self.n_local_enhancers-n_local_enhancers] + model_upsample = getattr(self, 'model'+str(n_local_enhancers)+'_2') + input_i = input_downsampled[self.n_local_enhancers-n_local_enhancers] output_prev = model_upsample(model_downsample(input_i) + output_prev) return output_prev class GlobalGenerator(nn.Module): - def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, + def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d, padding_type='reflect'): assert(n_blocks >= 0) - super(GlobalGenerator, self).__init__() - activation = nn.ReLU(True) + super(GlobalGenerator, self).__init__() + activation = nn.ReLU(True) model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), norm_layer(ngf), activation] ### downsample @@ -199,18 +199,18 @@ def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, no mult = 2**n_downsampling for i in range(n_blocks): model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer)] - - ### upsample + + ### upsample for i in range(n_downsampling): mult = 2**(n_downsampling - i) model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), norm_layer(int(ngf * mult / 2)), activation] - model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] + model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] self.model = nn.Sequential(*model) - + def forward(self, input): - return self.model(input) - + return self.model(input) + # Define a resnet block class 
ResnetBlock(nn.Module): def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(True), use_dropout=False): @@ -255,54 +255,54 @@ def forward(self, x): class Encoder(nn.Module): def __init__(self, input_nc, output_nc, ngf=32, n_downsampling=4, norm_layer=nn.BatchNorm2d): - super(Encoder, self).__init__() - self.output_nc = output_nc + super(Encoder, self).__init__() + self.output_nc = output_nc - model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), - norm_layer(ngf), nn.ReLU(True)] + model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), + norm_layer(ngf), nn.ReLU(True)] ### downsample for i in range(n_downsampling): mult = 2**i model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), norm_layer(ngf * mult * 2), nn.ReLU(True)] - ### upsample + ### upsample for i in range(n_downsampling): mult = 2**(n_downsampling - i) model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), - norm_layer(int(ngf * mult / 2)), nn.ReLU(True)] + norm_layer(int(ngf * mult / 2)), nn.ReLU(True)] model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()] - self.model = nn.Sequential(*model) + self.model = nn.Sequential(*model) def forward(self, input, inst): outputs = self.model(input) # instance-wise average pooling outputs_mean = outputs.clone() - inst_list = np.unique(inst.cpu().numpy().astype(int)) + inst_list = np.unique(inst.cpu().numpy().astype(int)) for i in inst_list: for b in range(input.size()[0]): - indices = (inst[b:b+1] == int(i)).nonzero() # n x 4 + indices = (inst[b:b+1] == int(i)).nonzero() # n x 4 for j in range(self.output_nc): - output_ins = outputs[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] - mean_feat = torch.mean(output_ins).expand_as(output_ins) - outputs_mean[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] = mean_feat + output_ins = outputs[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] + mean_feat = torch.mean(output_ins).expand_as(output_ins) + outputs_mean[indices[:,0] + b, indices[:,1] + j, indices[:,2], indices[:,3]] = mean_feat return outputs_mean class MultiscaleDiscriminator(nn.Module): - def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, + def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, num_D=3, getIntermFeat=False): super(MultiscaleDiscriminator, self).__init__() self.num_D = num_D self.n_layers = n_layers self.getIntermFeat = getIntermFeat - + for i in range(num_D): netD = NLayerDiscriminator(input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat) - if getIntermFeat: + if getIntermFeat: for j in range(n_layers+2): - setattr(self, 'scale'+str(i)+'_layer'+str(j), getattr(netD, 'model'+str(j))) + setattr(self, 'scale'+str(i)+'_layer'+str(j), getattr(netD, 'model'+str(j))) else: setattr(self, 'layer'+str(i), netD.model) @@ -317,7 +317,7 @@ def singleD_forward(self, model, input): else: return [model(input)] - def forward(self, input): + def forward(self, input): num_D = self.num_D result = [] input_downsampled = input @@ -330,7 +330,7 @@ def forward(self, input): if i != (num_D-1): input_downsampled = self.downsample(input_downsampled) return result - + # Defines the PatchGAN discriminator with the specified arguments. 
class NLayerDiscriminator(nn.Module): def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, getIntermFeat=False): @@ -381,7 +381,7 @@ def forward(self, input): res.append(model(res[-1])) return res[1:] else: - return self.model(input) + return self.model(input) class Vgg19(torch.nn.Module): def __init__(self, requires_grad=False): @@ -408,9 +408,9 @@ def __init__(self, requires_grad=False): def forward(self, X): h_relu1 = self.slice1(X) - h_relu2 = self.slice2(h_relu1) - h_relu3 = self.slice3(h_relu2) - h_relu4 = self.slice4(h_relu3) - h_relu5 = self.slice5(h_relu4) + h_relu2 = self.slice2(h_relu1) + h_relu3 = self.slice3(h_relu2) + h_relu4 = self.slice4(h_relu3) + h_relu5 = self.slice5(h_relu4) out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5] return out diff --git a/pix2pixhd/models/pix2pixHD_model.py b/pix2pixhd/models/pix2pixHD_model.py index af5fef52..d990a213 100755 --- a/pix2pixhd/models/pix2pixHD_model.py +++ b/pix2pixhd/models/pix2pixHD_model.py @@ -8,13 +8,13 @@ class Pix2PixHDModel(BaseModel): def name(self): return 'Pix2PixHDModel' - + def init_loss_filter(self, use_gan_feat_loss, use_vgg_loss): flags = (True, use_gan_feat_loss, use_vgg_loss, True, True) def loss_filter(g_gan, g_gan_feat, g_vgg, d_real, d_fake): return [l for (l,f) in zip((g_gan,g_gan_feat,g_vgg,d_real,d_fake),flags) if f] return loss_filter - + def initialize(self, opt): BaseModel.initialize(self, opt) if opt.resize_or_crop != 'none' or not opt.isTrain: # when training at full res this causes OOM @@ -24,16 +24,16 @@ def initialize(self, opt): self.gen_features = self.use_features and not self.opt.load_features input_nc = opt.label_nc if opt.label_nc != 0 else opt.input_nc - ##### define networks + ##### define networks # Generator network - netG_input_nc = input_nc + netG_input_nc = input_nc if not opt.no_instance: netG_input_nc += 1 if self.use_features: - netG_input_nc += opt.feat_num - self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG, - opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, - opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids) + netG_input_nc += opt.feat_num + self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG, + opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, + opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids) # Discriminator network if self.isTrain: @@ -41,24 +41,24 @@ def initialize(self, opt): netD_input_nc = input_nc + opt.output_nc if not opt.no_instance: netD_input_nc += 1 - self.netD = networks.define_D(netD_input_nc, opt.ndf, opt.n_layers_D, opt.norm, use_sigmoid, + self.netD = networks.define_D(netD_input_nc, opt.ndf, opt.n_layers_D, opt.norm, use_sigmoid, opt.num_D, not opt.no_ganFeat_loss, gpu_ids=self.gpu_ids) ### Encoder network - if self.gen_features: - self.netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, 'encoder', - opt.n_downsample_E, norm=opt.norm, gpu_ids=self.gpu_ids) + if self.gen_features: + self.netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, 'encoder', + opt.n_downsample_E, norm=opt.norm, gpu_ids=self.gpu_ids) if self.opt.verbose: print('---------- Networks initialized -------------') # load networks if not self.isTrain or opt.continue_train or opt.load_pretrain: pretrained_path = '' if not self.isTrain else opt.load_pretrain - self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path) + self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path) if self.isTrain: - 
self.load_network(self.netD, 'D', opt.which_epoch, pretrained_path) + self.load_network(self.netD, 'D', opt.which_epoch, pretrained_path) if self.gen_features: - self.load_network(self.netE, 'E', opt.which_epoch, pretrained_path) + self.load_network(self.netE, 'E', opt.which_epoch, pretrained_path) # set loss functions and optimizers if self.isTrain: @@ -69,19 +69,19 @@ def initialize(self, opt): # define loss functions self.loss_filter = self.init_loss_filter(not opt.no_ganFeat_loss, not opt.no_vgg_loss) - - self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor) + + self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor) self.criterionFeat = torch.nn.L1Loss() - if not opt.no_vgg_loss: + if not opt.no_vgg_loss: self.criterionVGG = networks.VGGLoss(self.gpu_ids) - - + + # Names so we can breakout loss self.loss_names = self.loss_filter('G_GAN','G_GAN_Feat','G_VGG','D_real', 'D_fake') # initialize optimizers # optimizer G - if opt.niter_fix_global > 0: + if opt.niter_fix_global > 0: import sys if sys.version_info >= (3,0): finetune_list = set() @@ -91,20 +91,20 @@ def initialize(self, opt): params_dict = dict(self.netG.named_parameters()) params = [] - for key, value in params_dict.items(): - if key.startswith('model' + str(opt.n_local_enhancers)): + for key, value in params_dict.items(): + if key.startswith('model' + str(opt.n_local_enhancers)): params += [value] - finetune_list.add(key.split('.')[0]) + finetune_list.add(key.split('.')[0]) print('------------- Only training the local enhancer network (for %d epochs) ------------' % opt.niter_fix_global) - print('The layers that are finetuned are ', sorted(finetune_list)) + print('The layers that are finetuned are ', sorted(finetune_list)) else: params = list(self.netG.parameters()) - if self.gen_features: - params += list(self.netE.parameters()) - self.optimizer_G = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999)) + if self.gen_features: + params += list(self.netE.parameters()) + self.optimizer_G = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999)) - # optimizer D - params = list(self.netD.parameters()) + # optimizer D + params = list(self.netD.parameters()) self.optimizer_D = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999)) def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None): @@ -145,7 +145,7 @@ def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None) def discriminate(self, input_label, test_image, use_pool=False): input_concat = torch.cat((input_label, test_image.detach()), dim=1) - if use_pool: + if use_pool: fake_query = self.fake_pool.query(input_concat) return self.netD.forward(fake_query) else: @@ -153,29 +153,29 @@ def discriminate(self, input_label, test_image, use_pool=False): def forward(self, label, inst, image, feat, infer=False): # Encode Inputs - input_label, inst_map, real_image, feat_map = self.encode_input(label, inst, image, feat) + input_label, inst_map, real_image, feat_map = self.encode_input(label, inst, image, feat) # Fake Generation if self.use_features: if not self.opt.load_features: - feat_map = self.netE.forward(real_image, inst_map) - input_concat = torch.cat((input_label, feat_map), dim=1) + feat_map = self.netE.forward(real_image, inst_map) + input_concat = torch.cat((input_label, feat_map), dim=1) else: input_concat = input_label fake_image = self.netG.forward(input_concat) # Fake Detection and Loss pred_fake_pool = self.discriminate(input_label, fake_image, 
use_pool=True) - loss_D_fake = self.criterionGAN(pred_fake_pool, False) + loss_D_fake = self.criterionGAN(pred_fake_pool, False) - # Real Detection and Loss + # Real Detection and Loss pred_real = self.discriminate(input_label, real_image) loss_D_real = self.criterionGAN(pred_real, True) - # GAN loss (Fake Passability Loss) - pred_fake = self.netD.forward(torch.cat((input_label, fake_image), dim=1)) - loss_G_GAN = self.criterionGAN(pred_fake, True) - + # GAN loss (Fake Passability Loss) + pred_fake = self.netD.forward(torch.cat((input_label, fake_image), dim=1)) + loss_G_GAN = self.criterionGAN(pred_fake, True) + # GAN feature matching loss loss_G_GAN_Feat = 0 if not self.opt.no_ganFeat_loss: @@ -185,17 +185,17 @@ def forward(self, label, inst, image, feat, infer=False): for j in range(len(pred_fake[i])-1): loss_G_GAN_Feat += D_weights * feat_weights * \ self.criterionFeat(pred_fake[i][j], pred_real[i][j].detach()) * self.opt.lambda_feat - + # VGG feature matching loss loss_G_VGG = 0 if not self.opt.no_vgg_loss: loss_G_VGG = self.criterionVGG(fake_image, real_image) * self.opt.lambda_feat - + # Only return the fake_B image if necessary to save BW return [ self.loss_filter( loss_G_GAN, loss_G_GAN_Feat, loss_G_VGG, loss_D_real, loss_D_fake ), None if not infer else fake_image ] def inference(self, label, inst, image=None): - # Encode Inputs + # Encode Inputs image = image if image is not None else None input_label, inst_map, real_image, _ = self.encode_input(label, inst, image) @@ -205,32 +205,32 @@ def inference(self, label, inst, image=None): # encode the real image to get feature map feat_map = self.netE.forward(real_image, inst_map) else: - # sample clusters from precomputed features + # sample clusters from precomputed features feat_map = self.sample_features(inst_map) - input_concat = torch.cat((input_label, feat_map), dim=1) + input_concat = torch.cat((input_label, feat_map), dim=1) else: - input_concat = input_label - + input_concat = input_label + with torch.no_grad(): fake_image = self.netG.forward(input_concat) return fake_image - def sample_features(self, inst): - # read precomputed feature clusters - cluster_path = os.path.join(self.opt.checkpoints_dir, self.opt.name, self.opt.cluster_path) + def sample_features(self, inst): + # read precomputed feature clusters + cluster_path = os.path.join(self.opt.checkpoints_dir, self.opt.name, self.opt.cluster_path) features_clustered = np.load(cluster_path, encoding='latin1').item() # randomly sample from the feature clusters - inst_np = inst.cpu().numpy().astype(int) + inst_np = inst.cpu().numpy().astype(int) feat_map = self.Tensor(inst.size()[0], self.opt.feat_num, inst.size()[2], inst.size()[3]) - for i in np.unique(inst_np): + for i in np.unique(inst_np): label = i if i < 1000 else i//1000 if label in features_clustered: feat = features_clustered[label] - cluster_idx = np.random.randint(0, feat.shape[0]) - + cluster_idx = np.random.randint(0, feat.shape[0]) + idx = (inst == int(i)).nonzero() - for k in range(self.opt.feat_num): + for k in range(self.opt.feat_num): feat_map[idx[:,0], idx[:,1] + k, idx[:,2], idx[:,3]] = feat[cluster_idx, k] if self.opt.data_type==16: feat_map = feat_map.half() @@ -254,7 +254,7 @@ def encode_features(self, image, inst): idx = (inst == int(i)).nonzero() num = idx.size()[0] idx = idx[num//2,:] - val = np.zeros((1, feat_num+1)) + val = np.zeros((1, feat_num+1)) for k in range(feat_num): val[0, k] = feat_map[idx[0], idx[1] + k, idx[2], idx[3]].item() val[0, feat_num] = float(num) / (h * w // block_num) @@ 
-285,14 +285,14 @@ def update_fixed_params(self): # after fixing the global generator for a number of iterations, also start finetuning it params = list(self.netG.parameters()) if self.gen_features: - params += list(self.netE.parameters()) + params += list(self.netE.parameters()) self.optimizer_G = torch.optim.Adam(params, lr=self.opt.lr, betas=(self.opt.beta1, 0.999)) if self.opt.verbose: print('------------ Now also finetuning global generator -----------') def update_learning_rate(self): lrd = self.opt.lr / self.opt.niter_decay - lr = self.old_lr - lrd + lr = self.old_lr - lrd for param_group in self.optimizer_D.param_groups: param_group['lr'] = lr for param_group in self.optimizer_G.param_groups: @@ -306,4 +306,4 @@ def forward(self, inp): label, inst = inp return self.inference(label, inst) - + diff --git a/pix2pixhd/models/ui_model.py b/pix2pixhd/models/ui_model.py index 1a5537cb..59f4be67 100755 --- a/pix2pixhd/models/ui_model.py +++ b/pix2pixhd/models/ui_model.py @@ -18,37 +18,37 @@ def initialize(self, opt): netG_input_nc = opt.label_nc if not opt.no_instance: - netG_input_nc += 1 - if self.use_features: - netG_input_nc += opt.feat_num + netG_input_nc += 1 + if self.use_features: + netG_input_nc += opt.feat_num - self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG, - opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, - opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids) + self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG, + opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, + opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids) self.load_network(self.netG, 'G', opt.which_epoch) print('---------- Networks initialized -------------') def toTensor(self, img, normalize=False): tensor = torch.from_numpy(np.array(img, np.int32, copy=False)) - tensor = tensor.view(1, img.size[1], img.size[0], len(img.mode)) + tensor = tensor.view(1, img.size[1], img.size[0], len(img.mode)) tensor = tensor.transpose(1, 2).transpose(1, 3).contiguous() if normalize: - return (tensor.float()/255.0 - 0.5) / 0.5 + return (tensor.float()/255.0 - 0.5) / 0.5 return tensor.float() def load_image(self, label_path, inst_path, feat_path): opt = self.opt # read label map - label_img = Image.open(label_path) + label_img = Image.open(label_path) if label_path.find('face') != -1: label_img = label_img.convert('L') - ow, oh = label_img.size + ow, oh = label_img.size w = opt.loadSize - h = int(w * oh / ow) + h = int(w * oh / ow) label_img = label_img.resize((w, h), Image.NEAREST) - label_map = self.toTensor(label_img) - + label_map = self.toTensor(label_img) + # onehot vector input for label map self.label_map = label_map.cuda() oneHot_size = (1, opt.label_nc, h, w) @@ -57,52 +57,52 @@ def load_image(self, label_path, inst_path, feat_path): # read instance map if not opt.no_instance: - inst_img = Image.open(inst_path) - inst_img = inst_img.resize((w, h), Image.NEAREST) + inst_img = Image.open(inst_path) + inst_img = inst_img.resize((w, h), Image.NEAREST) self.inst_map = self.toTensor(inst_img).cuda() - self.edge_map = self.get_edges(self.inst_map) + self.edge_map = self.get_edges(self.inst_map) self.net_input = Variable(torch.cat((self.input_label, self.edge_map), dim=1), volatile=True) else: - self.net_input = Variable(self.input_label, volatile=True) - + self.net_input = Variable(self.input_label, volatile=True) + self.features_clustered = np.load(feat_path).item() - self.object_map = self.inst_map if opt.instance_feat else 
self.label_map - - object_np = self.object_map.cpu().numpy().astype(int) - self.feat_map = self.Tensor(1, opt.feat_num, h, w).zero_() + self.object_map = self.inst_map if opt.instance_feat else self.label_map + + object_np = self.object_map.cpu().numpy().astype(int) + self.feat_map = self.Tensor(1, opt.feat_num, h, w).zero_() self.cluster_indices = np.zeros(self.opt.label_nc, np.uint8) - for i in np.unique(object_np): + for i in np.unique(object_np): label = i if i < 1000 else i//1000 if label in self.features_clustered: feat = self.features_clustered[label] np.random.seed(i+1) cluster_idx = np.random.randint(0, feat.shape[0]) self.cluster_indices[label] = cluster_idx - idx = (self.object_map == i).nonzero() + idx = (self.object_map == i).nonzero() self.set_features(idx, feat, cluster_idx) - self.net_input_original = self.net_input.clone() + self.net_input_original = self.net_input.clone() self.label_map_original = self.label_map.clone() self.feat_map_original = self.feat_map.clone() if not opt.no_instance: - self.inst_map_original = self.inst_map.clone() + self.inst_map_original = self.inst_map.clone() def reset(self): - self.net_input = self.net_input_prev = self.net_input_original.clone() + self.net_input = self.net_input_prev = self.net_input_original.clone() self.label_map = self.label_map_prev = self.label_map_original.clone() self.feat_map = self.feat_map_prev = self.feat_map_original.clone() if not self.opt.no_instance: self.inst_map = self.inst_map_prev = self.inst_map_original.clone() - self.object_map = self.inst_map if self.opt.instance_feat else self.label_map + self.object_map = self.inst_map if self.opt.instance_feat else self.label_map - def undo(self): + def undo(self): self.net_input = self.net_input_prev self.label_map = self.label_map_prev self.feat_map = self.feat_map_prev if not self.opt.no_instance: self.inst_map = self.inst_map_prev - self.object_map = self.inst_map if self.opt.instance_feat else self.label_map - + self.object_map = self.inst_map if self.opt.instance_feat else self.label_map + # get boundary map from instance map def get_edges(self, t): edge = torch.cuda.ByteTensor(t.size()).zero_() @@ -113,7 +113,7 @@ def get_edges(self, t): return edge.float() # change the label at the source position to the label at the target position - def change_labels(self, click_src, click_tgt): + def change_labels(self, click_src, click_tgt): y_src, x_src = click_src[0], click_src[1] y_tgt, x_tgt = click_tgt[0], click_tgt[1] label_src = int(self.label_map[0, 0, y_src, x_src]) @@ -121,17 +121,17 @@ def change_labels(self, click_src, click_tgt): label_tgt = int(self.label_map[0, 0, y_tgt, x_tgt]) inst_tgt = self.inst_map[0, 0, y_tgt, x_tgt] - idx_src = (self.inst_map == inst_src).nonzero() + idx_src = (self.inst_map == inst_src).nonzero() # need to change 3 things: label map, instance map, and feature map if idx_src.shape: # backup current maps - self.backup_current_state() + self.backup_current_state() # change both the label map and the network input self.label_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt self.net_input[idx_src[:,0], idx_src[:,1] + label_src, idx_src[:,2], idx_src[:,3]] = 0 - self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 - + self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 + # update the instance map (and the network input) if inst_tgt > 1000: # if different instances have different ids, give the new object a new id @@ -140,8 +140,8 @@ def 
change_labels(self, click_src, click_tgt): self.inst_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = inst_tgt self.net_input[:,-1,:,:] = self.get_edges(self.inst_map) - # also copy the source features to the target position - idx_tgt = (self.inst_map == inst_tgt).nonzero() + # also copy the source features to the target position + idx_tgt = (self.inst_map == inst_tgt).nonzero() if idx_tgt.shape: self.copy_features(idx_src, idx_tgt[0,:]) @@ -149,7 +149,7 @@ def change_labels(self, click_src, click_tgt): # add strokes of target label in the image def add_strokes(self, click_src, label_tgt, bw, save): - # get the region of the new strokes (bw is the brush width) + # get the region of the new strokes (bw is the brush width) size = self.net_input.size() h, w = size[2], size[3] idx_src = torch.LongTensor(bw**2, 4).fill_(0) @@ -158,38 +158,38 @@ def add_strokes(self, click_src, label_tgt, bw, save): for j in range(bw): idx_src[i*bw+j, 3] = min(w-1, max(0, click_src[1]-bw//2 + j)) idx_src = idx_src.cuda() - + # again, need to update 3 things if idx_src.shape: # backup current maps if save: self.backup_current_state() - # update the label map (and the network input) in the stroke region + # update the label map (and the network input) in the stroke region self.label_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt for k in range(self.opt.label_nc): self.net_input[idx_src[:,0], idx_src[:,1] + k, idx_src[:,2], idx_src[:,3]] = 0 - self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 + self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 # update the instance map (and the network input) self.inst_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt self.net_input[:,-1,:,:] = self.get_edges(self.inst_map) - + # also update the features if available - if self.opt.instance_feat: + if self.opt.instance_feat: feat = self.features_clustered[label_tgt] - #np.random.seed(label_tgt+1) + #np.random.seed(label_tgt+1) #cluster_idx = np.random.randint(0, feat.shape[0]) cluster_idx = self.cluster_indices[label_tgt] - self.set_features(idx_src, feat, cluster_idx) - + self.set_features(idx_src, feat, cluster_idx) + self.fake_image = util.tensor2im(self.single_forward(self.net_input, self.feat_map)) # add an object to the clicked position with selected style def add_objects(self, click_src, label_tgt, mask, style_id=0): y, x = click_src[0], click_src[1] - mask = np.transpose(mask, (2, 0, 1))[np.newaxis,...] - idx_src = torch.from_numpy(mask).cuda().nonzero() + mask = np.transpose(mask, (2, 0, 1))[np.newaxis,...] 
+ idx_src = torch.from_numpy(mask).cuda().nonzero() idx_src[:,2] += y idx_src[:,3] += x @@ -197,18 +197,18 @@ def add_objects(self, click_src, label_tgt, mask, style_id=0): self.backup_current_state() # update label map - self.label_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt + self.label_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt for k in range(self.opt.label_nc): self.net_input[idx_src[:,0], idx_src[:,1] + k, idx_src[:,2], idx_src[:,3]] = 0 - self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 + self.net_input[idx_src[:,0], idx_src[:,1] + label_tgt, idx_src[:,2], idx_src[:,3]] = 1 # update instance map self.inst_map[idx_src[:,0], idx_src[:,1], idx_src[:,2], idx_src[:,3]] = label_tgt self.net_input[:,-1,:,:] = self.get_edges(self.inst_map) - + # update feature map - self.set_features(idx_src, self.feat, style_id) - + self.set_features(idx_src, self.feat, style_id) + self.fake_image = util.tensor2im(self.single_forward(self.net_input, self.feat_map)) def single_forward(self, net_input, feat_map): @@ -216,26 +216,26 @@ def single_forward(self, net_input, feat_map): fake_image = self.netG.forward(net_input) if fake_image.size()[0] == 1: - return fake_image.data[0] + return fake_image.data[0] return fake_image.data # generate all outputs for different styles - def style_forward(self, click_pt, style_id=-1): - if click_pt is None: + def style_forward(self, click_pt, style_id=-1): + if click_pt is None: self.fake_image = util.tensor2im(self.single_forward(self.net_input, self.feat_map)) self.crop = None - self.mask = None - else: + self.mask = None + else: instToChange = int(self.object_map[0, 0, click_pt[0], click_pt[1]]) self.instToChange = instToChange - label = instToChange if instToChange < 1000 else instToChange//1000 + label = instToChange if instToChange < 1000 else instToChange//1000 self.feat = self.features_clustered[label] self.fake_image = [] self.mask = self.object_map == instToChange idx = self.mask.nonzero() - self.get_crop_region(idx) - if idx.size(): + self.get_crop_region(idx) + if idx.size(): if style_id == -1: (min_y, min_x, max_y, max_x) = self.crop ### original @@ -243,30 +243,30 @@ def style_forward(self, click_pt, style_id=-1): self.set_features(idx, self.feat, cluster_idx) fake_image = self.single_forward(self.net_input, self.feat_map) fake_image = util.tensor2im(fake_image[:,min_y:max_y,min_x:max_x]) - self.fake_image.append(fake_image) + self.fake_image.append(fake_image) """### To speed up previewing different style results, either crop or downsample the label maps if instToChange > 1000: - (min_y, min_x, max_y, max_x) = self.crop - ### crop + (min_y, min_x, max_y, max_x) = self.crop + ### crop _, _, h, w = self.net_input.size() offset = 512 y_start, x_start = max(0, min_y-offset), max(0, min_x-offset) y_end, x_end = min(h, (max_y + offset)), min(w, (max_x + offset)) y_region = slice(y_start, y_start+(y_end-y_start)//16*16) x_region = slice(x_start, x_start+(x_end-x_start)//16*16) - net_input = self.net_input[:,:,y_region,x_region] - for cluster_idx in range(self.opt.multiple_output): + net_input = self.net_input[:,:,y_region,x_region] + for cluster_idx in range(self.opt.multiple_output): self.set_features(idx, self.feat, cluster_idx) - fake_image = self.single_forward(net_input, self.feat_map[:,:,y_region,x_region]) + fake_image = self.single_forward(net_input, self.feat_map[:,:,y_region,x_region]) fake_image = 
util.tensor2im(fake_image[:,min_y-y_start:max_y-y_start,min_x-x_start:max_x-x_start]) self.fake_image.append(fake_image) else: ### downsample - (min_y, min_x, max_y, max_x) = [crop//2 for crop in self.crop] - net_input = self.net_input[:,:,::2,::2] + (min_y, min_x, max_y, max_x) = [crop//2 for crop in self.crop] + net_input = self.net_input[:,:,::2,::2] size = net_input.size() - net_input_batch = net_input.expand(self.opt.multiple_output, size[1], size[2], size[3]) - for cluster_idx in range(self.opt.multiple_output): + net_input_batch = net_input.expand(self.opt.multiple_output, size[1], size[2], size[3]) + for cluster_idx in range(self.opt.multiple_output): self.set_features(idx, self.feat, cluster_idx) feat_map = self.feat_map[:,:,::2,::2] if cluster_idx == 0: @@ -276,24 +276,24 @@ def style_forward(self, click_pt, style_id=-1): fake_image_batch = self.single_forward(net_input_batch, feat_map_batch) for i in range(self.opt.multiple_output): self.fake_image.append(util.tensor2im(fake_image_batch[i,:,min_y:max_y,min_x:max_x]))""" - + else: self.set_features(idx, self.feat, style_id) self.cluster_indices[label] = style_id - self.fake_image = util.tensor2im(self.single_forward(self.net_input, self.feat_map)) + self.fake_image = util.tensor2im(self.single_forward(self.net_input, self.feat_map)) def backup_current_state(self): self.net_input_prev = self.net_input.clone() - self.label_map_prev = self.label_map.clone() - self.inst_map_prev = self.inst_map.clone() - self.feat_map_prev = self.feat_map.clone() + self.label_map_prev = self.label_map.clone() + self.inst_map_prev = self.inst_map.clone() + self.feat_map_prev = self.feat_map.clone() # crop the ROI and get the mask of the object def get_crop_region(self, idx): size = self.net_input.size() h, w = size[2], size[3] min_y, min_x = idx[:,2].min(), idx[:,3].min() - max_y, max_x = idx[:,2].max(), idx[:,3].max() + max_y, max_x = idx[:,2].max(), idx[:,3].max() crop_min = 128 if max_y - min_y < crop_min: min_y = max(0, (max_y + min_y) // 2 - crop_min // 2) @@ -301,46 +301,46 @@ def get_crop_region(self, idx): if max_x - min_x < crop_min: min_x = max(0, (max_x + min_x) // 2 - crop_min // 2) max_x = min(w-1, min_x + crop_min) - self.crop = (min_y, min_x, max_y, max_x) + self.crop = (min_y, min_x, max_y, max_x) self.mask = self.mask[:,:, min_y:max_y, min_x:max_x] # update the feature map once a new object is added or the label is changed - def update_features(self, cluster_idx, mask=None, click_pt=None): + def update_features(self, cluster_idx, mask=None, click_pt=None): self.feat_map_prev = self.feat_map.clone() # adding a new object if mask is not None: y, x = click_pt[0], click_pt[1] - mask = np.transpose(mask, (2,0,1))[np.newaxis,...] - idx = torch.from_numpy(mask).cuda().nonzero() + mask = np.transpose(mask, (2,0,1))[np.newaxis,...] 
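+        # same indexing scheme as in add_objects: the (assumed HxWxC) mask is
+        # reshaped to 1xCxHxW, and nonzero() gives the (batch, channel, y, x)
+        # rows that set_features() below overwrites with the chosen cluster's
+        # feature vector.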
+ idx = torch.from_numpy(mask).cuda().nonzero() idx[:,2] += y - idx[:,3] += x - # changing the label of an existing object - else: - idx = (self.object_map == self.instToChange).nonzero() + idx[:,3] += x + # changing the label of an existing object + else: + idx = (self.object_map == self.instToChange).nonzero() # update feature map - self.set_features(idx, self.feat, cluster_idx) + self.set_features(idx, self.feat, cluster_idx) # set the class features to the target feature - def set_features(self, idx, feat, cluster_idx): + def set_features(self, idx, feat, cluster_idx): for k in range(self.opt.feat_num): - self.feat_map[idx[:,0], idx[:,1] + k, idx[:,2], idx[:,3]] = feat[cluster_idx, k] + self.feat_map[idx[:,0], idx[:,1] + k, idx[:,2], idx[:,3]] = feat[cluster_idx, k] # copy the features at the target position to the source position - def copy_features(self, idx_src, idx_tgt): + def copy_features(self, idx_src, idx_tgt): for k in range(self.opt.feat_num): val = self.feat_map[idx_tgt[0], idx_tgt[1] + k, idx_tgt[2], idx_tgt[3]] - self.feat_map[idx_src[:,0], idx_src[:,1] + k, idx_src[:,2], idx_src[:,3]] = val + self.feat_map[idx_src[:,0], idx_src[:,1] + k, idx_src[:,2], idx_src[:,3]] = val - def get_current_visuals(self, getLabel=False): - mask = self.mask + def get_current_visuals(self, getLabel=False): + mask = self.mask if self.mask is not None: - mask = np.transpose(self.mask[0].cpu().float().numpy(), (1,2,0)).astype(np.uint8) + mask = np.transpose(self.mask[0].cpu().float().numpy(), (1,2,0)).astype(np.uint8) dict_list = [('fake_image', self.fake_image), ('mask', mask)] if getLabel: # only output label map if needed to save bandwidth - label = util.tensor2label(self.net_input.data[0], self.opt.label_nc) + label = util.tensor2label(self.net_input.data[0], self.opt.label_nc) dict_list += [('label', label)] return OrderedDict(dict_list) diff --git a/pix2pixhd/options/base_options.py b/pix2pixhd/options/base_options.py index b0316792..e3f4bed3 100755 --- a/pix2pixhd/options/base_options.py +++ b/pix2pixhd/options/base_options.py @@ -8,20 +8,20 @@ def __init__(self): self.parser = argparse.ArgumentParser() self.initialized = False - def initialize(self): + def initialize(self): # experiment specifics - self.parser.add_argument('--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models') + self.parser.add_argument('--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models') self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') self.parser.add_argument('--model', type=str, default='pix2pixHD', help='which model to use') - self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization') + self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization') self.parser.add_argument('--use_dropout', action='store_true', help='use dropout for the generator') self.parser.add_argument('--data_type', default=32, type=int, choices=[8, 16, 32], help="Supported data type i.e. 
8, 16, 32 bit")
         self.parser.add_argument('--verbose', action='store_true', default=False, help='toggles verbose')
         self.parser.add_argument('--fp16', action='store_true', default=False, help='train with AMP')
         self.parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training')
 
-        # input/output sizes 
+        # input/output sizes
         self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
         self.parser.add_argument('--loadSize', type=int, default=1024, help='scale images to this size')
         self.parser.add_argument('--fineSize', type=int, default=512, help='then crop to this size')
@@ -30,11 +30,11 @@ def initialize(self):
         self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels')
 
         # for setting inputs
-        self.parser.add_argument('--dataroot', type=str, default='./datasets/cityscapes/') 
+        self.parser.add_argument('--dataroot', type=str, default='./datasets/cityscapes/')
         self.parser.add_argument('--resize_or_crop', type=str, default='scale_width', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]')
-        self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 
-        self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data argumentation') 
-        self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 
+        self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
+        self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
+        self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
         self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. 
If the dataset directory contains more than max_dataset_size, only a subset is loaded.') # for displays @@ -44,21 +44,21 @@ def initialize(self): # for generator self.parser.add_argument('--netG', type=str, default='global', help='selects model to use for netG') self.parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer') - self.parser.add_argument('--n_downsample_global', type=int, default=4, help='number of downsampling layers in netG') + self.parser.add_argument('--n_downsample_global', type=int, default=4, help='number of downsampling layers in netG') self.parser.add_argument('--n_blocks_global', type=int, default=9, help='number of residual blocks in the global generator network') self.parser.add_argument('--n_blocks_local', type=int, default=3, help='number of residual blocks in the local enhancer network') - self.parser.add_argument('--n_local_enhancers', type=int, default=1, help='number of local enhancers to use') - self.parser.add_argument('--niter_fix_global', type=int, default=0, help='number of epochs that we only train the outmost local enhancer') + self.parser.add_argument('--n_local_enhancers', type=int, default=1, help='number of local enhancers to use') + self.parser.add_argument('--niter_fix_global', type=int, default=0, help='number of epochs that we only train the outmost local enhancer') # for instance-wise features - self.parser.add_argument('--no_instance', action='store_true', help='if specified, do *not* add instance map as input') + self.parser.add_argument('--no_instance', action='store_true', help='if specified, do *not* add instance map as input') self.parser.add_argument('--instance_feat', action='store_true', help='if specified, add encoded instance features as input') - self.parser.add_argument('--label_feat', action='store_true', help='if specified, add encoded label features as input') - self.parser.add_argument('--feat_num', type=int, default=3, help='vector length for encoded features') + self.parser.add_argument('--label_feat', action='store_true', help='if specified, add encoded label features as input') + self.parser.add_argument('--feat_num', type=int, default=3, help='vector length for encoded features') self.parser.add_argument('--load_features', action='store_true', help='if specified, load precomputed feature maps') - self.parser.add_argument('--n_downsample_E', type=int, default=4, help='# of downsampling layers in encoder') - self.parser.add_argument('--nef', type=int, default=16, help='# of encoder filters in the first conv layer') - self.parser.add_argument('--n_clusters', type=int, default=10, help='number of clusters for features') + self.parser.add_argument('--n_downsample_E', type=int, default=4, help='# of downsampling layers in encoder') + self.parser.add_argument('--nef', type=int, default=16, help='# of encoder filters in the first conv layer') + self.parser.add_argument('--n_clusters', type=int, default=10, help='number of clusters for features') self.initialized = True @@ -74,7 +74,7 @@ def parse(self, args=None, save=True, silent=False): id = int(str_id) if id >= 0: self.opt.gpu_ids.append(id) - + # set gpu ids if len(self.opt.gpu_ids) > 0: torch.cuda.set_device(self.opt.gpu_ids[0]) @@ -87,7 +87,7 @@ def parse(self, args=None, save=True, silent=False): print('%s: %s' % (str(k), str(v))) print('-------------- End ----------------') - # save to the disk + # save to the disk expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name) util.mkdirs(expr_dir) if save and not 
self.opt.continue_train: diff --git a/pix2pixhd/options/test_options.py b/pix2pixhd/options/test_options.py index f27fc5ea..85f18fc9 100755 --- a/pix2pixhd/options/test_options.py +++ b/pix2pixhd/options/test_options.py @@ -8,10 +8,10 @@ def initialize(self): self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') - self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run') + self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run') self.parser.add_argument('--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features') self.parser.add_argument('--use_encoded_image', action='store_true', help='if specified, encode the real image to get the feature map') self.parser.add_argument("--export_onnx", type=str, help="export ONNX model to a given file") self.parser.add_argument("--engine", type=str, help="run serialized TRT engine") - self.parser.add_argument("--onnx", type=str, help="run ONNX model via TRT") + self.parser.add_argument("--onnx", type=str, help="run ONNX model via TRT") self.isTrain = False diff --git a/pix2pixhd/options/train_options.py b/pix2pixhd/options/train_options.py index cacb8e7b..1b29e4d9 100755 --- a/pix2pixhd/options/train_options.py +++ b/pix2pixhd/options/train_options.py @@ -7,7 +7,7 @@ def initialize(self): self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen') self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results') - self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs') + self.parser.add_argument('--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs') self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') self.parser.add_argument('--debug', action='store_true', help='only do one epoch and displays at each iteration') @@ -21,13 +21,13 @@ def initialize(self): self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') - # for discriminators + # for discriminators self.parser.add_argument('--num_D', type=int, default=2, help='number of discriminators to use') self.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers') - self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer') - self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching loss') + self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer') + self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching loss') self.parser.add_argument('--no_ganFeat_loss', action='store_true', 
help='if specified, do *not* use discriminator feature matching loss') - self.parser.add_argument('--no_vgg_loss', action='store_true', help='if specified, do *not* use VGG feature matching loss') + self.parser.add_argument('--no_vgg_loss', action='store_true', help='if specified, do *not* use VGG feature matching loss') self.parser.add_argument('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN') self.parser.add_argument('--pool_size', type=int, default=0, help='the size of image buffer that stores previously generated images') diff --git a/pix2pixhd/precompute_feature_maps.py b/pix2pixhd/precompute_feature_maps.py index 10205385..335b82da 100755 --- a/pix2pixhd/precompute_feature_maps.py +++ b/pix2pixhd/precompute_feature_maps.py @@ -8,8 +8,8 @@ opt = TrainOptions().parse() opt.nThreads = 1 -opt.batchSize = 1 -opt.serial_batches = True +opt.batchSize = 1 +opt.serial_batches = True opt.no_flip = True opt.instance_feat = True @@ -25,7 +25,7 @@ ######## Save precomputed feature maps for 1024p training ####### for i, data in enumerate(dataset): - print('%d / %d images' % (i+1, dataset_size)) + print('%d / %d images' % (i+1, dataset_size)) feat_map = model.module.netE.forward(Variable(data['image'].cuda(), volatile=True), data['inst'].cuda()) feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) image_numpy = util.tensor2im(feat_map.data[0]) diff --git a/pix2pixhd/run_engine.py b/pix2pixhd/run_engine.py index bf1c180e..0f4176eb 100644 --- a/pix2pixhd/run_engine.py +++ b/pix2pixhd/run_engine.py @@ -19,7 +19,7 @@ try: import tensorrt as trt from tensorrt.parsers import caffeparser - from tensorrt.parsers import onnxparser + from tensorrt.parsers import onnxparser except ImportError as err: sys.stderr.write("""ERROR: failed to import module ({}) Please make sure you have the TensorRT Library installed @@ -65,7 +65,7 @@ def get_input_output_names(trt_engine): dims = trt_engine.get_binding_dimensions(b).to_DimsCHW() name = trt_engine.get_binding_name(b) type = trt_engine.get_binding_data_type(b) - + if (trt_engine.binding_is_input(b)): maps.append(name) print("Found input: ", name) @@ -113,8 +113,8 @@ def time_inference(engine, batch_size, inp): for i in range(iter): context.execute(batch_size, bindings) g_prof.print_layer_times() - - context.destroy() + + context.destroy() return @@ -154,16 +154,16 @@ def run_onnx(onnx_file, data_type, bs, inp): trt_builder = trt.infer.create_infer_builder(G_LOGGER) trt_builder.set_max_batch_size(max_batch_size) trt_builder.set_max_workspace_size(max_workspace_size) - + if (apex.get_model_dtype() == trt.infer.DataType_kHALF): print("------------------- Running FP16 -----------------------------") trt_builder.set_half2_mode(True) - elif (apex.get_model_dtype() == trt.infer.DataType_kINT8): + elif (apex.get_model_dtype() == trt.infer.DataType_kINT8): print("------------------- Running INT8 -----------------------------") trt_builder.set_int8_mode(True) else: print("------------------- Running FP32 -----------------------------") - + print("----- Builder is Done -----") print("----- Creating Engine -----") trt_engine = trt_builder.build_cuda_engine(trt_network) diff --git a/pix2pixhd/test.py b/pix2pixhd/test.py index 8e188fa9..d6abc975 100755 --- a/pix2pixhd/test.py +++ b/pix2pixhd/test.py @@ -29,12 +29,12 @@ model.half() elif opt.data_type == 8: model.type(torch.uint8) - + if opt.verbose: print(model) else: from .run_engine import run_trt_engine, run_onnx - + for i, data in enumerate(dataset): if i >= 
opt.how_many: break @@ -50,14 +50,14 @@ torch.onnx.export(model, [data['label'], data['inst']], opt.export_onnx, verbose=True) exit(0) - minibatch = 1 + minibatch = 1 if opt.engine: generated = run_trt_engine(opt.engine, minibatch, [data['label'], data['inst']]) elif opt.onnx: generated = run_onnx(opt.onnx, opt.data_type, minibatch, [data['label'], data['inst']]) - else: + else: generated = model.inference(data['label'], data['inst'], data['image']) - + visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)), ('synthesized_image', util.tensor2im(generated.data[0]))]) img_path = data['path'] diff --git a/pix2pixhd/train.py b/pix2pixhd/train.py index 789f3b2b..5719e708 100755 --- a/pix2pixhd/train.py +++ b/pix2pixhd/train.py @@ -22,11 +22,11 @@ def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int) except: start_epoch, epoch_iter = 1, 0 - print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) -else: + print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) +else: start_epoch, epoch_iter = 1, 0 -opt.print_freq = lcm(opt.print_freq, opt.batchSize) +opt.print_freq = lcm(opt.print_freq, opt.batchSize) if opt.debug: opt.display_freq = 1 opt.print_freq = 1 @@ -41,9 +41,9 @@ def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 model = create_model(opt) visualizer = Visualizer(opt) -if opt.fp16: +if opt.fp16: from apex import amp - model, [optimizer_G, optimizer_D] = amp.initialize(model, [model.optimizer_G, model.optimizer_D], opt_level='O1') + model, [optimizer_G, optimizer_D] = amp.initialize(model, [model.optimizer_G, model.optimizer_D], opt_level='O1') model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids) else: optimizer_G, optimizer_D = model.module.optimizer_G, model.module.optimizer_D @@ -68,7 +68,7 @@ def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 save_fake = total_steps % opt.display_freq == display_delta ############## Forward Pass ###################### - losses, generated = model(Variable(data['label']), Variable(data['inst']), + losses, generated = model(Variable(data['label']), Variable(data['inst']), Variable(data['image']), Variable(data['feat']), infer=save_fake) # sum per device losses @@ -82,28 +82,28 @@ def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 ############### Backward Pass #################### # update generator weights optimizer_G.zero_grad() - if opt.fp16: - with amp.scale_loss(loss_G, optimizer_G) as scaled_loss: scaled_loss.backward() + if opt.fp16: + with amp.scale_loss(loss_G, optimizer_G) as scaled_loss: scaled_loss.backward() else: - loss_G.backward() + loss_G.backward() optimizer_G.step() # update discriminator weights optimizer_D.zero_grad() - if opt.fp16: - with amp.scale_loss(loss_D, optimizer_D) as scaled_loss: scaled_loss.backward() + if opt.fp16: + with amp.scale_loss(loss_D, optimizer_D) as scaled_loss: scaled_loss.backward() else: - loss_D.backward() - optimizer_D.step() + loss_D.backward() + optimizer_D.step() ############## Display results and errors ########## ### print out errors if total_steps % opt.print_freq == print_delta: - errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()} + errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()} t = (time.time() - iter_start_time) / opt.print_freq visualizer.print_current_errors(epoch, epoch_iter, errors, t) 
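+            # the same scalars also feed the training dashboard, if one is
+            # configured (see util/visualizer.py)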
visualizer.plot_current_errors(errors, total_steps) - #call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) + #call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) ### display output images if save_fake: @@ -115,20 +115,20 @@ def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 ### save latest model if total_steps % opt.save_latest_freq == save_delta: print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) - model.module.save('latest') + model.module.save('latest') np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d') if epoch_iter >= dataset_size: break - - # end of epoch + + # end of epoch iter_end_time = time.time() print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) ### save model for this epoch if epoch % opt.save_epoch_freq == 0: - print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps)) + print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps)) model.module.save('latest') model.module.save(epoch) np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d') diff --git a/pix2pixhd/util/util.py b/pix2pixhd/util/util.py index f4f79ec0..141f83a7 100755 --- a/pix2pixhd/util/util.py +++ b/pix2pixhd/util/util.py @@ -17,9 +17,9 @@ def tensor2im(image_tensor, imtype=np.uint8, normalize=True): if normalize: image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 else: - image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 + image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 image_numpy = np.clip(image_numpy, 0, 255) - if image_numpy.shape[2] == 1 or image_numpy.shape[2] > 3: + if image_numpy.shape[2] == 1 or image_numpy.shape[2] > 3: image_numpy = image_numpy[:,:,0] return image_numpy.astype(imtype) @@ -27,7 +27,7 @@ def tensor2im(image_tensor, imtype=np.uint8, normalize=True): def tensor2label(label_tensor, n_label, imtype=np.uint8): if n_label == 0: return tensor2im(label_tensor, imtype) - label_tensor = label_tensor.cpu().float() + label_tensor = label_tensor.cpu().float() if label_tensor.size()[0] > 1: label_tensor = label_tensor.max(0, keepdim=True)[1] label_tensor = Colorize(n_label)(label_tensor) @@ -64,7 +64,7 @@ def labelcolormap(N): (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153), (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0), (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70), - ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], + ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], dtype=np.uint8) else: cmap = np.zeros((N, 3), dtype=np.uint8) From 9b9c9668fe05a47a376d193a54e23ecc4b0f4c11 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 23 Mar 2022 23:59:35 +0100 Subject: [PATCH 12/14] spaces instead of tabs consistently --- pix2pixhd/encode_features.py | 34 ++++++++++++++-------------- pix2pixhd/models/models.py | 4 ++-- pix2pixhd/models/pix2pixHD_model.py | 2 +- pix2pixhd/precompute_feature_maps.py | 12 +++++----- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pix2pixhd/encode_features.py b/pix2pixhd/encode_features.py index e9d51810..ab118041 100755 --- a/pix2pixhd/encode_features.py +++ b/pix2pixhd/encode_features.py @@ -24,17 +24,17 @@ ########### Encode 
features ########### reencode = True if reencode: - features = {} - for label in range(opt.label_nc): - features[label] = np.zeros((0, opt.feat_num+1)) - for i, data in enumerate(dataset): - feat = model.module.encode_features(data['image'], data['inst']) - for label in range(opt.label_nc): - features[label] = np.append(features[label], feat[label], axis=0) - - print('%d / %d images' % (i+1, dataset_size)) - save_name = os.path.join(save_path, name + '.npy') - np.save(save_name, features) + features = {} + for label in range(opt.label_nc): + features[label] = np.zeros((0, opt.feat_num+1)) + for i, data in enumerate(dataset): + feat = model.module.encode_features(data['image'], data['inst']) + for label in range(opt.label_nc): + features[label] = np.append(features[label], feat[label], axis=0) + + print('%d / %d images' % (i+1, dataset_size)) + save_name = os.path.join(save_path, name + '.npy') + np.save(save_name, features) ############## Clustering ########### n_clusters = opt.n_clusters @@ -43,12 +43,12 @@ from sklearn.cluster import KMeans centers = {} for label in range(opt.label_nc): - feat = features[label] - feat = feat[feat[:,-1] > 0.5, :-1] - if feat.shape[0]: - n_clusters = min(feat.shape[0], opt.n_clusters) - kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat) - centers[label] = kmeans.cluster_centers_ + feat = features[label] + feat = feat[feat[:,-1] > 0.5, :-1] + if feat.shape[0]: + n_clusters = min(feat.shape[0], opt.n_clusters) + kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat) + centers[label] = kmeans.cluster_centers_ save_name = os.path.join(save_path, name + '_clustered_%03d.npy' % opt.n_clusters) np.save(save_name, centers) print('saving to %s' % save_name) diff --git a/pix2pixhd/models/models.py b/pix2pixhd/models/models.py index be1e30e6..b1838331 100755 --- a/pix2pixhd/models/models.py +++ b/pix2pixhd/models/models.py @@ -8,8 +8,8 @@ def create_model(opt): else: model = InferenceModel() else: - from .ui_model import UIModel - model = UIModel() + from .ui_model import UIModel + model = UIModel() model.initialize(opt) if opt.verbose: print("model [%s] was created" % (model.name())) diff --git a/pix2pixhd/models/pix2pixHD_model.py b/pix2pixhd/models/pix2pixHD_model.py index d990a213..d32ff7a3 100755 --- a/pix2pixhd/models/pix2pixHD_model.py +++ b/pix2pixhd/models/pix2pixHD_model.py @@ -49,7 +49,7 @@ def initialize(self, opt): self.netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, 'encoder', opt.n_downsample_E, norm=opt.norm, gpu_ids=self.gpu_ids) if self.opt.verbose: - print('---------- Networks initialized -------------') + print('---------- Networks initialized -------------') # load networks if not self.isTrain or opt.continue_train or opt.load_pretrain: diff --git a/pix2pixhd/precompute_feature_maps.py b/pix2pixhd/precompute_feature_maps.py index 335b82da..100e6593 100755 --- a/pix2pixhd/precompute_feature_maps.py +++ b/pix2pixhd/precompute_feature_maps.py @@ -25,12 +25,12 @@ ######## Save precomputed feature maps for 1024p training ####### for i, data in enumerate(dataset): - print('%d / %d images' % (i+1, dataset_size)) - feat_map = model.module.netE.forward(Variable(data['image'].cuda(), volatile=True), data['inst'].cuda()) - feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) - image_numpy = util.tensor2im(feat_map.data[0]) - save_path = data['path'][0].replace('/train_label/', '/train_feat/') - util.save_image(image_numpy, save_path) + print('%d / %d images' % (i+1, dataset_size)) + feat_map = 
model.module.netE.forward(Variable(data['image'].cuda(), volatile=True), data['inst'].cuda()) + feat_map = nn.Upsample(scale_factor=2, mode='nearest')(feat_map) + image_numpy = util.tensor2im(feat_map.data[0]) + save_path = data['path'][0].replace('/train_label/', '/train_feat/') + util.save_image(image_numpy, save_path) def main(): pass From 9d119da7d174472cee94d9a14c5daacc5339c0d5 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 24 Mar 2022 00:18:02 +0100 Subject: [PATCH 13/14] remove unnecessary scipy and tensorrt deps --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b9f728ea..f47a4d82 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ numpy -scipy scikit-learn pillow -tensorrt torch>=1.4.0 torchvision>=0.5.0 dominate>=2.4.0 From 6b752323d83537f2554c96de3036e9b5b904aa70 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 24 Mar 2022 01:14:54 +0100 Subject: [PATCH 14/14] fix author_email --- .pylintrc | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 ++-- setup.py | 2 +- 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..265fceda --- /dev/null +++ b/.pylintrc @@ -0,0 +1,49 @@ +[MASTER] +extension-pkg-whitelist = numpy + +[TYPECHECK] +generated-members = np.*, torch.* +ignored-classes = np.* + +[MESSAGES CONTROL] +# as we have legacy code in ocrolib which +# does not adhere to most if this, and +# pylint does not offer per-subdirectory +# configuration yet (#618), we must be +# overly tolerant here: +disable = + ungrouped-imports, + multiple-imports, + bad-continuation, + missing-docstring, + no-self-use, + superfluous-parens, + invalid-name, + line-too-long, + too-many-arguments, + too-many-branches, + too-many-statements, + too-many-locals, + too-few-public-methods, + too-many-instance-attributes, + wrong-import-order, + duplicate-code, + #bad-whitespace, + consider-using-f-string, + unused-variable, + import-outside-toplevel, + wildcard-import, + unused-wildcard-import, + too-many-return-statements, + unidiomatic-typecheck, + multiple-statements, + no-else-return, + unnecessary-semicolon, + consider-using-dict-comprehension, + redefined-builtin + +# allow indented whitespace (as required by interpreter): +no-space-check=empty-line + +# allow non-snake-case identifiers: +good-names=n,i diff --git a/README.md b/README.md index 2c94a574..87a92ee6 100755 --- a/README.md +++ b/README.md @@ -59,8 +59,8 @@ Pytorch implementation of our method for high-resolution (e.g. 2048x1024) photor - Create a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/), and activate it - Clone this repo: - git clone https://github.com/NVIDIA/pix2pixHD - pip install pix2pixHD + git clone https://github.com/NVIDIA/pix2pixHD + pip install pix2pixHD diff --git a/setup.py b/setup.py index ca93778e..4c244287 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ long_description=README, long_description_content_type='text/markdown', author='Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu, Andrew Tao, Jan Kautz, Bryan Catanzaro', - author_email='tingchunw@nvidia.com, https://mingyuliu.net, jan@jankautz.com, junyanz@cs.cmu.edu, bcatanzaro@acm.org', + author_email='tingchunw@nvidia.com, mingyul@nvidia.com, jan@jankautz.com, junyanz@cs.cmu.edu, bcatanzaro@acm.org', url='https://github.com/NVIDIA/pix2pixHD', license='BSD', packages=find_packages(),
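
A minimal usage sketch (not part of the patch series above): with the package
installed per the README, options can be parsed programmatically through the
parse(args=None, save=True, silent=False) signature visible in the
base_options.py hunk headers. The import path and flag values are assumptions
based on the pix2pixhd/ layout; save=False matters at test time because the
save branch would otherwise touch the train-only continue_train attribute.

    from pix2pixhd.options.test_options import TestOptions

    # Parse an explicit argv list instead of sys.argv; '--gpu_ids -1' keeps
    # everything on the CPU, and silent=True suppresses the options banner.
    opt = TestOptions().parse(
        args=['--name', 'label2city', '--gpu_ids', '-1'],
        save=False,   # TestOptions defines no continue_train, so leave saving off
        silent=True)
    assert not opt.isTrain and not opt.gpu_ids  # test mode, empty GPU list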