import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms

import PIL
from PIL import Image

try:
    import ffmpeg
except ImportError:
    raise ImportError('ffmpeg-python not found! Install it via "pip install ffmpeg-python"')

import scipy.ndimage as nd
import numpy as np

import os
import click
from typing import Union, Tuple, Optional, List, Type
from tqdm import tqdm
import re

from torch_utils import gen_utils
from network_features import DiscriminatorFeatures


# ----------------------------------------------------------------------------


@click.group()
def main():
    pass


# ----------------------------------------------------------------------------


def get_available_layers(max_resolution: int) -> List[str]:
    """Helper function to get the available layers given a max resolution (first block in the Discriminator)"""
    max_res_log2 = int(np.log2(max_resolution))
    block_resolutions = [2**i for i in range(max_res_log2, 2, -1)]

    available_layers = ['from_rgb']
    for block_res in block_resolutions:
        # We don't add the skip layer, as it's the same as conv1 (due to in-place addition; could be changed)
        available_layers.extend([f'b{block_res}_conv0', f'b{block_res}_conv1'])
    # We also skip 'b4_mbstd', as it doesn't add any new information compared to b8_conv1
    available_layers.extend(['b4_conv', 'fc', 'out'])
    return available_layers
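

# Example (derived directly from the loop above): for a Discriminator whose first block is 64x64,
#     get_available_layers(max_resolution=64)
# returns
#     ['from_rgb', 'b64_conv0', 'b64_conv1', 'b32_conv0', 'b32_conv1',
#      'b16_conv0', 'b16_conv1', 'b8_conv0', 'b8_conv1', 'b4_conv', 'fc', 'out']
# These are the names accepted by the --layers/--layer options of the commands below.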


# ----------------------------------------------------------------------------


# DeepDream code; modified from Erik Linder-Norén's repository: https://github.com/eriklindernoren/PyTorch-Deep-Dream


def get_image(seed: int = 0,
              image_noise: str = 'random',
              starting_image: Union[str, os.PathLike] = None,
              image_size: int = 1024,
              convert_to_grayscale: bool = False,
              device: torch.device = torch.device('cpu')) -> Tuple[PIL.Image.Image, str]:
    """Set the random seed (NumPy + PyTorch), as well as get an image from a path or generate a random one with the seed"""
    torch.manual_seed(seed)
    rnd = np.random.RandomState(seed)

    # Load image or generate a random one if none is provided
    if starting_image is not None:
        image = Image.open(starting_image).convert('RGB').resize((image_size, image_size), Image.LANCZOS)
    else:
        if image_noise == 'random':
            starting_image = f'random_image-seed_{seed:08d}.jpg'
            image = Image.fromarray(rnd.randint(0, 255, (image_size, image_size, 3), dtype='uint8'))
        elif image_noise == 'perlin':
            try:
                # Graciously using Mathieu Duchesneau's implementation: https://github.com/duchesneaumathieu/pyperlin
                from pyperlin import FractalPerlin2D
                starting_image = f'perlin_image-seed_{seed:08d}.jpg'
                shape = (3, image_size, image_size)
                resolutions = [(2**i, 2**i) for i in range(1, 6+1)]  # for lacunarity = 2.0  # TODO: set as cli variable
                factors = [0.5**i for i in range(6)]                 # for persistence = 0.5  # TODO: set as cli variable
                g_cuda = torch.Generator(device=device).manual_seed(seed)
                rgb = FractalPerlin2D(shape, resolutions, factors, generator=g_cuda)().cpu().numpy()
                rgb = (255 * (rgb + 1) / 2).astype(np.uint8)  # [-1.0, 1.0] => [0, 255]
                image = Image.fromarray(rgb.transpose(1, 2, 0), 'RGB')  # Reshape leads us to weird tiling

            except ImportError:
                raise ImportError('pyperlin not found! Install it via "pip install pyperlin"')

    if convert_to_grayscale:
        image = image.convert('L').convert('RGB')  # Small Pillow trick so we still end up with a 3-channel image

    return image, starting_image


def crop_resize_rotate(img: PIL.Image.Image,
                       crop_size: int = None,
                       new_size: int = None,
                       rotation_deg: float = None,
                       translate_x: float = 0.0,
                       translate_y: float = 0.0) -> PIL.Image.Image:
    """Center-crop the input image into a square of sides crop_size; can be resized to new_size; rotated rotation_deg counter-clockwise"""
    # Center-crop the input image
    if crop_size is not None:
        w, h = img.size  # Input image width and height
        img = img.crop(box=((w - crop_size) // 2,    # Left pixel coordinate
                            (h - crop_size) // 2,    # Upper pixel coordinate
                            (w + crop_size) // 2,    # Right pixel coordinate
                            (h + crop_size) // 2))   # Lower pixel coordinate
    # Resize
    if new_size is not None:
        img = img.resize(size=(new_size, new_size),  # Requested size of the image in pixels; (width, height)
                         resample=Image.LANCZOS)     # Resampling filter
    # Rotation and translation
    if rotation_deg is not None:
        img = img.rotate(angle=rotation_deg,                    # Angle to rotate image, counter-clockwise
                         resample=Image.BICUBIC,                # Resampling filter; options: Image.Resampling.{NEAREST, BILINEAR, BICUBIC}
                         expand=False,                          # If True, the whole rotated image will be shown
                         translate=(translate_x, translate_y),  # Translate the image, from top-left corner (post-rotation)
                         fillcolor=(0, 0, 0))                   # Black background
    # TODO: tile the background
    return img


mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

preprocess = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])


def deprocess(image_np: np.ndarray) -> np.ndarray:
    """Undo the preprocessing: map a (1, 3, H, W) normalized array back to a (H, W, 3) uint8 image array"""
    image_np = image_np.squeeze().transpose(1, 2, 0)
    image_np = image_np * std.reshape((1, 1, 3)) + mean.reshape((1, 1, 3))
    # image_np = (image_np + 1.0) / 2.0
    image_np = np.clip(image_np, 0.0, 1.0)
    image_np = (255 * image_np).astype('uint8')
    return image_np


def clip(image_tensor: torch.Tensor) -> torch.Tensor:
    """Clamp per channel"""
    for c in range(3):
        m, s = mean[c], std[c]
        image_tensor[0, c] = torch.clamp(image_tensor[0, c], -m / s, (1 - m) / s)
    return image_tensor


def dream(image: np.ndarray,
          model: torch.nn.Module,
          layers: List[str],
          channels: List[int] = None,
          normed: bool = False,
          sqrt_normed: bool = False,
          iterations: int = 20,
          lr: float = 1e-2) -> np.ndarray:
    """Update the image via gradient ascent to maximize the selected layer outputs for n iterations"""
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    image = Variable(Tensor(image), requires_grad=True)
    for i in range(iterations):
        model.zero_grad()
        out = model.get_layers_features(image, layers=layers, channels=channels, normed=normed, sqrt_normed=sqrt_normed)
        loss = sum(layer.norm() for layer in out)  # More than one layer may be used
        loss.backward()
        avg_grad = np.abs(image.grad.data.cpu().numpy()).mean()
        norm_lr = lr / avg_grad  # Normalize the step size by the mean absolute gradient
        image.data += norm_lr * image.grad.data
        image.data = clip(image.data)
        # image.data = torch.clamp(image.data, -1.0, 1.0)
        image.grad.data.zero_()
    return image.cpu().data.numpy()
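

# Minimal sketch (illustrative only; not called by any command below): `preprocess` and
# `deprocess` are intended to be approximate inverses, so a dreamed (1, 3, H, W) array can
# be mapped back to a savable (H, W, 3) uint8 image. The helper name is ours, not part of
# the original pipeline.
def _preprocess_roundtrip_example(size: int = 64) -> np.ndarray:
    """Illustrative only: normalize a random PIL image and map it back to uint8 RGB."""
    img = Image.fromarray(np.random.randint(0, 255, (size, size, 3), dtype='uint8'))
    arr = preprocess(img).unsqueeze(0).cpu().numpy()  # (1, 3, H, W), ImageNet-normalized
    return deprocess(arr)                             # (H, W, 3), uint8 in [0, 255]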


def deep_dream(image: PIL.Image.Image,
               model: torch.nn.Module,
               model_resolution: int,
               layers: List[str],
               channels: List[int],
               seed: Union[int, Type[None]],
               normed: bool,
               sqrt_normed: bool,
               iterations: int,
               lr: float,
               octave_scale: float,
               num_octaves: int,
               unzoom_octave: bool = False,
               disable_inner_tqdm: bool = False,
               ignore_initial_transform: bool = False) -> np.ndarray:
    """Main deep dream method"""
    # Center-crop and resize
    if not ignore_initial_transform:
        image = crop_resize_rotate(img=image, crop_size=min(image.size), new_size=model_resolution)
    # Preprocess image
    image = preprocess(image)
    # image = torch.from_numpy(np.array(image)).permute(-1, 0, 1) / 127.5 - 1.0  # alternative
    image = image.unsqueeze(0).cpu().data.numpy()

    # Extract image representations for each octave
    octaves = [image]
    for _ in range(num_octaves - 1):
        # Alternatively, see if we get better results with: https://www.tensorflow.org/tutorials/generative/deepdream#taking_it_up_an_octave
        octave = nd.zoom(octaves[-1], (1, 1, 1 / octave_scale, 1 / octave_scale), order=1)
        # Necessary for StyleGAN's Discriminator, as it cannot handle any image size
        if unzoom_octave:
            octave = nd.zoom(octave, np.array(octaves[-1].shape) / np.array(octave.shape), order=1)
        octaves.append(octave)

    detail = np.zeros_like(octaves[-1])
    tqdm_desc = f'Dreaming w/layers {"|".join(x for x in layers)}'
    tqdm_desc = f'Seed: {seed} - {tqdm_desc}' if seed is not None else tqdm_desc
    for octave, octave_base in enumerate(tqdm(octaves[::-1], desc=tqdm_desc, disable=disable_inner_tqdm)):
        if octave > 0:
            # Upsample detail to new octave dimension
            detail = nd.zoom(detail, np.array(octave_base.shape) / np.array(detail.shape), order=1)
        # Add deep dream detail from previous octave to new base
        input_image = octave_base + detail
        # Get new deep dream image
        dreamed_image = dream(input_image, model, layers, channels, normed, sqrt_normed, iterations, lr)
        # Extract deep dream details
        detail = dreamed_image - octave_base

    return deprocess(dreamed_image)
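

# Octave sizing sketch (illustrative only; not used by the CLI commands). It mirrors the
# pyramid construction in deep_dream() above: each level shrinks by 1/octave_scale, and
# unzoom_octave restores the original size so StyleGAN's fixed-resolution D accepts it.
def _octave_shapes_example(size: int = 256, octave_scale: float = 1.4, num_octaves: int = 3,
                           unzoom_octave: bool = False) -> List[Tuple[int, ...]]:
    """Illustrative only: return the array shapes the octave pyramid would have."""
    octaves = [np.zeros((1, 3, size, size), dtype=np.float32)]
    for _ in range(num_octaves - 1):
        octave = nd.zoom(octaves[-1], (1, 1, 1 / octave_scale, 1 / octave_scale), order=1)
        if unzoom_octave:
            octave = nd.zoom(octave, np.array(octaves[-1].shape) / np.array(octave.shape), order=1)
        octaves.append(octave)
    # With the defaults above this is roughly [(1, 3, 256, 256), (1, 3, 183, 183), (1, 3, 131, 131)],
    # or three times (1, 3, 256, 256) when unzoom_octave=True.
    return [o.shape for o in octaves]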


# ----------------------------------------------------------------------------

# Helper functions (all base code taken from: https://pytorch.org/tutorials/advanced/neural_style_tutorial.html)


class ContentLoss(nn.Module):
    def __init__(self, target,):
        super(ContentLoss, self).__init__()
        # We 'detach' the target content from the tree used to dynamically compute the
        # gradient: this is a stated value, not a variable. Otherwise the forward method
        # of the criterion will throw an error.
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input


def gram_matrix(input):
    a, b, c, d = input.size()  # (batch_size, no. of feature maps, dims of a feature map (N = c*d))

    features = input.view(a * b, c * d)  # resize F_XL into \hat F_XL

    G = torch.mm(features, features.t())  # compute the gram product

    # 'Normalize' the values of the gram matrix by dividing by the number of elements in each feature map
    return G.div(a * b * c * d)  # can also do torch.numel(input) to get the number of elements


class StyleLoss(nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input


@main.command(name='style-transfer', help='Use the StyleGAN2/3 Discriminator to perform style transfer')
@click.pass_context
@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
@click.option('--cfg', type=click.Choice(['stylegan3-t', 'stylegan3-r', 'stylegan2']), help='Model base configuration', default=None)
@click.option('--content', type=str, help='Content image filename (url or local path)', required=True)
@click.option('--style', type=str, help='Style image filename (url or local path)', required=True)
def style_transfer_discriminator(
        ctx: click.Context,
        network_pkl: str,
        cfg: str,
        content: str,
        style: str,
):
    print('Coming soon!')
    # Reference: https://pytorch.org/tutorials/advanced/neural_style_tutorial.html

    # Set up device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    imsize = 512 if torch.cuda.is_available() else 128  # use small size if no GPU

    loader = transforms.Compose([transforms.Resize(imsize),  # scale imported image
                                 transforms.ToTensor()])     # transform it into a torch tensor

    # Helper function
    def image_loader(image_name):
        image = Image.open(image_name)
        # fake batch dimension required to fit network's input dimensions
        image = loader(image).unsqueeze(0)
        return image.to(device, torch.float)

    style_img = image_loader(style)
    content_img = image_loader(content)

    # This shouldn't really happen, but just in case
    assert style_img.size() == content_img.size(), 'Style and content images must be the same size'

    unloader = transforms.ToPILImage()  # reconvert into PIL image

    # Load Discriminator
    D = gen_utils.load_network('D', network_pkl, cfg, device)

    # TODO: finish this!


# ----------------------------------------------------------------------------
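

# Illustrative sketch only (not used by the stub above): how ContentLoss/StyleLoss record
# their losses given two feature maps of matching shape. The tensors here are random
# stand-ins for Discriminator features; the actual wiring into D is still a TODO.
def _style_loss_example() -> Tuple[torch.Tensor, torch.Tensor]:
    """Illustrative only: evaluate ContentLoss and StyleLoss on random feature maps."""
    target = torch.randn(1, 8, 16, 16)   # pretend features of the content/style image
    current = torch.randn(1, 8, 16, 16)  # pretend features of the image being optimized
    content_loss, style_loss = ContentLoss(target), StyleLoss(target)
    content_loss(current)                # populates content_loss.loss (MSE on raw features)
    style_loss(current)                  # populates style_loss.loss (MSE on Gram matrices)
    return content_loss.loss, style_loss.loss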


@main.command(name='dream', help='Discriminator Dreaming with the StyleGAN2/3 Discriminator and the chosen layers')
@click.pass_context
@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
@click.option('--cfg', type=click.Choice(['stylegan3-t', 'stylegan3-r', 'stylegan2']), help='Model base configuration', default=None)
# Synthesis options
@click.option('--seeds', type=gen_utils.num_range, help='Random seeds to use. Accepted comma-separated values, ranges, or combinations: "a,b,c", "a-c", "a,b-d,e".', default='0')
@click.option('--random-image-noise', '-noise', 'image_noise', type=click.Choice(['random', 'perlin']), default='perlin', show_default=True)
@click.option('--starting-image', type=str, help='Path to image to start from', default=None)
@click.option('--convert-to-grayscale', '-grayscale', is_flag=True, help='Add flag to grayscale the initial image')
@click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)', default=None)
@click.option('--lr', 'learning_rate', type=float, help='Learning rate', default=1e-2, show_default=True)
@click.option('--iterations', '-it', type=int, help='Number of gradient ascent steps per octave', default=20, show_default=True)
# Layer options
@click.option('--layers', type=str, help='Layers of the Discriminator to use as the features. If "all", will generate a dream image per available layer in the loaded model. If "use_all", will use all available layers.', default='b16_conv1', show_default=True)
@click.option('--channels', type=gen_utils.num_range, help='Comma-separated list and/or range of the channels of the Discriminator to use as the features. If "None", will use all channels in each specified layer.', default=None, show_default=True)
@click.option('--normed', 'norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by its number of elements')
@click.option('--sqrt-normed', 'sqrt_norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by the square root of its number of elements')
# Octaves options
@click.option('--num-octaves', type=int, help='Number of octaves', default=5, show_default=True)
@click.option('--octave-scale', type=float, help='Image scale between octaves', default=1.4, show_default=True)
@click.option('--unzoom-octave', type=bool, help='Set to True for the octaves to be unzoomed (this will be slower)', default=True, show_default=True)
# Extra parameters for saving the results
@click.option('--outdir', type=click.Path(file_okay=False), help='Directory path to save the results', default=os.path.join(os.getcwd(), 'out', 'discriminator_synthesis'), show_default=True, metavar='DIR')
@click.option('--description', '-desc', type=str, help='Additional description name for the directory path to save results', default='', show_default=True)
def discriminator_dream(
        ctx: click.Context,
        network_pkl: Union[str, os.PathLike],
        cfg: Optional[str],
        seeds: List[int],
        image_noise: str,
        starting_image: Union[str, os.PathLike],
        convert_to_grayscale: bool,
        class_idx: Optional[int],  # TODO: conditional model
        learning_rate: float,
        iterations: int,
        layers: str,
        channels: Optional[List[int]],
        norm_model_layers: bool,
        sqrt_norm_model_layers: bool,
        num_octaves: int,
        octave_scale: float,
        unzoom_octave: bool,
        outdir: Union[str, os.PathLike],
        description: str,
):
    # Set up device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load Discriminator
    D = gen_utils.load_network('D', network_pkl, cfg, device)

    # Get the model resolution (image resizing and getting available layers)
    model_resolution = D.img_resolution

    # TODO: do this better, as we can combine these conditions later
    layers = layers.split(',')

    # We will use the features of the Discriminator, on the layers specified by the user
    model = DiscriminatorFeatures(D).requires_grad_(False).to(device)

    if 'all' in layers:
        # Get all the available layers in a list
        layers = get_available_layers(max_resolution=model_resolution)

        for seed in seeds:
            # Get the image and image name
            image, starting_image = get_image(seed=seed, image_noise=image_noise,
                                              starting_image=starting_image,
                                              image_size=model_resolution,
                                              convert_to_grayscale=convert_to_grayscale)

            # Make the run dir in the specified output directory
            desc = f'discriminator-dream-all_layers-seed_{seed}'
            desc = f'{desc}-{description}' if len(description) != 0 else desc
            run_dir = gen_utils.make_run_dir(outdir, desc)

            # Save starting image
            image.save(os.path.join(run_dir, f'{os.path.basename(starting_image).split(".")[0]}.jpg'))

            # Save the configuration used
            ctx.obj = {
                'network_pkl': network_pkl,
                'synthesis_options': {
                    'seed': seed,
                    'random_image_noise': image_noise,
                    'starting_image': starting_image,
                    'class_idx': class_idx,
                    'learning_rate': learning_rate,
                    'iterations': iterations},
                'layer_options': {
                    'layer': layers,
                    'channels': channels,
                    'norm_model_layers': norm_model_layers,
                    'sqrt_norm_model_layers': sqrt_norm_model_layers},
                'octaves_options': {
                    'num_octaves': num_octaves,
                    'octave_scale': octave_scale,
                    'unzoom_octave': unzoom_octave},
                'extra_parameters': {
                    'outdir': run_dir,
                    'description': description}
            }
            # Save the run configuration
            gen_utils.save_config(ctx=ctx, run_dir=run_dir)

            # For each layer:
            for layer in layers:
                # Extract deep dream image
                dreamed_image = deep_dream(image, model, model_resolution, layers=[layer], channels=channels,
                                           seed=seed, normed=norm_model_layers, sqrt_normed=sqrt_norm_model_layers,
                                           iterations=iterations, lr=learning_rate, octave_scale=octave_scale,
                                           num_octaves=num_octaves, unzoom_octave=unzoom_octave)

                # Save the resulting dreamed image
                filename = f'layer-{layer}_dreamed_{os.path.basename(starting_image).split(".")[0]}.jpg'
                Image.fromarray(dreamed_image, 'RGB').save(os.path.join(run_dir, filename))

    else:
        if 'use_all' in layers:
            # Get all available layers
            layers = get_available_layers(max_resolution=model_resolution)
        else:
            # Parse the layers given by the user and leave only those available by the model
            available_layers = get_available_layers(max_resolution=model_resolution)
            layers = [layer for layer in layers if layer in available_layers]

        # Make the run dir in the specified output directory
        desc = f'discriminator-dream-layers_{"-".join(x for x in layers)}'
        desc = f'{desc}-{description}' if len(description) != 0 else desc
        run_dir = gen_utils.make_run_dir(outdir, desc)

        starting_images, used_seeds = [], []
        for seed in seeds:
            # Get the image and image name
            image, starting_image = get_image(seed=seed, image_noise=image_noise,
                                              starting_image=starting_image,
                                              image_size=model_resolution,
                                              convert_to_grayscale=convert_to_grayscale)

            # Extract deep dream image
            dreamed_image = deep_dream(image, model, model_resolution, layers=layers, channels=channels, seed=seed,
                                       normed=norm_model_layers, sqrt_normed=sqrt_norm_model_layers,
                                       iterations=iterations, lr=learning_rate, octave_scale=octave_scale,
                                       num_octaves=num_octaves, unzoom_octave=unzoom_octave)

            # For logging later
            starting_images.append(starting_image)
            used_seeds.append(seed)

            # Save the resulting image and initial image
            filename = f'dreamed_{os.path.basename(starting_image)}'
            Image.fromarray(dreamed_image, 'RGB').save(os.path.join(run_dir, filename))
            image.save(os.path.join(run_dir, os.path.basename(starting_image)))
            starting_image = None

        # Save the configuration used
        ctx.obj = {
            'network_pkl': network_pkl,
            'synthesis_options': {
                'seeds': used_seeds,
                'starting_image': starting_images,
                'class_idx': class_idx,
                'learning_rate': learning_rate,
                'iterations': iterations},
            'layer_options': {
                'layer': layers,
                'channels': channels,
                'norm_model_layers': norm_model_layers,
                'sqrt_norm_model_layers': sqrt_norm_model_layers},
            'octaves_options': {
                'octave_scale': octave_scale,
                'num_octaves': num_octaves,
                'unzoom_octave': unzoom_octave},
            'extra_parameters': {
                'outdir': run_dir,
                'description': description}
        }
        # Save the run configuration
        gen_utils.save_config(ctx=ctx, run_dir=run_dir)


# ----------------------------------------------------------------------------
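

# Example invocation of the command above (script and pickle names are placeholders; any
# StyleGAN2/3 Discriminator pickle that gen_utils.load_network accepts should work):
#   python <path-to-this-script> dream --network=<network.pkl> --cfg=stylegan2 \
#       --seeds=0-3 --layers=b16_conv1 --iterations=20 --num-octaves=5
# Passing --layers=all instead generates one dreamed image per available layer and seed.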


@main.command(name='dream-zoom',
              help='Zoom/rotate/translate after each Discriminator Dreaming iteration. A video will be saved.')
@click.pass_context
@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
@click.option('--cfg', type=click.Choice(['stylegan3-t', 'stylegan3-r', 'stylegan2']), help='Model base configuration', default=None)
# Synthesis options
@click.option('--seed', type=int, help='Random seed to use', default=0, show_default=True)
@click.option('--random-image-noise', '-noise', 'image_noise', type=click.Choice(['random', 'perlin']), default='random', show_default=True)
@click.option('--starting-image', type=str, help='Path to image to start from', default=None)
@click.option('--convert-to-grayscale', '-grayscale', is_flag=True, help='Add flag to grayscale the initial image')
@click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)', default=None)
@click.option('--lr', 'learning_rate', type=float, help='Learning rate', default=5e-3, show_default=True)
@click.option('--iterations', '-it', type=click.IntRange(min=1), help='Number of gradient ascent steps per octave', default=10, show_default=True)
# Layer options
@click.option('--layers', type=str, help='Comma-separated list of the layers of the Discriminator to use as the features. If "use_all", will use all available layers.', default='b16_conv0', show_default=True)
@click.option('--channels', type=gen_utils.num_range, help='Comma-separated list and/or range of the channels of the Discriminator to use as the features. If "None", will use all channels in each specified layer.', default=None, show_default=True)
@click.option('--normed', 'norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by its number of elements')
@click.option('--sqrt-normed', 'sqrt_norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by the square root of its number of elements')
# Octaves options
@click.option('--num-octaves', type=click.IntRange(min=1), help='Number of octaves', default=5, show_default=True)
@click.option('--octave-scale', type=float, help='Image scale between octaves', default=1.4, show_default=True)
@click.option('--unzoom-octave', type=bool, help='Set to True for the octaves to be unzoomed (this will be slower)', default=False, show_default=True)
# Individual frame manipulation options
@click.option('--pixel-zoom', '-zoom', type=int, help='How many pixels to zoom per step (positive for zoom in, negative for zoom out, padded with black)', default=2, show_default=True)
@click.option('--rotation-deg', '-rot', type=float, help='Rotate image counter-clockwise per frame (padded with black)', default=0.0, show_default=True)
@click.option('--translate-x', '-tx', type=float, help='Translate the image in the horizontal axis per frame (from left to right, padded with black)', default=0.0, show_default=True)
@click.option('--translate-y', '-ty', type=float, help='Translate the image in the vertical axis per frame (from top to bottom, padded with black)', default=0.0, show_default=True)
# Video options
@click.option('--fps', type=gen_utils.parse_fps, help='FPS for the mp4 video of optimization progress (if saved)', default=25, show_default=True)
@click.option('--duration-sec', type=float, help='Duration length of the video', default=15.0, show_default=True)
@click.option('--reverse-video', is_flag=True, help='Add flag to reverse the generated video')
@click.option('--include-starting-image', type=bool, help='Include the starting image in the final video', default=True, show_default=True)
# Extra parameters for saving the results
@click.option('--outdir', type=click.Path(file_okay=False), help='Directory path to save the results', default=os.path.join(os.getcwd(), 'out', 'discriminator_synthesis'), show_default=True, metavar='DIR')
@click.option('--description', '-desc', type=str, help='Additional description name for the directory path to save results', default='', show_default=True)
def discriminator_dream_zoom(
        ctx: click.Context,
        network_pkl: Union[str, os.PathLike],
        cfg: Optional[str],
        seed: int,
        image_noise: Optional[str],
        starting_image: Optional[Union[str, os.PathLike]],
        convert_to_grayscale: bool,
        class_idx: Optional[int],  # TODO: conditional model
        learning_rate: float,
        iterations: int,
        layers: str,
        channels: List[int],
        norm_model_layers: Optional[bool],
        sqrt_norm_model_layers: Optional[bool],
        num_octaves: int,
        octave_scale: float,
        unzoom_octave: Optional[bool],
        pixel_zoom: int,
        rotation_deg: float,
        translate_x: float,
        translate_y: float,
        fps: int,
        duration_sec: float,
        reverse_video: bool,
        include_starting_image: bool,
        outdir: Union[str, os.PathLike],
        description: str,
):
    # Set up device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load Discriminator
    D = gen_utils.load_network('D', network_pkl, cfg, device)

    # Get the model resolution (for resizing the starting image if needed)
    model_resolution = D.img_resolution
    zoom_size = model_resolution - 2 * pixel_zoom

    layers = layers.split(',')
    if 'use_all' in layers:
        # Get all available layers
        layers = get_available_layers(max_resolution=model_resolution)
    else:
        # Parse the layers given by the user and leave only those available by the model
        available_layers = get_available_layers(max_resolution=model_resolution)
        layers = [layer for layer in layers if layer in available_layers]

    # We will use the features of the Discriminator, on the layers specified by the user
    model = DiscriminatorFeatures(D).requires_grad_(False).to(device)

    # Get the image and image name
    image, starting_image = get_image(seed=seed, image_noise=image_noise,
                                      starting_image=starting_image,
                                      image_size=model_resolution,
                                      convert_to_grayscale=convert_to_grayscale)

    # Make the run dir in the specified output directory
    desc = 'discriminator-dream-zoom'
    desc = f'{desc}-{description}' if len(description) != 0 else desc
    run_dir = gen_utils.make_run_dir(outdir, desc)

    # Save the configuration used
    ctx.obj = {
        'network_pkl': network_pkl,
        'synthesis_options': {
            'seed': seed,
            'random_image_noise': image_noise,
            'starting_image': starting_image,
            'class_idx': class_idx,
            'learning_rate': learning_rate,
            'iterations': iterations
        },
        'layer_options': {
            'layers': layers,
            'channels': channels,
            'norm_model_layers': norm_model_layers,
            'sqrt_norm_model_layers': sqrt_norm_model_layers
        },
        'octaves_options': {
            'num_octaves': num_octaves,
            'octave_scale': octave_scale,
            'unzoom_octave': unzoom_octave
        },
        'frame_manipulation_options': {
            'pixel_zoom': pixel_zoom,
            'rotation_deg': rotation_deg,
            'translate_x': translate_x,
            'translate_y': translate_y,
        },
        'video_options': {
            'fps': fps,
            'duration_sec': duration_sec,
            'reverse_video': reverse_video,
            'include_starting_image': include_starting_image,
        },
        'extra_parameters': {
            'outdir': run_dir,
            'description': description
        }
    }
    # Save the run configuration
    gen_utils.save_config(ctx=ctx, run_dir=run_dir)

    num_frames = int(np.rint(duration_sec * fps))  # Number of frames for the video
    n_digits = int(np.log10(num_frames)) + 1       # Number of digits for naming each frame

    # Save the starting image
    starting_image_name = f'dreamed_{0:0{n_digits}d}.jpg' if include_starting_image else 'starting_image.jpg'
    image.save(os.path.join(run_dir, starting_image_name))

    for idx, frame in enumerate(tqdm(range(num_frames), desc='Dreaming...', unit='frame')):
        # Zoom in after the first frame
        if idx > 0:
            image = crop_resize_rotate(image, crop_size=zoom_size, new_size=model_resolution,
                                       rotation_deg=rotation_deg, translate_x=translate_x, translate_y=translate_y)
        # Extract deep dream image
        dreamed_image = deep_dream(image, model, model_resolution, layers=layers, seed=seed, normed=norm_model_layers,
                                   sqrt_normed=sqrt_norm_model_layers, iterations=iterations, channels=channels,
                                   lr=learning_rate, octave_scale=octave_scale, num_octaves=num_octaves,
                                   unzoom_octave=unzoom_octave, disable_inner_tqdm=True)

        # Save the resulting image and initial image
        filename = f'dreamed_{idx + 1:0{n_digits}d}.jpg'
        Image.fromarray(dreamed_image, 'RGB').save(os.path.join(run_dir, filename))

        # Now, the dreamed image is the starting image
        image = Image.fromarray(dreamed_image, 'RGB')

    # Save the final video
    gen_utils.save_video_from_images(run_dir=run_dir, image_names=f'dreamed_%0{n_digits}d.jpg',
                                     video_name='dream-zoom', fps=fps, reverse_video=reverse_video)


# ----------------------------------------------------------------------------
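

# Example invocation of dream-zoom (script and pickle names are placeholders): with the
# default --pixel-zoom=2 on a 1024-resolution model, each frame center-crops to 1020x1020
# (model_resolution - 2 * pixel_zoom) and resizes back to 1024x1024, i.e. a zoom of
# roughly 0.4% per frame before the next dreaming pass.
#   python <path-to-this-script> dream-zoom --network=<network.pkl> --cfg=stylegan2 \
#       --layers=b16_conv0 --fps=25 --duration-sec=15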


@main.command(name='channel-zoom', help='Dream zoom using only the specified channels in the selected layer')
@click.pass_context
@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
@click.option('--cfg', type=click.Choice(['stylegan3-t', 'stylegan3-r', 'stylegan2']), help='Model base configuration', default=None)
# Synthesis options
@click.option('--seed', type=int, help='Random seed to use', default=0, show_default=True)
@click.option('--random-image-noise', '-noise', 'image_noise', type=click.Choice(['random', 'perlin']), default='random', show_default=True)
@click.option('--starting-image', type=str, help='Path to image to start from', default=None)
@click.option('--convert-to-grayscale', '-grayscale', is_flag=True, help='Add flag to grayscale the initial image')
@click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)', default=None)
@click.option('--lr', 'learning_rate', type=float, help='Learning rate', default=5e-3, show_default=True)
@click.option('--iterations', '-it', type=click.IntRange(min=1), help='Number of gradient ascent steps per octave', default=10, show_default=True)
# Layer options
@click.option('--layer', type=str, help='Layer of the Discriminator to use as the features.', default='b8_conv0', show_default=True)
@click.option('--normed', 'norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by its number of elements')
@click.option('--sqrt-normed', 'sqrt_norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by the square root of its number of elements')
# Octaves options
@click.option('--num-octaves', type=click.IntRange(min=1), help='Number of octaves', default=5, show_default=True)
@click.option('--octave-scale', type=float, help='Image scale between octaves', default=1.4, show_default=True)
@click.option('--unzoom-octave', type=bool, help='Set to True for the octaves to be unzoomed (this will be slower)', default=False, show_default=True)
# Individual frame manipulation options
@click.option('--pixel-zoom', '-zoom', type=int, help='How many pixels to zoom per step (positive for zoom in, negative for zoom out, padded with black)', default=2, show_default=True)
@click.option('--rotation-deg', '-rot', type=float, help='Rotate image counter-clockwise per frame (padded with black)', default=0.0, show_default=True)
@click.option('--translate-x', '-tx', type=float, help='Translate the image in the horizontal axis per frame (from left to right, padded with black)', default=0.0, show_default=True)
@click.option('--translate-y', '-ty', type=float, help='Translate the image in the vertical axis per frame (from top to bottom, padded with black)', default=0.0, show_default=True)
# Video options
@click.option('--frames-per-channel', type=click.IntRange(min=1), help='Number of frames per channel', default=1, show_default=True)
@click.option('--fps', type=gen_utils.parse_fps, help='FPS for the mp4 video of optimization progress (if saved)', default=25, show_default=True)
@click.option('--reverse-video', is_flag=True, help='Add flag to reverse the generated video')
@click.option('--include-starting-image', type=bool, help='Include the starting image in the final video', default=True, show_default=True)
# Extra parameters for saving the results
@click.option('--outdir', type=click.Path(file_okay=False), help='Directory path to save the results', default=os.path.join(os.getcwd(), 'out', 'discriminator_synthesis'), show_default=True, metavar='DIR')
@click.option('--description', '-desc', type=str, help='Additional description name for the directory path to save results', default='', show_default=True)
def channel_zoom(
        ctx: click.Context,
        network_pkl: Union[str, os.PathLike],
        cfg: Optional[str],
        seed: int,
        image_noise: Optional[str],
        starting_image: Optional[Union[str, os.PathLike]],
        convert_to_grayscale: bool,
        class_idx: Optional[int],  # TODO: conditional model
        learning_rate: float,
        iterations: int,
        layer: str,
        norm_model_layers: Optional[bool],
        sqrt_norm_model_layers: Optional[bool],
        num_octaves: int,
        octave_scale: float,
        unzoom_octave: Optional[bool],
        pixel_zoom: int,
        rotation_deg: float,
        translate_x: float,
        translate_y: float,
        frames_per_channel: int,
        fps: int,
        reverse_video: bool,
        include_starting_image: bool,
        outdir: Union[str, os.PathLike],
        description: str,
):
    """Zoom in using every channel of the specified Discriminator layer, one channel at a time"""
    # Set up device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load Discriminator
    D = gen_utils.load_network('D', network_pkl, cfg, device)

    # Get the model resolution (for resizing the starting image if needed)
    model_resolution = D.img_resolution
    zoom_size = model_resolution - 2 * pixel_zoom

    if 'use_all' in layer:
        ctx.fail('Cannot use "use_all" with this command. Please specify the layer you want to use.')
    else:
        # Parse the layer given by the user and make sure it is available in the model
        available_layers = get_available_layers(max_resolution=model_resolution)
        assert layer in available_layers, f'Layer {layer} not available. Available layers: {available_layers}'
        layers = [layer]

    # We will use the features of the Discriminator, on the layer specified by the user
    model = DiscriminatorFeatures(D).requires_grad_(False).to(device)

    # Get the image and image name
    image, starting_image = get_image(seed=seed, image_noise=image_noise,
                                      starting_image=starting_image,
                                      image_size=model_resolution,
                                      convert_to_grayscale=convert_to_grayscale)

    # Make the run dir in the specified output directory
    desc = 'discriminator-channel-zoom'
    desc = f'{desc}-{description}' if len(description) != 0 else desc
    run_dir = gen_utils.make_run_dir(outdir, desc)

    # Finally, let's get the number of channels in the selected layer
    channels_dict = {res: D.get_submodule(f'b{res}.conv0').out_channels for res in D.block_resolutions}
    channels_dict[4] = D.get_submodule('b4.conv').out_channels  # Last block has a different name

    # Get the dimension of the block from the selected layer (e.g., from 'b128_conv0' get '128')
    block_resolution = re.search(r'b(\d+)_', layer).group(1)
    total_channels = channels_dict[int(block_resolution)]

    # Make a list of all the channels, each repeated frames_per_channel times
    channels = np.repeat(np.arange(total_channels), frames_per_channel)

    num_frames = int(np.rint(total_channels * frames_per_channel))  # Number of frames for the video
    n_digits = int(np.log10(num_frames)) + 1                        # Number of digits for naming each frame

    # Save the starting image
    starting_image_name = f'dreamed_{0:0{n_digits}d}.jpg' if include_starting_image else 'starting_image.jpg'
    image.save(os.path.join(run_dir, starting_image_name))

    for idx, frame in enumerate(tqdm(range(num_frames), desc='Dreaming...', unit='frame')):
        # Zoom in after the first frame
        if idx > 0:
            image = crop_resize_rotate(image, crop_size=zoom_size, new_size=model_resolution,
                                       rotation_deg=rotation_deg, translate_x=translate_x, translate_y=translate_y)
        # Extract deep dream image, using only the current channel
        dreamed_image = deep_dream(image, model, model_resolution, layers=layers, seed=seed, normed=norm_model_layers,
                                   sqrt_normed=sqrt_norm_model_layers, iterations=iterations,
                                   channels=channels[idx:idx + 1], lr=learning_rate, octave_scale=octave_scale,
                                   num_octaves=num_octaves, unzoom_octave=unzoom_octave, disable_inner_tqdm=True)

        # Save the resulting image and initial image
        filename = f'dreamed_{idx + 1:0{n_digits}d}.jpg'
        Image.fromarray(dreamed_image, 'RGB').save(os.path.join(run_dir, filename))

        # Now, the dreamed image is the starting image
        image = Image.fromarray(dreamed_image, 'RGB')

    # Save the final video
    gen_utils.save_video_from_images(run_dir=run_dir, image_names=f'dreamed_%0{n_digits}d.jpg',
                                     video_name='channel-zoom', fps=fps, reverse_video=reverse_video)

    # Save the configuration used
    ctx.obj = {
        'network_pkl': network_pkl,
        'synthesis_options': {
            'seed': seed,
            'random_image_noise': image_noise,
            'starting_image': starting_image,
            'class_idx': class_idx,
            'learning_rate': learning_rate,
            'iterations': iterations
        },
        'layer_options': {
            'layer': layer,
            'channels': 'all',
            'total_channels': total_channels,
            'norm_model_layers': norm_model_layers,
            'sqrt_norm_model_layers': sqrt_norm_model_layers
        },
        'octaves_options': {
            'num_octaves': num_octaves,
            'octave_scale': octave_scale,
            'unzoom_octave': unzoom_octave
        },
        'frame_manipulation_options': {
            'pixel_zoom': pixel_zoom,
            'rotation_deg': rotation_deg,
            'translate_x': translate_x,
            'translate_y': translate_y,
        },
        'video_options': {
            'fps': fps,
            'frames_per_channel': frames_per_channel,
            'reverse_video': reverse_video,
            'include_starting_image': include_starting_image,
        },
        'extra_parameters': {
            'outdir': run_dir,
            'description': description
        }
    }
    # Save the run configuration
    gen_utils.save_config(ctx=ctx, run_dir=run_dir)


# ----------------------------------------------------------------------------
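

# Example invocation of channel-zoom (script and pickle names are placeholders): with
# --layer=b8_conv0 the video gets total_channels * frames_per_channel frames, maximizing
# one channel of that layer per frame:
#   python <path-to-this-script> channel-zoom --network=<network.pkl> --cfg=stylegan2 \
#       --layer=b8_conv0 --frames-per-channel=2 --fps=25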


@main.command(name='interp', help='Interpolate between two or more seeds')
@click.pass_context
@click.option('--network', 'network_pkl', help='Network pickle filename', required=True)
@click.option('--cfg', type=click.Choice(['stylegan3-t', 'stylegan3-r', 'stylegan2']), help='Model base configuration', default=None)
# Synthesis options
@click.option('--seeds', type=gen_utils.num_range, help='Random seeds to generate the Perlin noise from', required=True)
@click.option('--interp-type', '-interp', type=click.Choice(['linear', 'spherical']), help='Type of interpolation in Z or W', default='spherical', show_default=True)
@click.option('--smooth', is_flag=True, help='Add flag to smooth the interpolation between the seeds')
@click.option('--random-image-noise', '-noise', 'image_noise', type=click.Choice(['random', 'perlin']), default='random', show_default=True)
@click.option('--starting-image', type=str, help='Path to image to start from', default=None)
@click.option('--convert-to-grayscale', '-grayscale', is_flag=True, help='Add flag to grayscale the initial image')
@click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)', default=None)
@click.option('--lr', 'learning_rate', type=float, help='Learning rate', default=5e-3, show_default=True)
@click.option('--iterations', '-it', type=click.IntRange(min=1), help='Number of gradient ascent steps per octave', default=10, show_default=True)
# Layer options
@click.option('--layers', type=str, help='Comma-separated list of the layers of the Discriminator to use as the features. If "use_all", will use all available layers.', default='b16_conv0', show_default=True)
@click.option('--channels', type=gen_utils.num_range, help='Comma-separated list and/or range of the channels of the Discriminator to use as the features. If "None", will use all channels in each specified layer.', default=None, show_default=True)
@click.option('--normed', 'norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by its number of elements')
@click.option('--sqrt-normed', 'sqrt_norm_model_layers', is_flag=True, help='Add flag to divide the features of each layer of D by the square root of its number of elements')
# Octaves options
@click.option('--num-octaves', type=click.IntRange(min=1), help='Number of octaves', default=5, show_default=True)
@click.option('--octave-scale', type=float, help='Image scale between octaves', default=1.4, show_default=True)
@click.option('--unzoom-octave', type=bool, help='Set to True for the octaves to be unzoomed (this will be slower)', default=False, show_default=True)
# TODO: Individual frame manipulation options
# Video options
@click.option('--seed-sec', '-sec', type=float, help='Number of seconds between each seed transition', default=5.0, show_default=True)
@click.option('--fps', type=gen_utils.parse_fps, help='FPS for the mp4 video of optimization progress (if saved)', default=25, show_default=True)
# Extra parameters for saving the results
@click.option('--outdir', type=click.Path(file_okay=False), help='Directory path to save the results', default=os.path.join(os.getcwd(), 'out', 'discriminator_synthesis'), show_default=True, metavar='DIR')
@click.option('--description', '-desc', type=str, help='Additional description name for the directory path to save results', default='', show_default=True)
def random_interpolation(
        ctx: click.Context,
        network_pkl: Union[str, os.PathLike],
        cfg: Optional[str],
        seeds: List[int],
        interp_type: Optional[str],
        smooth: Optional[bool],
        image_noise: Optional[str],
        starting_image: Optional[Union[str, os.PathLike]],
        convert_to_grayscale: bool,
        class_idx: Optional[int],  # TODO: conditional model
        learning_rate: float,
        iterations: int,
        layers: str,
        channels: List[int],
        norm_model_layers: Optional[bool],
        sqrt_norm_model_layers: Optional[bool],
        num_octaves: int,
        octave_scale: float,
        unzoom_octave: Optional[bool],
        seed_sec: float,
        fps: int,
        outdir: Union[str, os.PathLike],
        description: str,
):
    """Do a latent walk between random Perlin images (given the seeds) and generate a video with these frames."""
    # TODO: To make this better and more stable, we should generate Perlin noise animations, not interpolations
    # Set up device
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load Discriminator
    D = gen_utils.load_network('D', network_pkl, cfg, device)

    # Get model resolution
    model_resolution = D.img_resolution
    model = DiscriminatorFeatures(D).requires_grad_(False).to(device)

    layers = layers.split(',')

    # Get all available layers
    if 'use_all' in layers:
        layers = get_available_layers(max_resolution=model_resolution)
    else:
        # Parse the layers given by the user and leave only those available by the model
        available_layers = get_available_layers(max_resolution=model_resolution)
        layers = [layer for layer in layers if layer in available_layers]

    # Make the run dir in the specified output directory
    desc = f'random-interp-layers_{"-".join(x for x in layers)}'
    desc = f'{desc}-{description}' if len(description) != 0 else desc
    run_dir = gen_utils.make_run_dir(outdir, desc)

    # Number of steps to take between each random image
    n_steps = int(np.rint(seed_sec * fps))
    # Total number of frames
    num_frames = int(n_steps * (len(seeds) - 1))
    # Total video length in seconds
    duration_sec = num_frames / fps
    # Number of digits for naming purposes
    n_digits = int(np.log10(num_frames)) + 1

    # Create interpolation of noises
    random_images = []
    for seed in seeds:
        # Get the starting seed and image
        image, _ = get_image(seed=seed, image_noise=image_noise,
                             starting_image=starting_image,
                             image_size=model_resolution,
                             convert_to_grayscale=convert_to_grayscale)
        image = np.array(image) / 255.0
        random_images.append(image)
    random_images = np.stack(random_images)

    all_images = np.empty([0] + list(random_images.shape[1:]), dtype=np.float32)
    # Do interpolation
    for i in range(len(random_images) - 1):
        # Interpolate between each pair of images
        interp = gen_utils.interpolate(random_images[i], random_images[i + 1], n_steps, interp_type, smooth)
        # Append it to the list of all images
        all_images = np.append(all_images, interp, axis=0)

    # DeepDream expects a list of PIL.Image objects
    pil_images = []
    for idx in range(len(all_images)):
        im = (255 * all_images[idx]).astype(dtype=np.uint8)
        pil_images.append(Image.fromarray(im))

    for idx, image in enumerate(tqdm(pil_images, desc='Interpolating...', unit='frame', total=num_frames)):
        # Extract deep dream image
        dreamed_image = deep_dream(image, model, model_resolution, layers=layers, channels=channels, seed=None,
                                   normed=norm_model_layers, disable_inner_tqdm=True, ignore_initial_transform=True,
                                   sqrt_normed=sqrt_norm_model_layers, iterations=iterations, lr=learning_rate,
                                   octave_scale=octave_scale, num_octaves=num_octaves, unzoom_octave=unzoom_octave)

        # Save the resulting image and initial image
        filename = f'{image_noise}-interpolation_frame_{idx:0{n_digits}d}.jpg'
        Image.fromarray(dreamed_image, 'RGB').save(os.path.join(run_dir, filename))

    # Save the configuration used
    ctx.obj = {
        'network_pkl': network_pkl,
        'synthesis_options': {
            'seeds': seeds,
            'starting_image': starting_image,
            'class_idx': class_idx,
            'learning_rate': learning_rate,
            'iterations': iterations},
        'layer_options': {
            'layer': layers,
            'channels': channels,
            'norm_model_layers': norm_model_layers,
            'sqrt_norm_model_layers': sqrt_norm_model_layers},
        'octaves_options': {
            'octave_scale': octave_scale,
            'num_octaves': num_octaves,
            'unzoom_octave': unzoom_octave},
        'extra_parameters': {
            'outdir': run_dir,
            'description': description}
    }
    # Save the run configuration
    gen_utils.save_config(ctx=ctx, run_dir=run_dir)

    # Generate the video
    print('Saving video...')
    ffmpeg_command = r'/usr/bin/ffmpeg' if os.name != 'nt' else r'C:\\Ffmpeg\\bin\\ffmpeg.exe'
    stream = ffmpeg.input(os.path.join(run_dir, f'{image_noise}-interpolation_frame_%0{n_digits}d.jpg'), framerate=fps)
    stream = ffmpeg.output(stream, os.path.join(run_dir, f'{image_noise}-interpolation.mp4'), crf=20, pix_fmt='yuv420p')
    ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, cmd=ffmpeg_command)


# ----------------------------------------------------------------------------


if __name__ == '__main__':
    main()


# ----------------------------------------------------------------------------