Spaces:

callum-canavan
/

Multi-View-Illusion-Diffusion

Paused

App Files Files Community

callum-canavan committed on Dec 2, 2023

Commit

954caab

•

1 Parent(s): b4209f3

Add helpers, change to hot dog example

Browse files

Files changed (21) hide show

.gitignore +3 -0
app.py +20 -4
diffuse.py +34 -0
generate.py +79 -0
requirements.txt +165 -0
visual_anagrams/__init__.py +0 -0
visual_anagrams/samplers.py +232 -0
visual_anagrams/utils.py +93 -0
visual_anagrams/views/__init__.py +46 -0
visual_anagrams/views/jigsaw_helpers.py +35 -0
visual_anagrams/views/permutations.py +242 -0
visual_anagrams/views/view_base.py +49 -0
visual_anagrams/views/view_flip.py +30 -0
visual_anagrams/views/view_identity.py +11 -0
visual_anagrams/views/view_inner_circle.py +56 -0
visual_anagrams/views/view_jigsaw.py +222 -0
visual_anagrams/views/view_negate.py +41 -0
visual_anagrams/views/view_patch_permute.py +154 -0
visual_anagrams/views/view_permute.py +91 -0
visual_anagrams/views/view_rotate.py +87 -0
visual_anagrams/views/view_skew.py +55 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+env/
+__pycache__/
+assets/

app.py CHANGED Viewed

@@ -1,9 +1,25 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+from transformers import pipeline
+# Build the classifier once at import time. It gets its own name so that the
+# imported `pipeline` factory is not shadowed by the object it returns.
+classifier = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+def predict(input_img):
+    """
+    Classify an image and return it together with its label scores.
+    input_img :
+        Image supplied by the Gradio input component (configured as type="pil")
+    Returns a tuple (input_img, {label: score}) built from the classifier output.
+    """
+    predictions = classifier(input_img)
+    return input_img, {p["label"]: p["score"] for p in predictions}
+# UI: one image input, and the echoed image plus the label scores as outputs
+gradio_app = gr.Interface(
+    predict,
+    inputs=gr.Image(
+        label="Select hot dog candidate", sources=["upload", "webcam"], type="pil"
+    ),
+    outputs=[
+        gr.Image(label="Processed Image"),
+        gr.Label(label="Result", num_top_classes=2),
+    ],
+    title="Hot Dog? Or Not?",
+)
+# Only start the server when executed as a script, not when imported
+if __name__ == "__main__":
+    gradio_app.launch()

diffuse.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from diffusers import DiffusionPipeline
+from diffusers.utils import pt_to_pil
+import torch
+# The xformers attention call is only wanted for torch < 2.0 (the original
+# inline comments said to remove it otherwise), so decide once from the
+# installed torch major version instead of calling it unconditionally.
+_USE_XFORMERS = int(torch.__version__.split(".")[0]) < 2
+def _prepare(pipe):
+    """Apply the memory-saving settings shared by all three stages and return `pipe`."""
+    if _USE_XFORMERS:
+        pipe.enable_xformers_memory_efficient_attention()
+    pipe.enable_model_cpu_offload()
+    return pipe
+# stage 1
+stage_1 = _prepare(DiffusionPipeline.from_pretrained(
+    "DeepFloyd/IF-I-M-v1.0", variant="fp16", torch_dtype=torch.float16
+))
+# stage 2 (text_encoder=None: no text encoder is loaded for this stage)
+stage_2 = _prepare(DiffusionPipeline.from_pretrained(
+    "DeepFloyd/IF-II-M-v1.0",
+    text_encoder=None,
+    variant="fp16",
+    torch_dtype=torch.float16,
+))
+# stage 3 reuses the safety-related modules already loaded by stage 1
+safety_modules = {
+    "feature_extractor": stage_1.feature_extractor,
+    "safety_checker": stage_1.safety_checker,
+    "watermarker": stage_1.watermarker,
+}
+stage_3 = _prepare(DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-x4-upscaler",
+    **safety_modules,
+    torch_dtype=torch.float16
+))

generate.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import argparse
+from pathlib import Path
+import torch
+from diffusers import DiffusionPipeline
+from visual_anagrams.views import get_views
+from visual_anagrams.samplers import sample_stage_1, sample_stage_2
+from visual_anagrams.utils import add_args, save_illusion, save_metadata
+# Parse args
+parser = argparse.ArgumentParser()
+parser = add_args(parser)
+args = parser.parse_args()
+# Do admin stuff: all outputs go under <save_dir>/<name>
+save_dir = Path(args.save_dir) / args.name
+save_dir.mkdir(exist_ok=True, parents=True)
+# Make models
+stage_1 = DiffusionPipeline.from_pretrained(
+                "DeepFloyd/IF-I-M-v1.0",
+                variant="fp16",
+                torch_dtype=torch.float16)
+stage_2 = DiffusionPipeline.from_pretrained(
+                "DeepFloyd/IF-II-M-v1.0",
+                text_encoder=None,
+                variant="fp16",
+                torch_dtype=torch.float16,
+            )
+# NOTE(review): offloading is enabled and the pipelines are then moved with
+# `.to(...)`; confirm against the diffusers docs that the two are meant to
+# be combined for the chosen --device.
+stage_1.enable_model_cpu_offload()
+stage_2.enable_model_cpu_offload()
+stage_1 = stage_1.to(args.device)
+stage_2 = stage_2.to(args.device)
+# Get prompt embeddings: each encode_prompt result is unpacked as a
+# (prompt_embeds, negative_prompt_embeds) pair, then concatenated over prompts
+prompt_embeds = [stage_1.encode_prompt(f'{args.style} {p}'.strip()) for p in args.prompts]
+prompt_embeds, negative_prompt_embeds = zip(*prompt_embeds)
+prompt_embeds = torch.cat(prompt_embeds)
+negative_prompt_embeds = torch.cat(negative_prompt_embeds)  # These are just null embeds
+# Get views
+views = get_views(args.views)
+# Save metadata only when requested: the --save_metadata flag warns that the
+# pickled views may use lots of disk space, so it must actually be honoured
+if args.save_metadata:
+    save_metadata(views, args, save_dir)
+# Sample illusions
+for i in range(args.num_samples):
+    # Admin stuff: a distinct, reproducible seed and output folder per sample
+    generator = torch.manual_seed(args.seed + i)
+    sample_dir = save_dir / f'{i:04}'
+    sample_dir.mkdir(exist_ok=True, parents=True)
+    # Sample 64x64 image
+    image = sample_stage_1(stage_1,
+                           prompt_embeds,
+                           negative_prompt_embeds,
+                           views,
+                           num_inference_steps=args.num_inference_steps,
+                           guidance_scale=args.guidance_scale,
+                           reduction=args.reduction,
+                           generator=generator)
+    save_illusion(image, views, sample_dir)
+    # Sample 256x256 image, by upsampling 64x64 image
+    image = sample_stage_2(stage_2,
+                           image,
+                           prompt_embeds,
+                           negative_prompt_embeds,
+                           views,
+                           num_inference_steps=args.num_inference_steps,
+                           guidance_scale=args.guidance_scale,
+                           reduction=args.reduction,
+                           noise_level=args.noise_level,
+                           generator=generator)
+    save_illusion(image, views, sample_dir)

requirements.txt ADDED Viewed

	@@ -0,0 +1,165 @@

+absl-py==1.4.0
+aiohttp==3.8.5
+aiosignal==1.3.1
+annotated-types==0.5.0
+anyio==3.7.1
+argcomplete==3.1.6
+arrow==1.2.3
+astroid==2.15.6
+astunparse==1.6.3
+async-timeout==4.0.3
+attrs==23.1.0
+aws-cdk-lib==2.104.0
+aws-cdk.asset-awscli-v1==2.2.201
+aws-cdk.asset-kubectl-v20==2.1.2
+aws-cdk.asset-node-proxy-agent-v6==2.0.1
+backoff==2.2.1
+beautifulsoup4==4.12.2
+black==23.9.1
+blessed==1.20.0
+cachetools==5.3.1
+cattrs==23.1.2
+certifi==2022.12.7
+charset-normalizer==3.1.0
+click==8.1.7
+constructs==10.3.0
+contourpy==1.0.7
+croniter==1.4.1
+cycler==0.11.0
+dataclasses-json==0.6.1
+dateutils==0.6.12
+deepdiff==6.5.0
+diffusers==0.24.0
+dill==0.3.7
+distlib==0.3.6
+easydict==1.10
+einops
+fastapi==0.103.1
+filelock==3.9.0
+flatbuffers==23.5.26
+fonttools==4.39.3
+frozenlist==1.4.0
+fsspec==2023.9.0
+gast==0.4.0
+gitdb==4.0.10
+GitPython==3.1.36
+google-auth==2.22.0
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+grpcio==1.57.0
+h11==0.14.0
+h5py==3.9.0
+huggingface-hub==0.19.4
+idna==3.4
+importlib-metadata==6.9.0
+importlib-resources==6.1.0
+iniconfig==2.0.0
+inquirer==3.1.3
+isort==5.12.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.3.2
+jsii==1.91.0
+jsonpatch==1.33
+jsonpointer==2.4
+keras==2.13.1
+kiwisolver==1.4.4
+langchain==0.0.330
+langsmith==0.0.57
+lazy-object-proxy==1.9.0
+libclang==16.0.6
+lightning==2.0.8
+lightning-cloud==0.5.38
+lightning-utilities==0.9.0
+Markdown==3.4.4
+markdown-it-py==3.0.0
+MarkupSafe==2.1.2
+marshmallow==3.20.1
+matplotlib==3.7.2
+mccabe==0.7.0
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+networkx==3.1
+numpy==1.24.2
+oauthlib==3.2.2
+opencv-python==4.7.0.72
+opt-einsum==3.3.0
+ordered-set==4.1.0
+packaging==23.1
+pandas==2.0.3
+pathspec==0.11.2
+Pillow==9.5.0
+platformdirs==3.1.0
+pluggy==1.3.0
+protobuf==4.24.0
+psutil==5.9.5
+publication==0.0.3
+py-cpuinfo==9.0.0
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pybind11==2.11.1
+pydantic==2.1.1
+pydantic_core==2.4.0
+Pygments==2.16.1
+PyJWT==2.8.0
+pylint==2.17.5
+pyparsing==3.0.9
+pytest==7.4.2
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-editor==1.0.4
+python-multipart==0.0.6
+pytorch-lightning==2.0.8
+pytz==2023.3
+PyYAML==6.0.1
+readchar==4.0.5
+regex==2023.10.3
+requests==2.28.2
+requests-oauthlib==1.3.1
+rich==13.5.2
+rsa==4.9
+safetensors==0.4.1
+scikit-learn==1.3.0
+seaborn==0.12.2
+six==1.16.0
+smmap==5.0.0
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.23
+starlette==0.27.0
+starsessions==1.3.0
+sympy==1.11.1
+tenacity==8.2.3
+tensorboard==2.13.0
+tensorboard-data-server==0.7.1
+tensorflow==2.13.0
+tensorflow-estimator==2.13.0
+termcolor==2.3.0
+threadpoolctl==3.2.0
+tokenizers==0.15.0
+tomlkit==0.12.1
+torch==2.0.1
+torchaudio==2.0.2
+torchmetrics==1.1.2
+torchvision==0.15.2
+tqdm==4.65.0
+traitlets==5.10.0
+transformers==4.35.2
+typeguard==2.13.3
+typing-inspect==0.9.0
+typing_extensions==4.6.1
+tzdata==2023.3
+ultralytics==8.0.178
+urllib3==1.26.15
+uvicorn==0.23.2
+virtualenv==20.20.0
+wcwidth==0.2.6
+websocket-client==1.6.3
+websockets==11.0.3
+Werkzeug==2.3.7
+wrapt==1.15.0
+yacs==0.1.8
+yarl==1.9.2
+yolov4==2.0.3
+zipp==3.17.0

visual_anagrams/__init__.py ADDED Viewed

File without changes

visual_anagrams/samplers.py ADDED Viewed

	@@ -0,0 +1,232 @@

+from tqdm import tqdm
+import torch
+import torch.nn.functional as F
+from diffusers.utils.torch_utils import randn_tensor
+@torch.no_grad()
+def sample_stage_1(model,
+                   prompt_embeds,
+                   negative_prompt_embeds,
+                   views,
+                   num_inference_steps=100,
+                   guidance_scale=7.0,
+                   reduction='mean',
+                   generator=None):
+    '''
+    Denoise a single image so that each view of it follows its own prompt.
+    model :
+        Pipeline object; this function uses its `unet`, `scheduler`,
+        `device` and `prepare_intermediate_images` members
+    prompt_embeds (torch.tensor) :
+        Conditional prompt embeddings, one entry along dim 0 per view
+    negative_prompt_embeds (torch.tensor) :
+        Unconditional embeddings, used for classifier-free guidance (CFG)
+    views (list) :
+        View objects providing `view` and `inverse_view`, one per prompt
+    num_inference_steps (int) :
+        Number of scheduler timesteps to run
+    guidance_scale (float) :
+        CFG weight applied to (conditional - unconditional)
+    reduction (str) :
+        `mean` averages the estimates of all views at every step;
+        `alternate` uses the estimate of view (step index % number of views)
+    generator :
+        Optional random generator, forwarded to the initial noise and to
+        `scheduler.step`
+    Returns the intermediate image tensor after the last scheduler step.
+    Raises ValueError if `reduction` is neither `mean` nor `alternate`.
+    '''
+    # Params
+    num_images_per_prompt = 1
+    device = model.device
+    height = model.unet.config.sample_size
+    width = model.unet.config.sample_size
+    batch_size = 1      # TODO: Support larger batch sizes, maybe
+    num_prompts = prompt_embeds.shape[0]
+    assert num_prompts == len(views), \
+        "Number of prompts must match number of views!"
+    # Fail before any model call rather than after the first UNet pass
+    if reduction not in ('mean', 'alternate'):
+        raise ValueError('Reduction must be either `mean` or `alternate`')
+    # For CFG
+    prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+    # Setup timesteps
+    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    timesteps = model.scheduler.timesteps
+    # Make intermediate_images
+    noisy_images = model.prepare_intermediate_images(
+        batch_size * num_images_per_prompt,
+        model.unet.config.in_channels,
+        height,
+        width,
+        prompt_embeds.dtype,
+        device,
+        generator,
+    )
+    for i, t in enumerate(tqdm(timesteps)):
+        # Apply views to noisy_image
+        viewed_noisy_images = torch.stack(
+            [view_fn.view(noisy_images[0]) for view_fn in views])
+        # Duplicate inputs for CFG
+        # Model input is: [ neg_0, neg_1, ..., pos_0, pos_1, ... ]
+        model_input = torch.cat([viewed_noisy_images] * 2)
+        model_input = model.scheduler.scale_model_input(model_input, t)
+        # Predict noise estimate
+        noise_pred = model.unet(
+            model_input,
+            t,
+            encoder_hidden_states=prompt_embeds,
+            cross_attention_kwargs=None,
+            return_dict=False,
+        )[0]
+        # Extract uncond (neg) and cond noise estimates
+        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+        # Undo each view on its own estimate, for both halves of the CFG batch
+        noise_pred_uncond = torch.stack(
+            [view.inverse_view(pred) for pred, view in zip(noise_pred_uncond, views)])
+        noise_pred_text = torch.stack(
+            [view.inverse_view(pred) for pred, view in zip(noise_pred_text, views)])
+        # Split into noise estimate and variance estimates
+        noise_pred_uncond, _ = noise_pred_uncond.split(model_input.shape[1], dim=1)
+        noise_pred_text, predicted_variance = noise_pred_text.split(model_input.shape[1], dim=1)
+        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+        # Reduce predicted noise and variances. Group the per-view estimates
+        # using the tensors' own trailing dims instead of a fixed 3x64x64.
+        noise_pred = noise_pred.view(-1, num_prompts, *noise_pred.shape[1:])
+        predicted_variance = predicted_variance.view(
+            -1, num_prompts, *predicted_variance.shape[1:])
+        if reduction == 'mean':
+            noise_pred = noise_pred.mean(1)
+            predicted_variance = predicted_variance.mean(1)
+        else:   # 'alternate' (validated above)
+            noise_pred = noise_pred[:,i%num_prompts]
+            predicted_variance = predicted_variance[:,i%num_prompts]
+        noise_pred = torch.cat([noise_pred, predicted_variance], dim=1)
+        # compute the previous noisy sample x_t -> x_t-1
+        noisy_images = model.scheduler.step(
+            noise_pred, t, noisy_images, generator=generator, return_dict=False
+        )[0]
+    # Return denoised images
+    return noisy_images
+@torch.no_grad()
+def sample_stage_2(model,
+                   image,
+                   prompt_embeds,
+                   negative_prompt_embeds,
+                   views,
+                   num_inference_steps=100,
+                   guidance_scale=7.0,
+                   reduction='mean',
+                   noise_level=50,
+                   generator=None):
+    '''
+    Upsample a stage 1 sample, keeping every view consistent with its prompt.
+    model :
+        Pipeline object; this function uses its `unet`, `scheduler`,
+        `image_noising_scheduler`, `device`, `preprocess_image` and
+        `prepare_intermediate_images` members
+    image (torch.tensor) :
+        Output of stage 1, used as the conditioning image
+    prompt_embeds (torch.tensor) :
+        Conditional prompt embeddings, one entry along dim 0 per view
+    negative_prompt_embeds (torch.tensor) :
+        Unconditional embeddings, used for classifier-free guidance (CFG)
+    views (list) :
+        View objects providing `view` and `inverse_view`, one per prompt
+    num_inference_steps (int) :
+        Number of scheduler timesteps to run
+    guidance_scale (float) :
+        CFG weight applied to (conditional - unconditional)
+    reduction (str) :
+        `mean` averages the estimates of all views at every step;
+        `alternate` uses the estimate of view (step index % number of views)
+    noise_level (int) :
+        Timestep used to noise the upscaled conditioning image; also passed
+        to the UNet as `class_labels`
+    generator :
+        Optional random generator, forwarded to all noise sampling
+    Returns the intermediate image tensor after the last scheduler step.
+    Raises ValueError if `reduction` is neither `mean` nor `alternate`.
+    '''
+    # Params
+    batch_size = 1      # TODO: Support larger batch sizes, maybe
+    num_prompts = prompt_embeds.shape[0]
+    height = model.unet.config.sample_size
+    width = model.unet.config.sample_size
+    device = model.device
+    num_images_per_prompt = 1
+    # Same input checks as stage 1, done before any model call
+    assert num_prompts == len(views), \
+        "Number of prompts must match number of views!"
+    if reduction not in ('mean', 'alternate'):
+        raise ValueError('Reduction must be either `mean` or `alternate`')
+    # For CFG
+    prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
+    # Get timesteps
+    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    timesteps = model.scheduler.timesteps
+    # Half of the UNet input channels hold the noisy image; the other half
+    # is the upscaled conditioning image concatenated below
+    num_channels = model.unet.config.in_channels // 2
+    noisy_images = model.prepare_intermediate_images(
+        batch_size * num_images_per_prompt,
+        num_channels,
+        height,
+        width,
+        prompt_embeds.dtype,
+        device,
+        generator,
+    )
+    # Prepare upscaled image and noise level
+    image = model.preprocess_image(image, num_images_per_prompt, device)
+    upscaled = F.interpolate(image, (height, width), mode="bilinear", align_corners=True)
+    noise_level = torch.tensor([noise_level] * upscaled.shape[0], device=upscaled.device)
+    noise = randn_tensor(upscaled.shape, generator=generator, device=upscaled.device, dtype=upscaled.dtype)
+    upscaled = model.image_noising_scheduler.add_noise(upscaled, noise, timesteps=noise_level)
+    # Condition on noise level, for each model input
+    noise_level = torch.cat([noise_level] * num_prompts * 2)
+    # Denoising Loop
+    for i, t in enumerate(tqdm(timesteps)):
+        # Cat noisy image with upscaled conditioning image
+        model_input = torch.cat([noisy_images, upscaled], dim=1)
+        # Apply views to noisy_image
+        viewed_inputs = torch.stack(
+            [view_fn.view(model_input[0]) for view_fn in views])
+        # Duplicate inputs for CFG
+        # Model input is: [ neg_0, neg_1, ..., pos_0, pos_1, ... ]
+        model_input = torch.cat([viewed_inputs] * 2)
+        model_input = model.scheduler.scale_model_input(model_input, t)
+        # predict the noise residual
+        noise_pred = model.unet(
+            model_input,
+            t,
+            encoder_hidden_states=prompt_embeds,
+            class_labels=noise_level,
+            cross_attention_kwargs=None,
+            return_dict=False,
+        )[0]
+        # Extract uncond (neg) and cond noise estimates
+        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+        # Undo each view on its own estimate, for both halves of the CFG batch
+        noise_pred_uncond = torch.stack(
+            [view.inverse_view(pred) for pred, view in zip(noise_pred_uncond, views)])
+        noise_pred_text = torch.stack(
+            [view.inverse_view(pred) for pred, view in zip(noise_pred_text, views)])
+        # Split predicted noise and predicted variances
+        noise_pred_uncond, _ = noise_pred_uncond.split(model_input.shape[1] // 2, dim=1)
+        noise_pred_text, predicted_variance = noise_pred_text.split(model_input.shape[1] // 2, dim=1)
+        noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+        # Combine noise estimates (and variance estimates). Group the per-view
+        # estimates using the tensors' own trailing dims, not a fixed 3x256x256.
+        noise_pred = noise_pred.view(-1, num_prompts, *noise_pred.shape[1:])
+        predicted_variance = predicted_variance.view(
+            -1, num_prompts, *predicted_variance.shape[1:])
+        if reduction == 'mean':
+            noise_pred = noise_pred.mean(1)
+            predicted_variance = predicted_variance.mean(1)
+        else:   # 'alternate' (validated above)
+            noise_pred = noise_pred[:,i%num_prompts]
+            predicted_variance = predicted_variance[:,i%num_prompts]
+        noise_pred = torch.cat([noise_pred, predicted_variance], dim=1)
+        # compute the previous noisy sample x_t -> x_t-1
+        noisy_images = model.scheduler.step(
+            noise_pred, t, noisy_images, generator=generator, return_dict=False
+        )[0]
+    # Return denoised images
+    return noisy_images

visual_anagrams/utils.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import pickle
+from pathlib import Path
+import torch
+from torchvision.utils import save_image
+def add_args(parser):
+    """
+    Add arguments for sampling to a parser
+    parser (argparse.ArgumentParser) :
+        Parser to extend; it is modified in place
+    Returns the same parser, so calls can be chained or reassigned.
+    """
+    parser.add_argument("--name", required=True, type=str)
+    parser.add_argument(
+        "--save_dir",
+        type=str,
+        default="results",
+        help="Location to save samples and metadata",
+    )
+    parser.add_argument(
+        "--prompts",
+        required=True,
+        type=str,
+        nargs="+",
+        help="Prompts to use, corresponding to each view.",
+    )
+    parser.add_argument(
+        "--views",
+        required=True,
+        type=str,
+        nargs="+",
+        help="Name of views to use. See `get_views` in `views/__init__.py`.",
+    )
+    parser.add_argument(
+        "--style", default="", type=str, help="Optional string to prepend prompt with"
+    )
+    parser.add_argument("--num_inference_steps", type=int, default=100)
+    parser.add_argument("--num_samples", type=int, default=100)
+    # The samplers only understand these two values, so reject anything else
+    # at parse time instead of after the models have been loaded.
+    parser.add_argument(
+        "--reduction", type=str, default="mean", choices=["mean", "alternate"]
+    )
+    parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument("--guidance_scale", type=float, default=7.0)
+    parser.add_argument(
+        "--noise_level", type=int, default=50, help="Noise level for stage 2"
+    )
+    parser.add_argument("--device", type=str, default="cpu")
+    parser.add_argument(
+        "--save_metadata",
+        action="store_true",
+        help="If true, save metadata about the views. May use lots of disk space, particularly for permutation views.",
+    )
+    return parser
+def save_illusion(image, views, sample_dir):
+    """
+    Write the illusion and a strip of all of its views as PNG files
+    image (torch.tensor) :
+        Tensor holding the illusion; `image[0]` is what the views are
+        applied to and the last dim gives the size used in the filenames
+    views (list) :
+        View objects, each providing a `view` method
+    sample_dir (pathlib.Path) :
+        Directory that receives `sample_<size>.png` and
+        `sample_<size>.views.png`
+    """
+    side = image.shape[-1]
+    illusion_path = sample_dir / f"sample_{side}.png"
+    views_path = sample_dir / f"sample_{side}.views.png"
+    # The illusion itself, rescaled with x / 2 + 0.5 before saving
+    save_image(image / 2.0 + 0.5, illusion_path, padding=0)
+    # Every view of the illusion, stacked into one batch and rescaled the same way
+    rendered = [v.view(image[0]) for v in views]
+    im_views = torch.stack(rendered)
+    save_image(im_views / 2.0 + 0.5, views_path, padding=0)
+def save_metadata(views, args, save_dir):
+    """
+    Pickle the following into `metadata.pkl` inside `save_dir`
+        1) the view objects, under the key "views"
+        2) the args for the illusion, under the key "args"
+    """
+    out_path = save_dir / "metadata.pkl"
+    payload = dict(views=views, args=args)
+    with open(out_path, "wb") as handle:
+        pickle.dump(payload, handle)
+def get_courier_font_path():
+    """Return, as a string, the path of `assets/CourierPrime-Regular.ttf` next to this module."""
+    assets_dir = Path(__file__).parent / "assets"
+    return str(assets_dir / "CourierPrime-Regular.ttf")

visual_anagrams/views/__init__.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from pathlib import Path
+from PIL import Image
+import numpy as np
+from .view_identity import IdentityView
+from .view_flip import FlipView
+from .view_rotate import Rotate180View, Rotate90CCWView, Rotate90CWView
+from .view_negate import NegateView
+from .view_skew import SkewView
+from .view_patch_permute import PatchPermuteView
+from .view_jigsaw import JigsawView
+from .view_inner_circle import InnerCircleView
+# Command-line view name -> view class. `pixel_permute` shares the patch
+# permutation class; `get_views` gives it a different constructor argument.
+VIEW_MAP = {
+    'identity': IdentityView,
+    'flip': FlipView,
+    'rotate_cw': Rotate90CWView,
+    'rotate_ccw': Rotate90CCWView,
+    'rotate_180': Rotate180View,
+    'negate': NegateView,
+    'skew': SkewView,
+    'patch_permute': PatchPermuteView,
+    'pixel_permute': PatchPermuteView,
+    'jigsaw': JigsawView,
+    'inner_circle': InnerCircleView,
+}
+def get_views(view_names):
+    '''
+    Build one view object per name (a convenience for command line usage)
+    view_names (list of str) :
+        Keys of `VIEW_MAP`; an unknown name raises KeyError
+    Returns the list of instantiated views, in the order given.
+    '''
+    # Positional constructor arguments for the views that need any;
+    # every other view is built without arguments
+    preset_args = {
+        'patch_permute': [8],
+        'pixel_permute': [64],
+        'skew': [1.5],
+    }
+    return [VIEW_MAP[name](*preset_args.get(name, [])) for name in view_names]

visual_anagrams/views/jigsaw_helpers.py ADDED Viewed

	@@ -0,0 +1,35 @@

+from pathlib import Path
+from PIL import Image
+import numpy as np
+def get_jigsaw_pieces(size):
+    '''
+    Load all pieces of the 4x4 jigsaw puzzle.
+    size (int) :
+        Should be 64 or 256, indicating side length of jigsaw puzzle
+    Returns a numpy array of the 16 piece masks, concatenated in the
+        order corner, inner, edge1, edge2 (four rotations each).
+    '''
+    # Location of pieces
+    piece_dir = Path(__file__).parent / 'assets'
+    # Helper function to load pieces as np arrays
+    def load_pieces(path):
+        '''
+        Load a piece, from the given path, as a binary numpy array.
+        Return a stacked array of the "base" piece and its rotations
+        (four entries in total, produced with `np.rot90(piece, k=-i)`).
+        '''
+        # Read inside a context manager so the file handle is closed as
+        # soon as the pixel data has been copied into the array
+        with Image.open(path) as im:
+            # First channel only; integer division by 255 maps 255 -> 1
+            piece = np.array(im)[:,:,0] // 255
+        pieces = np.stack([np.rot90(piece, k=-i) for i in range(4)])
+        return pieces
+    # Load pieces and rotate to get 16 pieces, and cat
+    pieces_corner = load_pieces(piece_dir / f'4x4/4x4_corner_{size}.png')
+    pieces_inner = load_pieces(piece_dir / f'4x4/4x4_inner_{size}.png')
+    pieces_edge1 = load_pieces(piece_dir / f'4x4/4x4_edge1_{size}.png')
+    pieces_edge2 = load_pieces(piece_dir / f'4x4/4x4_edge2_{size}.png')
+    pieces = np.concatenate([pieces_corner, pieces_inner, pieces_edge1, pieces_edge2])
+    return pieces

visual_anagrams/views/permutations.py ADDED Viewed

	@@ -0,0 +1,242 @@

+from pathlib import Path
+import numpy as np
+import torch
+import torchvision.transforms.functional as TF
+from einops import rearrange, repeat
+from .jigsaw_helpers import get_jigsaw_pieces
+def get_inv_perm(perm):
+    '''
+    Get the inverse permutation of a permutation. That is, the array such that
+        perm[perm_inv] = perm_inv[perm] = arange(len(perm))
+    perm (torch.tensor) :
+        A 1-dimensional integer array, representing a permutation. Indicates
+        that element i should move to index perm[i]
+    Returns a tensor with the same shape, dtype and device as `perm`.
+    '''
+    perm_inv = torch.empty_like(perm)
+    # Create the source indices with perm's own dtype and device, so the
+    # scatter below does not mix devices or dtypes
+    perm_inv[perm] = torch.arange(len(perm), dtype=perm.dtype, device=perm.device)
+    return perm_inv
+def make_inner_circle_perm(im_size=64, r=24):
+    '''
+    Build the pixel permutation for the "inner circle" view: pixels whose
+    centers lie strictly inside the circle of radius `r` around the image
+    center are rotated by 180 degrees, all other pixels stay in place.
+    im_size (int) :
+        Side length of the (square) image
+    r (number) :
+        Radius of the circle, in pixels
+    Returns a 1-D tensor, in raster scan order, holding the destination
+    index (x + y * im_size) of every pixel.
+    '''
+    half = im_size // 2
+    r_squared = r**2
+    def destination(ix, iy):
+        # Pixel-center coordinates, with the origin at the image center
+        x = ix - half + 0.5
+        y = iy - half + 0.5
+        # A 180 deg rotation about the origin is a point reflection
+        if x**2 + y**2 < r_squared:
+            x, y = -x, -y
+        # Back to integer pixel coordinates, then to a flat index
+        col = int(x + half - 0.5)
+        row = int(y + half - 0.5)
+        return col + row * im_size
+    perm = [destination(ix, iy)
+            for iy in range(im_size)
+            for ix in range(im_size)]
+    return torch.tensor(perm)
+def make_jigsaw_perm(size, seed=0):
+    '''
+    Returns a permutation of pixels that is a jigsaw permutation
+    size (int) :
+        Side length of the puzzle image, passed to `get_jigsaw_pieces`
+    seed (int) :
+        Seed for numpy's global random state, which this function resets
+    Returns a tuple (perm, (piece_perms, edge_swaps)): `perm` is a 1-D
+        tensor with the destination index of every pixel, the other two
+        are the random choices the permutation was built from.
+    There are 3 types of pieces: corner, edge, and inner pieces. These were
+        created in MS Paint. They are all identical and laid out like:
+        c0 e0 f0 c1
+        f3 i0 i1 e1
+        e3 i3 i2 f1
+        c3 f2 e2 c2
+        where c is "corner," i is "inner," and "e" and "f" are "edges."
+        "e" and "f" pieces are identical, but labeled differently such that
+        to move any piece to the next index you can apply a 90 deg rotation.
+    Pieces c0, e0, f0, and i0 are defined by pngs, and will be loaded in. All
+        other pieces are obtained by 90 deg rotations of these "base" pieces.
+    Permutations are defined by:
+        1. permutation of corner (c) pieces (length 4 perm list)
+        2. permutation of inner (i) pieces (length 4 perm list)
+        3. permutation of edge (e) pieces (length 4 perm list)
+        4. permutation of edge (f) pieces (length 4 perm list)
+        5. list of four swaps, indicating swaps between e and f
+                edge pieces along the same edge (length 4 bit list)
+        Note these perm indexes will just be a "rotation index" indicating
+        how many 90 deg rotations to apply to the base pieces. The swaps
+        ensure that any edge piece can go to any edge piece, and are indexed
+        by the indexes of the "e" and "f" pieces on the edge.
+    Also note, order of indexes in permutation array is raster scan order. So,
+        go along x's first, then y's. This means y * size + x gives us the
+        1-D location in the permutation array. And image arrays are in
+        (y,x) order.
+    Plan of attack for making a pixel permutation array that represents
+        a jigsaw permutation:
+        1. Iterate through all pixels (in raster scan order)
+        2. Figure out which puzzle piece it is in initially
+        3. Look at the permutations, and see where it should go
+        4. Additionally, see if it's an edge piece, and needs to be swapped
+        5. Add the new (1-D) index to the permutation array
+    '''
+    # NOTE: this seeds numpy's *global* RNG, which is visible to callers
+    np.random.seed(seed)
+    # Get random permutations of groups of 4, and cat
+    identity = np.arange(4)
+    perm_corner = np.random.permutation(identity)
+    perm_inner = np.random.permutation(identity)
+    perm_edge1 = np.random.permutation(identity)
+    perm_edge2 = np.random.permutation(identity)
+    edge_swaps = np.random.randint(2, size=4)
+    piece_perms = np.concatenate([perm_corner, perm_inner, perm_edge1, perm_edge2])
+    # Get all 16 jigsaw pieces (in the order above)
+    pieces = get_jigsaw_pieces(size)
+    # Make permutation array to fill
+    perm = []
+    # For each pixel, figure out where it should go
+    for y in range(size):
+        for x in range(size):
+            # Figure out which piece (x,y) is in:
+            piece_idx = pieces[:,y,x].argmax()
+            # Figure out how many 90 deg rotations are on the piece
+            rot_idx = piece_idx % 4
+            # The perms tells us how many 90 deg rotations to apply to
+            # arrive at new pixel location
+            dest_rot_idx = piece_perms[piece_idx]
+            angle = (dest_rot_idx - rot_idx) * 90 / 180 * np.pi
+            # Center coordinates on origin
+            cx = x - (size - 1) / 2.
+            cy = y - (size - 1) / 2.
+            # Perform rotation
+            nx = np.cos(angle) * cx - np.sin(angle) * cy
+            ny = np.sin(angle) * cx + np.cos(angle) * cy
+            # Translate back and round coordinates to _nearest_ integer
+            nx = nx + (size - 1) / 2.
+            ny = ny + (size - 1) / 2.
+            nx = int(np.rint(nx))
+            ny = int(np.rint(ny))
+            # Perform swap if piece is an edge, and swap == 1 at NEW location
+            new_piece_idx = pieces[:,ny,nx].argmax()
+            edge_idx = new_piece_idx % 4
+            if new_piece_idx >= 8 and edge_swaps[edge_idx] == 1:
+                is_f_edge = (new_piece_idx - 8) // 4    # 1 if f, 0 if e edge
+                edge_type_parity = 1 - 2 * is_f_edge
+                rotation_parity = 1 - 2 * (edge_idx // 2)
+                swap_dist = size // 4
+                # if edge_idx is even, swap in x direction, else y
+                if edge_idx % 2 == 0:
+                    nx = nx + swap_dist * edge_type_parity * rotation_parity
+                else:
+                    ny = ny + swap_dist * edge_type_parity * rotation_parity
+            # append new index to permutation array
+            new_idx = int(ny * size + nx)
+            perm.append(new_idx)
+    return torch.tensor(perm), (piece_perms, edge_swaps)
+def recover_patch_permute(im_0, im_1, patch_size):
+    '''
+    Recover the patch permutation relating two views of a patch
+    permutation illusion.
+    im_0 (PIL.Image) :
+        Identity view of the illusion
+    im_1 (PIL.Image) :
+        Patch permuted view of the illusion
+    patch_size (int) :
+        Size of the patches in the image
+    Returns a 1-D tensor: entry i is the index of the patch of `im_1`
+        with the smallest L1 distance to patch i of `im_0`.
+    '''
+    def to_patches(im):
+        # Image -> tensor -> flat list of (c, patch_size, patch_size) patches
+        return rearrange(TF.to_tensor(im),
+                         'c (h p1) (w p2) -> (h w) c p1 p2',
+                         p1=patch_size,
+                         p2=patch_size)
+    patches_0 = to_patches(im_0)
+    patches_1 = to_patches(im_1)
+    num_patches = patches_1.shape[0]
+    # One copy of all im_1 patches per candidate row
+    candidates = repeat(patches_1,
+                        'np c p1 p2 -> np1 np c p1 p2',
+                        np=num_patches,
+                        np1=num_patches,
+                        p1=patch_size,
+                        p2=patch_size)
+    # L1 distance between every (im_0 patch, im_1 patch) pair
+    l1_dist = (candidates - patches_0[:,None]).abs().sum((2,3,4))
+    # Closest im_1 patch for each im_0 patch
+    perm = l1_dist.argmin(1)
+    return perm

visual_anagrams/views/view_base.py ADDED Viewed

	@@ -0,0 +1,49 @@

+class BaseView:
+    '''
+    BaseView class, from which all views inherit. Implements the
+        following functions:
+    '''
+    def __init__(self):
+        pass
+    def view(self, im):
+        '''
+        Apply transform to an image.
+        im (`torch.tensor`):
+            For stage 1: Tensor of shape (3, H, W) representing a noisy image
+            OR
+            For stage 2: Tensor of shape (6, H, W) representing a noisy image
+            concatenated with an upsampled conditioning image from stage 1
+        '''
+        raise NotImplementedError()
+    def inverse_view(self, noise):
+        '''
+        Apply inverse transform to noise estimates.
+            Because DeepFloyd estimates the variance in addition to
+            the noise, this function must apply the inverse to the
+            variance as well.
+        im (`torch.tensor`):
+            Tensor of shape (6, H, W) representing the noise estimate
+            (first three channel dims) and variacne estimates (last
+            three channel dims)
+        '''
+        raise NotImplementedError()
+    def make_frame(self, im, t):
+        '''
+        Make a frame, transitioning linearly from the identity view (t=0)
+            to this view (t=1)
+        im (`PIL.Image`) :
+            A PIL Image of the illusion
+        t (float) :
+            A float in [0,1] indicating time in the animation. Should start
+            at the identity view at t=0, and continuously transition to the
+            view at t=1.
+        '''
+        raise NotImplementedError()

visual_anagrams/views/view_flip.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from PIL import Image
+import torch
+from .view_base import BaseView
+class FlipView(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        return torch.flip(im, [1])
+    def inverse_view(self, noise):
+        return torch.flip(noise, [1])
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        theta = t * 180
+        # TODO: Technically not a flip, change this to a homography later
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        frame.paste(im, ((frame_size - im_size) // 2, (frame_size - im_size) // 2))
+        frame = frame.rotate(theta,
+                             resample=Image.Resampling.BILINEAR,
+                             expand=False,
+                             fillcolor=(255,255,255))
+        return frame

visual_anagrams/views/view_identity.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from .view_base import BaseView
+class IdentityView(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        return im
+    def inverse_view(self, noise):
+        return noise

visual_anagrams/views/view_inner_circle.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from PIL import Image
+import numpy as np
+import torch
+import torchvision.transforms.functional as TF
+from .permutations import make_inner_circle_perm
+from .view_permute import PermuteView
+class InnerCircleView(PermuteView):
+    '''
+    Implements an "inner circle" view, where a circle inside the image spins
+    but the border stays still. Inherits from `PermuteView`, which implements
+    the `view` and `inverse_view` functions as permutations. We just make
+    the correct permutation here, and implement the `make_frame` method
+    for animation
+    '''
+    def __init__(self):
+        '''
+        Make the correct "inner circle" permutations and pass it to the
+        parent class constructor.
+        '''
+        self.perm_64 = make_inner_circle_perm(im_size=64, r=24)
+        self.perm_256 = make_inner_circle_perm(im_size=256, r=96)
+        super().__init__(self.perm_64, self.perm_256)
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        theta = -t * 180
+        # Convert to tensor
+        im = torch.tensor(np.array(im) / 255.).permute(2,0,1)
+        # Get mask of circle (TODO: assuming size 256)
+        coords = torch.arange(0, 256) - 127.5
+        xx, yy = torch.meshgrid(coords, coords)
+        mask = xx**2 + yy**2 < (24*4)**2
+        mask = torch.stack([mask]*3).float()
+        # Get rotate image
+        im_rotated = TF.rotate(im, theta)
+        # Composite rotated circle + border together
+        im = im * (1 - mask) + im_rotated * mask
+        # Convert back to PIL
+        im = Image.fromarray((np.array(im.permute(1,2,0)) * 255.).astype(np.uint8))
+        # Paste on to canvas
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        frame.paste(im, ((frame_size - im_size) // 2, (frame_size - im_size) // 2))
+        return frame

visual_anagrams/views/view_jigsaw.py ADDED Viewed

	@@ -0,0 +1,222 @@

+import numpy as np
+from PIL import Image
+import torch
+from einops import einsum, rearrange
+from .permutations import make_jigsaw_perm, get_inv_perm
+from .view_permute import PermuteView
+from .jigsaw_helpers import get_jigsaw_pieces
+class JigsawView(PermuteView):
+    '''
+    Implements a 4x4 jigsaw puzzle view...
+    '''
+    def __init__(self, seed=11):
+        '''
+        '''
+        # Get pixel permutations, corresponding to jigsaw permutations
+        self.perm_64, _ = make_jigsaw_perm(64, seed=seed)
+        self.perm_256, (jigsaw_perm) = make_jigsaw_perm(256, seed=seed)
+        # keep track of jigsaw permutation as well
+        self.piece_perms, self.edge_swaps = jigsaw_perm
+        # Init parent PermuteView, with above pixel perms
+        super().__init__(self.perm_64, self.perm_256)
+    def extract_pieces(self, im):
+        '''
+        Given an image, extract jigsaw puzzle pieces from it
+        im (PIL.Image) :
+            PIL Image of the jigsaw illusion
+        '''
+        im = np.array(im)
+        size = im.shape[0]
+        pieces = []
+        # Get jigsaw pieces
+        piece_masks = get_jigsaw_pieces(size)
+        # Save pieces
+        for piece_mask in piece_masks:
+            # Add mask as alpha mask to image
+            im_piece = np.concatenate([im, piece_mask[:,:,None] * 255], axis=2)
+            # Get extents of piece, and crop
+            x_min = np.nonzero(im_piece[:,:,-1].sum(0))[0].min()
+            x_max = np.nonzero(im_piece[:,:,-1].sum(0))[0].max()
+            y_min = np.nonzero(im_piece[:,:,-1].sum(1))[0].min()
+            y_max = np.nonzero(im_piece[:,:,-1].sum(1))[0].max()
+            im_piece = im_piece[y_min:y_max+1, x_min:x_max+1]
+            pieces.append(Image.fromarray(im_piece))
+        return pieces
+    def paste_piece(self, piece, x, y, theta, xc, yc, canvas_size=384):
+        '''
+        Given a PIL Image of a piece, place it so that it's center is at
+            (x,y) and it's rotate about that center at theta degrees
+        x (float) : x coordinate to place piece at
+        y (float) : y coordinate to place piece at
+        theta (float) : degrees to rotate piece about center
+        xc (float) : x coordinate of center of piece
+        yc (float) : y coordinate of center of piece
+        '''
+        # Make canvas
+        canvas = Image.new("RGBA",
+                           (canvas_size, canvas_size),
+                           (255, 255, 255, 0))
+        # Past piece so center is at (x, y)
+        canvas.paste(piece, (x-xc,y-yc), piece)
+        # Rotate about (x, y)
+        canvas = canvas.rotate(theta, resample=Image.BILINEAR, center=(x, y))
+        return canvas
+    def make_frame(self, im, t, canvas_size=384, knot_seed=0):
+        '''
+        This function returns a PIL image of a frame animating a jigsaw
+            permutation. Pieces move and rotate from the identity view
+            (t = 0) to the rearranged view (t = 1) along splines.
+        The approach is as follows:
+            1. Extract all 16 pieces
+            2. Figure out start locations for each of these pieces (t=0)
+            3. Figure out how these pieces permute
+            4. Using these permutations, figure out end locations (t=1)
+            5. Make knots for splines, randomly offset normally from the
+                    midpoint of the start and end locations
+            6. Paste pieces into correct locations, determined by
+                    spline interpolation
+        im (PIL.Image) :
+            PIL image representing the jigsaw illusion
+        t (float) :
+            Interpolation parameter in [0,1] indicating what frame of the
+            animation to generate
+        canvas_size (int) :
+            Side length of the frame
+        knot_seed (int) :
+            Seed for random offsets for the knots
+        '''
+        im_size = im.size[0]
+        # Extract 16 jigsaw pieces
+        pieces = self.extract_pieces(im)
+        # Rotate all pieces to "base" piece orientation
+        pieces = [p.rotate(90 * (i % 4),
+                           resample=Image.BILINEAR,
+                           expand=1) for i, p in enumerate(pieces)]
+        # Get (hardcoded) start locations for each base piece, on a
+        # 4x4 grid centered on the origin.
+        corner_start_loc = np.array([-1.5, -1.5])
+        inner_start_loc = np.array([-0.5, -0.5])
+        edge_e_start_loc = np.array([-1.5, -0.5])
+        edge_f_start_loc = np.array([-1.5, 0.5])
+        base_start_locs = np.stack([corner_start_loc,
+                                    inner_start_loc,
+                                    edge_e_start_loc,
+                                    edge_f_start_loc])
+        # Construct all start locations by rotating around (0,0)
+        # by 90 degrees, 4 times, and concatenating the results
+        rot_mats = []
+        for theta in -np.arange(4) * 90 / 180 * np.pi:
+            rot_mat = np.array([[np.cos(theta), -np.sin(theta)],
+                                [np.sin(theta), np.cos(theta)]])
+            rot_mats.append(rot_mat)
+        rot_mats = np.stack(rot_mats)
+        start_locs = einsum(base_start_locs, rot_mats,
+                                'start i, rot j i -> start rot j')
+        start_locs = rearrange(start_locs,
+                               'start rot j -> (start rot) j')
+        # Add rotation information to start locations
+        thetas = np.tile(np.arange(4) * -90, 4)[:, None]
+        start_locs = np.concatenate([start_locs, thetas], axis=1)
+        # Get explicit permutation of pieces from permutation metadata
+        perm = self.piece_perms + np.repeat(np.arange(4), 4) * 4
+        for edge_idx, to_swap in enumerate(self.edge_swaps):
+            if to_swap:
+                # Make swap permutation array
+                swap_perm = np.arange(16)
+                swap_perm[8 + edge_idx], swap_perm[12 + edge_idx] = \
+                    swap_perm[12 + edge_idx], swap_perm[8 + edge_idx]
+                # Apply swap permutation after perm
+                perm = np.array([swap_perm[perm[i]] for i in range(16)])
+        # Get inverse perm (the actual permutation needed)...
+        perm_inv = get_inv_perm(torch.tensor(perm))
+        # ...and use it to get the final locations of pieces
+        end_locs = start_locs[perm_inv]
+        # Convert start and end locations to pixel coordinate system
+        start_locs[:,:2] = (start_locs[:,:2] + 2) * 64
+        end_locs[:,:2] = (end_locs[:,:2] + 2) * 64
+        # Add offset so pieces are centered on canvas
+        start_locs[:,:2] = start_locs[:,:2] + (canvas_size - im_size) // 2
+        end_locs[:,:2] = end_locs[:,:2] + (canvas_size - im_size) // 2
+        # Get random offsets from middle for spline knot (so path is pretty)
+        # Wrapped in a set seed
+        original_state = np.random.get_state()
+        np.random.seed(knot_seed)
+        rand_offsets = np.random.rand(16, 1) * 2 - 1
+        rand_offsets = rand_offsets * 2
+        eps = np.random.randn(16, 2)    # Add epsilon for divide by zero
+        np.random.set_state(original_state)
+        # Make spline knots by taking average of start and end,
+        # and offsetting by some amount normal from the line
+        avg_locs = (start_locs[:, :2] + end_locs[:, :2]) / 2.
+        norm = (end_locs[:, :2] - start_locs[:, :2])
+        norm = norm + eps
+        norm = norm / np.linalg.norm(norm, axis=1, keepdims=True)
+        rot_mat = np.array([[0,1], [-1,0]])
+        norm = norm @ rot_mat
+        rand_offsets = rand_offsets * (im_size / 4)
+        knot_locs = avg_locs + norm * rand_offsets
+        # Paste pieces on to a canvas
+        canvas = Image.new("RGBA", (canvas_size, canvas_size), (255,255,255,255))
+        for i in range(16):
+            # Get start and end coords
+            y_0, x_0, theta_0 = start_locs[i]
+            y_1, x_1, theta_1 = end_locs[i]
+            y_k, x_k = knot_locs[i]
+            # Take spline interpolation for x and y
+            x_int_0 = x_0 * (1-t) + x_k * t
+            y_int_0 = y_0 * (1-t) + y_k * t
+            x_int_1 = x_k * (1-t) + x_1 * t
+            y_int_1 = y_k * (1-t) + y_1 * t
+            x = int(np.round(x_int_0 * (1-t) + x_int_1 * t))
+            y = int(np.round(y_int_0 * (1-t) + y_int_1 * t))
+            # Just take normal interpolation for theta
+            theta = int(np.round(theta_0 * (1-t) + theta_1 * t))
+            # Get piece in location and rotation
+            xc = yc = im_size // 4 // 2
+            pasted_piece = self.paste_piece(pieces[i], x, y, theta, xc, yc)
+            canvas.paste(pasted_piece, (0,0), pasted_piece)
+        return canvas

visual_anagrams/views/view_negate.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from PIL import Image
+import numpy as np
+import torch
+from .view_base import BaseView
+class NegateView(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        return -im
+    def inverse_view(self, noise):
+        '''
+        Negating the variance estimate is "weird" so just don't do it.
+            This hack seems to work just fine
+        '''
+        invert_mask = torch.ones_like(noise)
+        invert_mask[:3] = -1
+        return noise * invert_mask
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        # map t from [0, 1] -> [1, -1]
+        t = 1 - t
+        t = t * 2 - 1
+        # Interpolate from pixels from [0, 1] to [1, 0]
+        im = np.array(im) / 255.
+        im = ((2 * im - 1) * t + 1) / 2.
+        im = Image.fromarray((im * 255.).astype(np.uint8))
+        # Paste on to canvas
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        frame.paste(im, ((frame_size - im_size) // 2, (frame_size - im_size) // 2))
+        return frame

visual_anagrams/views/view_patch_permute.py ADDED Viewed

	@@ -0,0 +1,154 @@

+from PIL import Image
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torchvision.transforms.functional as TF
+from einops import rearrange
+from .permutations import get_inv_perm
+from .view_base import BaseView
+class PatchPermuteView(BaseView):
+    def __init__(self, num_patches=8):
+        '''
+        Implements random patch permutations, with `num_patches`
+            patches per side
+        num_patches (int) :
+            Number of patches in one dimension. Total number
+            of patches will be num_patches**2. Should be a power of 2.
+        '''
+        assert 64 % num_patches == 0 and 256 % num_patches == 0, \
+            "`num_patches` must divide image side lengths of 64 and 256"
+        self.num_patches = num_patches
+        # Get random permutation and inverse permutation
+        self.perm = torch.randperm(self.num_patches**2)
+        self.perm_inv = get_inv_perm(self.perm)
+    def view(self, im):
+        im_size = im.shape[-1]
+        # Get number of pixels on one side of a patch
+        patch_size = int(im_size / self.num_patches)
+        # Reshape into patches of size (c, patch_size, patch_size)
+        patches = rearrange(im,
+                            'c (h p1) (w p2) -> (h w) c p1 p2',
+                            p1=patch_size,
+                            p2=patch_size)
+        # Permute
+        patches = patches[self.perm]
+        # Reshape back into image
+        im_rearr = rearrange(patches,
+                             '(h w) c p1 p2 -> c (h p1) (w p2)',
+                             h=self.num_patches,
+                             w=self.num_patches,
+                             p1=patch_size,
+                             p2=patch_size)
+        return im_rearr
+    def inverse_view(self, noise):
+        im_size = noise.shape[-1]
+        # Get number of pixels on one side of a patch
+        patch_size = int(im_size / self.num_patches)
+        # Reshape into patches of size (c, patch_size, patch_size)
+        patches = rearrange(noise,
+                            'c (h p1) (w p2) -> (h w) c p1 p2',
+                            p1=patch_size,
+                            p2=patch_size)
+        # Apply inverse permutation
+        patches = patches[self.perm_inv]
+        # Reshape back into image
+        im_rearr = rearrange(patches,
+                             '(h w) c p1 p2 -> c (h p1) (w p2)',
+                             h=self.num_patches,
+                             w=self.num_patches,
+                             p1=patch_size,
+                             p2=patch_size)
+        return im_rearr
+    def make_frame(self, im, t, canvas_size=384, scale=4, knot_seed=0):
+        '''
+        Scale is a hack, because PIL for some reason doesn't support pasting
+            at floating point coordinates. So just render at larger scale
+            and resize by 1/scale
+        '''
+        # Get useful info
+        im_size = im.size[0]
+        offset = (canvas_size - im_size) // 2  # offset to center animation
+        canvas_size = canvas_size * scale
+        offset = offset * scale
+        im = TF.to_tensor(im)
+        # Get number of pixels on one side of a patch
+        im_size = im.shape[-1]
+        patch_size = int(im_size / self.num_patches)
+        # Extract patches
+        patches = rearrange(im,
+                            'c (h p1) (w p2) -> (h w) c p1 p2',
+                            p1=patch_size,
+                            p2=patch_size)
+        # Get start locations (top left corner of patch)
+        yy, xx = torch.meshgrid(
+                        torch.arange(self.num_patches),
+                        torch.arange(self.num_patches)
+                    )
+        xx = xx.flatten()
+        yy = yy.flatten()
+        start_locs = torch.stack([xx, yy], dim=1) * patch_size * scale
+        start_locs = start_locs + offset
+        # Get end locations by permuting
+        end_locs = start_locs[self.perm]
+        # Get random anchor locations
+        original_state = np.random.get_state()
+        np.random.seed(knot_seed)
+        rand_offsets = np.random.rand(self.num_patches**2, 1) * 2 - 1
+        rand_offsets = rand_offsets * 2 * scale
+        eps = np.random.randn(*start_locs.shape)    # Add epsilon for divide by zero
+        np.random.set_state(original_state)
+        # Make spline knots by taking average of start and end,
+        # and offsetting by some amount normal from the line
+        avg_locs = (start_locs + end_locs) / 2.
+        norm = (end_locs - start_locs)
+        norm = norm + eps
+        norm = norm / np.linalg.norm(norm, axis=1, keepdims=True)
+        rot_mat = np.array([[0,1], [-1,0]])
+        norm = norm @ rot_mat
+        rand_offsets = rand_offsets * (im_size / 4)
+        knot_locs = avg_locs + norm * rand_offsets
+        # Get paste locations
+        spline_0 = start_locs * (1 - t) + knot_locs * t
+        spline_1 = knot_locs * (1 - t) + end_locs * t
+        paste_locs = spline_0 * (1 - t) + spline_1 * t
+        paste_locs = paste_locs.to(int)
+        # Paste patches onto canvas
+        canvas = Image.new("RGBA", (canvas_size, canvas_size), (255,255,255,255))
+        for patch, paste_loc in zip(patches, paste_locs):
+            patch = TF.to_pil_image(patch).convert('RGBA')
+            patch = patch.resize((patch_size * scale, patch_size * scale))
+            paste_loc = (paste_loc[0].item(), paste_loc[1].item())
+            canvas.paste(patch, paste_loc, patch)
+        if scale != 1.0:
+            canvas = canvas.resize((canvas_size // scale, canvas_size // scale))
+        return canvas

visual_anagrams/views/view_permute.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import torch
+from einops import rearrange
+from .permutations import get_inv_perm
+from .view_base import BaseView
+class PermuteView(BaseView):
+    def __init__(self, perm_64, perm_256):
+        '''
+        Implements arbitrary pixel permutations, for a given permutation.
+            We need two permutations. One of size 64x64 for stage 1, and
+            one of size 256x256 for stage 2.
+        perm_64 (torch.tensor) :
+            Tensor of integer indexes, defining a permutation, of size 64*64
+        perm_256 (torch.tensor) :
+            Tensor of integer indexes, defining a permutation, of size 256*256
+        '''
+        assert perm_64.shape == torch.Size([64*64]), \
+            "`perm_64` must be a permutation tensor of size 64*64"
+        assert perm_256.shape == torch.Size([256*256]), \
+            "`perm_256` must be a permutation tensor of size 256*256"
+        # Get random permutation and inverse permutation for stage 1
+        self.perm_64 = perm_64
+        self.perm_64_inv = get_inv_perm(self.perm_64)
+        # Get random permutation and inverse permutation for stage 2
+        self.perm_256 = perm_256
+        self.perm_256_inv = get_inv_perm(self.perm_256)
+    def view(self, im):
+        im_size = im.shape[-1]
+        perm = self.perm_64 if im_size == 64 else self.perm_256
+        num_patches = im_size
+        # Permute every pixel in the image
+        patch_size = 1
+        # Reshape into patches of size (c, patch_size, patch_size)
+        patches = rearrange(im,
+                            'c (h p1) (w p2) -> (h w) c p1 p2',
+                            p1=patch_size,
+                            p2=patch_size)
+        # Permute
+        patches = patches[perm]
+        # Reshape back into image
+        im_rearr = rearrange(patches,
+                             '(h w) c p1 p2 -> c (h p1) (w p2)',
+                             h=num_patches,
+                             w=num_patches,
+                             p1=patch_size,
+                             p2=patch_size)
+        return im_rearr
+    def inverse_view(self, noise):
+        im_size = noise.shape[-1]
+        perm_inv = self.perm_64_inv if im_size == 64 else self.perm_256_inv
+        num_patches = im_size
+        # Permute every pixel in the image
+        patch_size = 1
+        # Reshape into patches of size (c, patch_size, patch_size)
+        patches = rearrange(noise,
+                            'c (h p1) (w p2) -> (h w) c p1 p2',
+                            p1=patch_size,
+                            p2=patch_size)
+        # Apply inverse permutation
+        patches = patches[perm_inv]
+        # Reshape back into image
+        im_rearr = rearrange(patches,
+                             '(h w) c p1 p2 -> c (h p1) (w p2)',
+                             h=num_patches,
+                             w=num_patches,
+                             p1=patch_size,
+                             p2=patch_size)
+        return im_rearr
+    def make_frame(self, im, t):
+        # TODO: Implement this, as just moving pixels around
+        raise NotImplementedError()

visual_anagrams/views/view_rotate.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from PIL import Image
+import torchvision.transforms.functional as TF
+from torchvision.transforms import InterpolationMode
+from .view_base import BaseView
+class Rotate90CWView(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        # TODO: Is nearest-exact better?
+        return TF.rotate(im, -90, interpolation=InterpolationMode.NEAREST)
+    def inverse_view(self, noise):
+        return TF.rotate(noise, 90, interpolation=InterpolationMode.NEAREST)
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        theta = t * -90
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        centered_loc = (frame_size - im_size) // 2
+        frame.paste(im, (centered_loc, centered_loc))
+        frame = frame.rotate(theta,
+                             resample=Image.Resampling.BILINEAR,
+                             expand=False,
+                             fillcolor=(255,255,255))
+        return frame
+class Rotate90CCWView(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        # TODO: Is nearest-exact better?
+        return TF.rotate(im, 90, interpolation=InterpolationMode.NEAREST)
+    def inverse_view(self, noise):
+        return TF.rotate(noise, -90, interpolation=InterpolationMode.NEAREST)
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        theta = t * 90
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        centered_loc = (frame_size - im_size) // 2
+        frame.paste(im, (centered_loc, centered_loc))
+        frame = frame.rotate(theta,
+                             resample=Image.Resampling.BILINEAR,
+                             expand=False,
+                             fillcolor=(255,255,255))
+        return frame
+class Rotate180View(BaseView):
+    def __init__(self):
+        pass
+    def view(self, im):
+        # TODO: Is nearest-exact better?
+        return TF.rotate(im, 180, interpolation=InterpolationMode.NEAREST)
+    def inverse_view(self, noise):
+        return TF.rotate(noise, -180, interpolation=InterpolationMode.NEAREST)
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        theta = t * 180
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        centered_loc = (frame_size - im_size) // 2
+        frame.paste(im, (centered_loc, centered_loc))
+        frame = frame.rotate(theta,
+                             resample=Image.Resampling.BILINEAR,
+                             expand=False,
+                             fillcolor=(255,255,255))
+        return frame

visual_anagrams/views/view_skew.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from PIL import Image
+import numpy as np
+import torch
+from .view_base import BaseView
+class SkewView(BaseView):
+    def __init__(self, skew_factor=1.5):
+        self.skew_factor = skew_factor
+    def skew_image(self, im, skew_factor):
+        '''
+        Roll each column of the image by increasing displacements.
+            This is a permutation of pixels
+        '''
+        # Params
+        c,h,w = im.shape
+        h_center = h//2
+        # Roll columns
+        cols = []
+        for i in range(w):
+            d = int(skew_factor * (i - h_center))  # Displacement
+            col = im[:,:,i]
+            cols.append(col.roll(d, dims=1))
+        # Stack rolled columns
+        skewed = torch.stack(cols, dim=2)
+        return skewed
+    def view(self, im):
+        return self.skew_image(im, self.skew_factor)
+    def inverse_view(self, noise):
+        return self.skew_image(noise, -self.skew_factor)
+    def make_frame(self, im, t):
+        im_size = im.size[0]
+        frame_size = int(im_size * 1.5)
+        skew_factor = t * self.skew_factor
+        # Convert to tensor, skew, then convert back to PIL
+        im = torch.tensor(np.array(im) / 255.).permute(2,0,1)
+        im = self.skew_image(im, skew_factor)
+        im = Image.fromarray((np.array(im.permute(1,2,0)) * 255.).astype(np.uint8))
+        # Paste on to canvas
+        frame = Image.new('RGB', (frame_size, frame_size), (255, 255, 255))
+        frame.paste(im, ((frame_size - im_size) // 2, (frame_size - im_size) // 2))
+        return frame