metadata
license: creativeml-openrail-m
tags:
- stable-diffusion
- stable-diffusion-diffusers
- text-to-image
- multires_noise
inference: true
A model trained with Pyramid Noise - see https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2 for details
import random

import torch
from torch import nn
def pyramid_noise_like(x, discount=0.8):
    """Return multi-resolution ("pyramid") noise with the same shape as *x*.

    Starts from ordinary Gaussian noise and adds up to six further layers
    of Gaussian noise sampled at progressively lower resolutions, each one
    bilinearly upsampled back to the size of *x* and weighted by
    ``discount**i``. The sum is divided by its own standard deviation
    before being returned.

    Args:
        x: 4-D tensor; its shape, dtype and device are used for the result.
            The last two dimensions are the ones that get downscaled.
        discount: Per-level weight decay; level ``i`` contributes with
            weight ``discount**i``.

    Returns:
        A tensor shaped like *x*, rescaled to unit standard deviation.

    Note:
        ``w`` and ``h`` are overwritten on every iteration, so the division
        by ``r**i`` is applied to the already-reduced size and the
        resolution falls faster than ``r`` per level. At ``i == 0`` the
        divisor is 1, so the first layer is at full resolution. This is
        kept as-is on purpose: it is the noise used to train the model.
    """
    b, c, w, h = x.shape
    # Every coarse layer is resized back to the original spatial size.
    u = nn.Upsample(size=(w, h), mode='bilinear')
    noise = torch.randn_like(x)
    for i in range(6):
        # Rather than always going 2x, draw a random factor in [2, 4).
        r = random.random() * 2 + 2
        w, h = max(1, int(w / (r**i))), max(1, int(h / (r**i)))
        # .to(x) matches the device and dtype of the input tensor.
        noise += u(torch.randn(b, c, w, h).to(x)) * discount**i
        if w == 1 or h == 1:
            break  # Cannot go any coarser than a single pixel.
    return noise / noise.std()  # Scale back to unit variance
To use the model for inference, just load it like a normal Stable Diffusion pipeline:
# `torch` is needed for the `torch.float16` dtype passed below.
import torch
from diffusers import StableDiffusionPipeline

model_path = "johnowhitaker/pyramid_noise_test_600steps_08discount"
# Load the pipeline with half-precision (float16) weights.
pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
pipe.to("cuda")
# Take the first image generated for the prompt.
image = pipe(prompt="A black image").images[0]
# Bare expression: presumably meant to display the image in a notebook cell.
image