# Spaces: Runtime error
# (page-status text captured together with the source; not part of the module)
# Adapted from https://github.com/MCG-NJU/EMA-VFI/blob/main/model/loss.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# Module-wide default device: CUDA when torch reports it as available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def gauss_kernel(channels=3, target_device=None):
    """Build a per-channel 5x5 binomial smoothing kernel.

    The kernel is the outer product of the taps ``[1, 4, 6, 4, 1]`` divided
    by 256, so every 5x5 filter sums to 1.

    Args:
        channels: Number of identical filters to stack (one per image
            channel, for use with a grouped convolution).
        target_device: Device to place the kernel on. When ``None`` the
            module-level ``device`` is used, which matches the previous
            behaviour.

    Returns:
        A tensor of shape ``(channels, 1, 5, 5)`` on the chosen device.
    """
    if target_device is None:
        # Fall back to the module-wide default chosen at import time.
        target_device = device
    taps = torch.tensor([1., 4., 6., 4., 1.])
    # Outer product of the 1-D taps gives the same 5x5 table as the literal
    # matrix (values are small integers, so the result is exact).
    kernel = taps[:, None] * taps[None, :] / 256.
    return kernel.repeat(channels, 1, 1, 1).to(target_device)
def downsample(x):
    """Keep every second row and column of ``x``.

    Args:
        x: Tensor indexed as ``x[:, :, rows, cols]``; the first two
            dimensions are left untouched.

    Returns:
        A view of ``x`` holding the even-indexed rows and columns.
    """
    every_other = slice(None, None, 2)
    return x[:, :, every_other, every_other]
def upsample(x):
    """Double the spatial size of ``x`` by zero insertion plus smoothing.

    The input samples are written to the even rows/columns of a zero tensor
    that is twice as tall and wide, and the result is filtered with
    ``4 * gauss_kernel`` (the factor 4 compensates for three out of every
    four samples being zero).

    The zero canvas and the kernel are created on ``x``'s device (and in
    ``x``'s dtype when it is floating point) instead of on the module-level
    ``device``, so inputs that live elsewhere or use another float precision
    no longer cause a device/dtype mismatch.

    Args:
        x: 4-D tensor ``(n, c, h, w)``.

    Returns:
        Tensor of shape ``(n, c, 2 * h, 2 * w)``.
    """
    n, c, h, w = x.shape
    kernel = gauss_kernel(channels=c)
    # Non-float inputs are promoted to the kernel's float dtype.
    work_dtype = x.dtype if x.is_floating_point() else kernel.dtype

    # x_up[2i, 2j] = x[i, j]; every other entry stays zero.
    x_up = torch.zeros(n, c, 2 * h, 2 * w, dtype=work_dtype, device=x.device)
    x_up[:, :, ::2, ::2] = x

    kernel = 4 * kernel.to(device=x.device, dtype=work_dtype)
    return conv_gauss(x_up, kernel)
def conv_gauss(img, kernel):
    """Filter each channel of ``img`` with its own 5x5 kernel.

    The image is reflect-padded by 2 pixels on every side so the output has
    the same height and width as the input, then convolved with one group
    per input channel.

    Args:
        img: Tensor ``(n, c, h, w)``.
        kernel: Filter bank compatible with a grouped convolution over
            ``c`` groups, e.g. ``(c, 1, 5, 5)``.

    Returns:
        The filtered tensor.
    """
    padded = F.pad(img, (2, 2, 2, 2), mode='reflect')
    # groups == channel count -> depthwise filtering, no channel mixing.
    return F.conv2d(padded, kernel, groups=padded.shape[1])
def laplacian_pyramid(img, kernel, max_levels=3):
    """Decompose ``img`` into ``max_levels`` band-pass residuals.

    At each level the current image is smoothed and downsampled; the
    residual is the current image minus the upsampled low-resolution
    version. The next level starts from the low-resolution image.

    Note that the final low-resolution image itself is not appended; only
    the residuals are returned.

    Args:
        img: Image tensor ``(n, c, h, w)``.
        kernel: Smoothing kernel passed to ``conv_gauss``.
        max_levels: Number of residual levels to produce.

    Returns:
        A list of ``max_levels`` tensors, finest level first.
    """
    residuals = []
    current = img
    for _ in range(max_levels):
        low = downsample(conv_gauss(current, kernel))
        residuals.append(current - upsample(low))
        current = low
    return residuals
class LapLoss(torch.nn.Module):
    """Sum of per-level L1 distances between two Laplacian pyramids."""

    def __init__(self, max_levels=5, channels=3):
        """Store the pyramid depth and pre-build the smoothing kernel.

        Args:
            max_levels: Number of pyramid levels compared in ``forward``.
            channels: Channel count of the images that will be compared.
        """
        super().__init__()
        self.max_levels = max_levels
        self.gauss_kernel = gauss_kernel(channels=channels)

    def forward(self, input, target):
        """Return the summed L1 loss over all pyramid levels.

        Args:
            input: Predicted image tensor ``(n, c, h, w)``.
            target: Reference image tensor with the same shape.

        Returns:
            A scalar tensor: the sum over levels of the mean absolute
            difference between corresponding pyramid levels.
        """
        # The kernel is a plain attribute built at construction time, so it
        # does not follow ``module.to(...)``. Align it with the input here
        # to avoid device/dtype mismatches (a no-op when already aligned).
        kernel = self.gauss_kernel
        dtype = input.dtype if input.is_floating_point() else kernel.dtype
        kernel = kernel.to(device=input.device, dtype=dtype)

        pyr_input = laplacian_pyramid(
            img=input, kernel=kernel, max_levels=self.max_levels)
        pyr_target = laplacian_pyramid(
            img=target, kernel=kernel, max_levels=self.max_levels)
        return sum(
            torch.nn.functional.l1_loss(a, b)
            for a, b in zip(pyr_input, pyr_target)
        )
class Ternary(nn.Module):
    """Per-pixel distance between soft patch descriptors of two RGB images.

    Each image is converted to grayscale and described, at every pixel, by
    the normalised differences between the 7x7 neighbourhood and the centre
    pixel (49 channels). ``forward`` returns a per-pixel distance map, not a
    reduced scalar.
    """

    def __init__(self, device):
        """Build the 49 one-hot 7x7 filters used to gather neighbourhoods.

        Args:
            device: Device on which the filter bank ``self.w`` is stored.
        """
        super().__init__()
        patch_size = 7
        out_channels = patch_size * patch_size
        # Filter k has a single 1 at flat position k of the 7x7 window, so
        # convolving with the bank copies each neighbour into its own channel.
        self.w = torch.eye(out_channels, dtype=torch.float32).reshape(
            out_channels, 1, patch_size, patch_size).to(device)

    def transform(self, img):
        """Return the normalised neighbour-minus-centre descriptor.

        Args:
            img: Single-channel tensor ``(n, 1, h, w)``.

        Returns:
            Tensor ``(n, 49, h, w)`` with values in ``(-1, 1)``.
        """
        # Follow the image's device/dtype so inputs that differ from the
        # constructor device or use another float precision still work.
        dtype = img.dtype if img.is_floating_point() else self.w.dtype
        weight = self.w.to(device=img.device, dtype=dtype)
        # padding=3 keeps the spatial size for the 7x7 window (zero padded).
        patches = F.conv2d(img, weight, padding=3, bias=None)
        transf = patches - img
        return transf / torch.sqrt(0.81 + transf ** 2)

    def rgb2gray(self, rgb):
        """Convert ``(n, 3, h, w)`` RGB to ``(n, 1, h, w)`` grayscale."""
        r = rgb[:, 0:1, :, :]
        g = rgb[:, 1:2, :, :]
        b = rgb[:, 2:3, :, :]
        return 0.2989 * r + 0.5870 * g + 0.1140 * b

    def hamming(self, t1, t2):
        """Return the soft distance between two descriptors.

        The squared difference ``d`` is squashed as ``d / (0.1 + d)`` and
        averaged over the channel dimension (kept as size 1).
        """
        dist = (t1 - t2) ** 2
        return torch.mean(dist / (0.1 + dist), 1, True)

    def valid_mask(self, t, padding):
        """Return a ``(n, 1, h, w)`` mask that is 0 on a border of width
        ``padding`` and 1 elsewhere, matching ``t``'s type."""
        n, _, h, w = t.size()
        inner = torch.ones(n, 1, h - 2 * padding, w - 2 * padding).type_as(t)
        return F.pad(inner, [padding] * 4)

    def forward(self, img0, img1):
        """Return the masked per-pixel descriptor distance map.

        Args:
            img0: RGB tensor ``(n, 3, h, w)``.
            img1: RGB tensor with the same shape.

        Returns:
            Tensor ``(n, 1, h, w)``; the outermost 1-pixel border is zeroed.
        """
        img0 = self.transform(self.rgb2gray(img0))
        img1 = self.transform(self.rgb2gray(img1))
        # NOTE(review): the mask border is 1 pixel although the 7x7 window
        # reaches 3 pixels into the zero padding - kept as in the original.
        return self.hamming(img0, img1) * self.valid_mask(img0, 1)