My-AI-Projects committed on
Commit aaa2047
1 parent: 9b88016

Add application file

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. basicsr/__init__.py +12 -0
  2. basicsr/__pycache__/__init__.cpython-39.pyc +0 -0
  3. basicsr/__pycache__/train.cpython-39.pyc +0 -0
  4. basicsr/__pycache__/version.cpython-39.pyc +0 -0
  5. basicsr/archs/__init__.py +25 -0
  6. basicsr/archs/__pycache__/__init__.cpython-39.pyc +0 -0
  7. basicsr/archs/__pycache__/ddcolor_arch.cpython-39.pyc +0 -0
  8. basicsr/archs/__pycache__/discriminator_arch.cpython-39.pyc +0 -0
  9. basicsr/archs/__pycache__/vgg_arch.cpython-39.pyc +0 -0
  10. basicsr/archs/ddcolor_arch.py +385 -0
  11. basicsr/archs/ddcolor_arch_utils/__int__.py +0 -0
  12. basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-38.pyc +0 -0
  13. basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-39.pyc +0 -0
  14. basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-38.pyc +0 -0
  15. basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-39.pyc +0 -0
  16. basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-38.pyc +0 -0
  17. basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-39.pyc +0 -0
  18. basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-38.pyc +0 -0
  19. basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-39.pyc +0 -0
  20. basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-38.pyc +0 -0
  21. basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-39.pyc +0 -0
  22. basicsr/archs/ddcolor_arch_utils/convnext.py +155 -0
  23. basicsr/archs/ddcolor_arch_utils/position_encoding.py +52 -0
  24. basicsr/archs/ddcolor_arch_utils/transformer.py +368 -0
  25. basicsr/archs/ddcolor_arch_utils/transformer_utils.py +192 -0
  26. basicsr/archs/ddcolor_arch_utils/unet.py +208 -0
  27. basicsr/archs/ddcolor_arch_utils/util.py +63 -0
  28. basicsr/archs/discriminator_arch.py +28 -0
  29. basicsr/archs/vgg_arch.py +165 -0
  30. basicsr/data/__init__.py +101 -0
  31. basicsr/data/__pycache__/__init__.cpython-39.pyc +0 -0
  32. basicsr/data/__pycache__/data_sampler.cpython-39.pyc +0 -0
  33. basicsr/data/__pycache__/fmix.cpython-39.pyc +0 -0
  34. basicsr/data/__pycache__/lab_dataset.cpython-39.pyc +0 -0
  35. basicsr/data/__pycache__/prefetch_dataloader.cpython-39.pyc +0 -0
  36. basicsr/data/__pycache__/transforms.cpython-39.pyc +0 -0
  37. basicsr/data/data_sampler.py +48 -0
  38. basicsr/data/data_util.py +313 -0
  39. basicsr/data/fmix.py +206 -0
  40. basicsr/data/lab_dataset.py +159 -0
  41. basicsr/data/prefetch_dataloader.py +125 -0
  42. basicsr/data/transforms.py +192 -0
  43. basicsr/losses/__init__.py +26 -0
  44. basicsr/losses/__pycache__/__init__.cpython-39.pyc +0 -0
  45. basicsr/losses/__pycache__/loss_util.cpython-39.pyc +0 -0
  46. basicsr/losses/__pycache__/losses.cpython-39.pyc +0 -0
  47. basicsr/losses/loss_util.py +95 -0
  48. basicsr/losses/losses.py +551 -0
  49. basicsr/metrics/__init__.py +20 -0
  50. basicsr/metrics/__pycache__/__init__.cpython-39.pyc +0 -0
basicsr/__init__.py ADDED
@@ -0,0 +1,12 @@
# https://github.com/xinntao/BasicSR
# flake8: noqa
from .archs import *
from .data import *
from .losses import *
from .metrics import *
from .models import *
# from .ops import *
# from .test import *
from .train import *
from .utils import *
from .version import __gitsha__, __version__
basicsr/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (342 Bytes)

basicsr/__pycache__/train.cpython-39.pyc ADDED
Binary file (6.57 kB)

basicsr/__pycache__/version.cpython-39.pyc ADDED
Binary file (242 Bytes)
 
basicsr/archs/__init__.py ADDED
@@ -0,0 +1,25 @@
import importlib
from copy import deepcopy
from os import path as osp

from basicsr.utils import get_root_logger, scandir
from basicsr.utils.registry import ARCH_REGISTRY

__all__ = ['build_network']

# automatically scan and import arch modules for registry
# scan all the files under the 'archs' folder and collect files ending with
# '_arch.py'
arch_folder = osp.dirname(osp.abspath(__file__))
arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
# import all the arch modules
_arch_modules = [importlib.import_module(f'basicsr.archs.{file_name}') for file_name in arch_filenames]


def build_network(opt):
    opt = deepcopy(opt)
    network_type = opt.pop('type')
    net = ARCH_REGISTRY.get(network_type)(**opt)
    logger = get_root_logger()
    logger.info(f'Network [{net.__class__.__name__}] is created.')
    return net
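Usage note: a minimal sketch of how build_network is typically invoked. The option dict below is illustrative only (its keys would normally come from a YAML config) and assumes the full basicsr package from this commit plus PyTorch are installed.

from basicsr.archs import build_network

opt = {
    'type': 'DDColor',             # registry key added by @ARCH_REGISTRY.register()
    'encoder_name': 'convnext-l',  # remaining keys are passed to the constructor as kwargs
}
net = build_network(opt)           # pops 'type', looks it up in ARCH_REGISTRY, instantiates the class
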
basicsr/archs/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.14 kB)

basicsr/archs/__pycache__/ddcolor_arch.cpython-39.pyc ADDED
Binary file (10.4 kB)

basicsr/archs/__pycache__/discriminator_arch.cpython-39.pyc ADDED
Binary file (1.37 kB)

basicsr/archs/__pycache__/vgg_arch.cpython-39.pyc ADDED
Binary file (4.88 kB)
 
basicsr/archs/ddcolor_arch.py ADDED
@@ -0,0 +1,385 @@
import torch
import torch.nn as nn

from basicsr.archs.ddcolor_arch_utils.unet import Hook, CustomPixelShuffle_ICNR, UnetBlockWide, NormType, custom_conv_layer
from basicsr.archs.ddcolor_arch_utils.convnext import ConvNeXt
from basicsr.archs.ddcolor_arch_utils.transformer_utils import SelfAttentionLayer, CrossAttentionLayer, FFNLayer, MLP
from basicsr.archs.ddcolor_arch_utils.position_encoding import PositionEmbeddingSine
from basicsr.archs.ddcolor_arch_utils.transformer import Transformer
from basicsr.utils.registry import ARCH_REGISTRY


@ARCH_REGISTRY.register()
class DDColor(nn.Module):

    def __init__(self,
                 encoder_name='convnext-l',
                 decoder_name='MultiScaleColorDecoder',
                 num_input_channels=3,
                 input_size=(256, 256),
                 nf=512,
                 num_output_channels=3,
                 last_norm='Weight',
                 do_normalize=False,
                 num_queries=256,
                 num_scales=3,
                 dec_layers=9,
                 encoder_from_pretrain=False):
        super().__init__()

        self.encoder = Encoder(encoder_name, ['norm0', 'norm1', 'norm2', 'norm3'], from_pretrain=encoder_from_pretrain)
        self.encoder.eval()
        test_input = torch.randn(1, num_input_channels, *input_size)
        self.encoder(test_input)

        self.decoder = Decoder(
            self.encoder.hooks,
            nf=nf,
            last_norm=last_norm,
            num_queries=num_queries,
            num_scales=num_scales,
            dec_layers=dec_layers,
            decoder_name=decoder_name
        )
        self.refine_net = nn.Sequential(custom_conv_layer(num_queries + 3, num_output_channels, ks=1, use_activ=False, norm_type=NormType.Spectral))

        self.do_normalize = do_normalize
        self.register_buffer('mean', torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer('std', torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def normalize(self, img):
        return (img - self.mean) / self.std

    def denormalize(self, img):
        return img * self.std + self.mean

    def forward(self, x):
        if x.shape[1] == 3:
            x = self.normalize(x)

        self.encoder(x)
        out_feat = self.decoder()
        coarse_input = torch.cat([out_feat, x], dim=1)
        out = self.refine_net(coarse_input)

        if self.do_normalize:
            out = self.denormalize(out)
        return out


class Decoder(nn.Module):

    def __init__(self,
                 hooks,
                 nf=512,
                 blur=True,
                 last_norm='Weight',
                 num_queries=256,
                 num_scales=3,
                 dec_layers=9,
                 decoder_name='MultiScaleColorDecoder'):
        super().__init__()
        self.hooks = hooks
        self.nf = nf
        self.blur = blur
        self.last_norm = getattr(NormType, last_norm)
        self.decoder_name = decoder_name

        self.layers = self.make_layers()
        embed_dim = nf // 2

        self.last_shuf = CustomPixelShuffle_ICNR(embed_dim, embed_dim, blur=self.blur, norm_type=self.last_norm, scale=4)

        if self.decoder_name == 'MultiScaleColorDecoder':
            self.color_decoder = MultiScaleColorDecoder(
                in_channels=[512, 512, 256],
                num_queries=num_queries,
                num_scales=num_scales,
                dec_layers=dec_layers,
            )
        else:
            self.color_decoder = SingleColorDecoder(
                in_channels=hooks[-1].feature.shape[1],
                num_queries=num_queries,
            )


    def forward(self):
        encode_feat = self.hooks[-1].feature
        out0 = self.layers[0](encode_feat)
        out1 = self.layers[1](out0)
        out2 = self.layers[2](out1)
        out3 = self.last_shuf(out2)

        if self.decoder_name == 'MultiScaleColorDecoder':
            out = self.color_decoder([out0, out1, out2], out3)
        else:
            out = self.color_decoder(out3, encode_feat)

        return out

    def make_layers(self):
        decoder_layers = []

        e_in_c = self.hooks[-1].feature.shape[1]
        in_c = e_in_c

        out_c = self.nf
        setup_hooks = self.hooks[-2::-1]
        for layer_index, hook in enumerate(setup_hooks):
            feature_c = hook.feature.shape[1]
            if layer_index == len(setup_hooks) - 1:
                out_c = out_c // 2
            decoder_layers.append(
                UnetBlockWide(
                    in_c, feature_c, out_c, hook, blur=self.blur, self_attention=False, norm_type=NormType.Spectral))
            in_c = out_c
        return nn.Sequential(*decoder_layers)


class Encoder(nn.Module):

    def __init__(self, encoder_name, hook_names, from_pretrain, **kwargs):
        super().__init__()

        if encoder_name == 'convnext-t' or encoder_name == 'convnext':
            self.arch = ConvNeXt()
        elif encoder_name == 'convnext-s':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768])
        elif encoder_name == 'convnext-b':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024])
        elif encoder_name == 'convnext-l':
            self.arch = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536])
        else:
            raise NotImplementedError

        self.encoder_name = encoder_name
        self.hook_names = hook_names
        self.hooks = self.setup_hooks()

        if from_pretrain:
            self.load_pretrain_model()

    def setup_hooks(self):
        hooks = [Hook(self.arch._modules[name]) for name in self.hook_names]
        return hooks

    def forward(self, x):
        return self.arch(x)

    def load_pretrain_model(self):
        if self.encoder_name == 'convnext-t' or self.encoder_name == 'convnext':
            self.load('pretrain/convnext_tiny_22k_224.pth')
        elif self.encoder_name == 'convnext-s':
            self.load('pretrain/convnext_small_22k_224.pth')
        elif self.encoder_name == 'convnext-b':
            self.load('pretrain/convnext_base_22k_224.pth')
        elif self.encoder_name == 'convnext-l':
            self.load('pretrain/convnext_large_22k_224.pth')
        else:
            raise NotImplementedError
        print('Loaded pretrained convnext model.')

    def load(self, path):
        from basicsr.utils import get_root_logger
        logger = get_root_logger()
        if not path:
            logger.info("No checkpoint found. Initializing model from scratch")
            return
        logger.info("[Encoder] Loading from {} ...".format(path))
        checkpoint = torch.load(path, map_location=torch.device("cpu"))
        checkpoint_state_dict = checkpoint['model'] if 'model' in checkpoint.keys() else checkpoint
        incompatible = self.arch.load_state_dict(checkpoint_state_dict, strict=False)

        if incompatible.missing_keys:
            msg = "Some model parameters or buffers are not found in the checkpoint:\n"
            msg += str(incompatible.missing_keys)
            logger.warning(msg)
        if incompatible.unexpected_keys:
            msg = "The checkpoint state_dict contains keys that are not used by the model:\n"
            msg += str(incompatible.unexpected_keys)
            logger.warning(msg)


class MultiScaleColorDecoder(nn.Module):

    def __init__(
        self,
        in_channels,
        hidden_dim=256,
        num_queries=100,
        nheads=8,
        dim_feedforward=2048,
        dec_layers=9,
        pre_norm=False,
        color_embed_dim=256,
        enforce_input_project=True,
        num_scales=3
    ):
        super().__init__()

        # positional encoding
        N_steps = hidden_dim // 2
        self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True)

        # define Transformer decoder here
        self.num_heads = nheads
        self.num_layers = dec_layers
        self.transformer_self_attention_layers = nn.ModuleList()
        self.transformer_cross_attention_layers = nn.ModuleList()
        self.transformer_ffn_layers = nn.ModuleList()

        for _ in range(self.num_layers):
            self.transformer_self_attention_layers.append(
                SelfAttentionLayer(
                    d_model=hidden_dim,
                    nhead=nheads,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )
            self.transformer_cross_attention_layers.append(
                CrossAttentionLayer(
                    d_model=hidden_dim,
                    nhead=nheads,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )
            self.transformer_ffn_layers.append(
                FFNLayer(
                    d_model=hidden_dim,
                    dim_feedforward=dim_feedforward,
                    dropout=0.0,
                    normalize_before=pre_norm,
                )
            )

        self.decoder_norm = nn.LayerNorm(hidden_dim)

        self.num_queries = num_queries
        # learnable color query features
        self.query_feat = nn.Embedding(num_queries, hidden_dim)
        # learnable color query p.e.
        self.query_embed = nn.Embedding(num_queries, hidden_dim)

        # level embedding
        self.num_feature_levels = num_scales
        self.level_embed = nn.Embedding(self.num_feature_levels, hidden_dim)

        # input projections
        self.input_proj = nn.ModuleList()
        for i in range(self.num_feature_levels):
            if in_channels[i] != hidden_dim or enforce_input_project:
                self.input_proj.append(nn.Conv2d(in_channels[i], hidden_dim, kernel_size=1))
                nn.init.kaiming_uniform_(self.input_proj[-1].weight, a=1)
                if self.input_proj[-1].bias is not None:
                    nn.init.constant_(self.input_proj[-1].bias, 0)
            else:
                self.input_proj.append(nn.Sequential())

        # output FFNs
        self.color_embed = MLP(hidden_dim, hidden_dim, color_embed_dim, 3)

    def forward(self, x, img_features):
        # x is a list of multi-scale feature
        assert len(x) == self.num_feature_levels
        src = []
        pos = []

        for i in range(self.num_feature_levels):
            pos.append(self.pe_layer(x[i], None).flatten(2))
            src.append(self.input_proj[i](x[i]).flatten(2) + self.level_embed.weight[i][None, :, None])

            # flatten NxCxHxW to HWxNxC
            pos[-1] = pos[-1].permute(2, 0, 1)
            src[-1] = src[-1].permute(2, 0, 1)

        _, bs, _ = src[0].shape

        # QxNxC
        query_embed = self.query_embed.weight.unsqueeze(1).repeat(1, bs, 1)
        output = self.query_feat.weight.unsqueeze(1).repeat(1, bs, 1)

        for i in range(self.num_layers):
            level_index = i % self.num_feature_levels
            # attention: cross-attention first
            output = self.transformer_cross_attention_layers[i](
                output, src[level_index],
                memory_mask=None,
                memory_key_padding_mask=None,
                pos=pos[level_index], query_pos=query_embed
            )
            output = self.transformer_self_attention_layers[i](
                output, tgt_mask=None,
                tgt_key_padding_mask=None,
                query_pos=query_embed
            )
            # FFN
            output = self.transformer_ffn_layers[i](
                output
            )

        decoder_output = self.decoder_norm(output)
        decoder_output = decoder_output.transpose(0, 1)  # [N, bs, C] -> [bs, N, C]
        color_embed = self.color_embed(decoder_output)
        out = torch.einsum("bqc,bchw->bqhw", color_embed, img_features)

        return out


class SingleColorDecoder(nn.Module):

    def __init__(
        self,
        in_channels=768,
        hidden_dim=256,
        num_queries=256,  # 100
        nheads=8,
        dropout=0.1,
        dim_feedforward=2048,
        enc_layers=0,
        dec_layers=6,
        pre_norm=False,
        deep_supervision=True,
        enforce_input_project=True,
    ):

        super().__init__()

        N_steps = hidden_dim // 2
        self.pe_layer = PositionEmbeddingSine(N_steps, normalize=True)

        transformer = Transformer(
            d_model=hidden_dim,
            dropout=dropout,
            nhead=nheads,
            dim_feedforward=dim_feedforward,
            num_encoder_layers=enc_layers,
            num_decoder_layers=dec_layers,
            normalize_before=pre_norm,
            return_intermediate_dec=deep_supervision,
        )
        self.num_queries = num_queries
        self.transformer = transformer

        self.query_embed = nn.Embedding(num_queries, hidden_dim)

        if in_channels != hidden_dim or enforce_input_project:
            self.input_proj = nn.Conv2d(in_channels, hidden_dim, kernel_size=1)
            nn.init.kaiming_uniform_(self.input_proj.weight, a=1)
            if self.input_proj.bias is not None:
                nn.init.constant_(self.input_proj.bias, 0)
        else:
            self.input_proj = nn.Sequential()


    def forward(self, img_features, encode_feat):
        pos = self.pe_layer(encode_feat)
        src = encode_feat
        mask = None
        hs, memory = self.transformer(self.input_proj(src), mask, self.query_embed.weight, pos)
        color_embed = hs[-1]
        color_preds = torch.einsum('bqc,bchw->bqhw', color_embed, img_features)
        return color_preds
+
basicsr/archs/ddcolor_arch_utils/__int__.py ADDED
File without changes
basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-38.pyc ADDED
Binary file (6.2 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/convnext.cpython-39.pyc ADDED
Binary file (6.12 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-38.pyc ADDED
Binary file (2.03 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/position_encoding.cpython-39.pyc ADDED
Binary file (2.05 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-38.pyc ADDED
Binary file (8.81 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/transformer.cpython-39.pyc ADDED
Binary file (8.77 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-38.pyc ADDED
Binary file (6.57 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/transformer_utils.cpython-39.pyc ADDED
Binary file (6.57 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-38.pyc ADDED
Binary file (7.37 kB)

basicsr/archs/ddcolor_arch_utils/__pycache__/unet.cpython-39.pyc ADDED
Binary file (7.39 kB)
 
basicsr/archs/ddcolor_arch_utils/convnext.py ADDED
@@ -0,0 +1,155 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.

# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.


import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath

class Block(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x

class ConvNeXt(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s` -
        https://arxiv.org/pdf/2201.03545.pdf
    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, in_chans=3, num_classes=1000,
                 depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        super().__init__()

        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple residual blocks
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        # add norm layers for each output
        out_indices = (0, 1, 2, 3)
        for i in out_indices:
            layer = LayerNorm(dims[i], eps=1e-6, data_format="channels_first")
            # layer = nn.Identity()
            layer_name = f'norm{i}'
            self.add_module(layer_name, layer)

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        # self.head_cls = nn.Linear(dims[-1], 4)

        self.apply(self._init_weights)
        # self.head_cls.weight.data.mul_(head_init_scale)
        # self.head_cls.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)

            # add extra norm
            norm_layer = getattr(self, f'norm{i}')
            # x = norm_layer(x)
            norm_layer(x)

        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x):
        x = self.forward_features(x)
        # x = self.head_cls(x)
        return x

class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == "channels_last":  # B H W C
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":  # B C H W
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
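Note: forward() above only returns globally pooled features; the per-stage maps DDColor needs are read out as a side effect of the bare norm_layer(x) calls via forward hooks. A minimal, illustrative sketch (tiny ConvNeXt config, random input):

import torch
from basicsr.archs.ddcolor_arch_utils.convnext import ConvNeXt
from basicsr.archs.ddcolor_arch_utils.unet import Hook

net = ConvNeXt()                                         # tiny config: depths [3, 3, 9, 3], dims [96, 192, 384, 768]
hooks = [Hook(net._modules[f'norm{i}']) for i in range(4)]
_ = net(torch.rand(1, 3, 256, 256))                      # triggers the hooks during forward_features
print([h.feature.shape for h in hooks])                  # stride 4/8/16/32 maps: 64x64, 32x32, 16x16, 8x8
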
basicsr/archs/ddcolor_arch_utils/position_encoding.py ADDED
@@ -0,0 +1,52 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified from: https://github.com/facebookresearch/detr/blob/master/models/position_encoding.py
"""
Various positional encodings for the transformer.
"""
import math

import torch
from torch import nn


class PositionEmbeddingSine(nn.Module):
    """
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    """

    def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.temperature = temperature
        self.normalize = normalize
        if scale is not None and normalize is False:
            raise ValueError("normalize should be True if scale is passed")
        if scale is None:
            scale = 2 * math.pi
        self.scale = scale

    def forward(self, x, mask=None):
        if mask is None:
            mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
        not_mask = ~mask
        y_embed = not_mask.cumsum(1, dtype=torch.float32)
        x_embed = not_mask.cumsum(2, dtype=torch.float32)
        if self.normalize:
            eps = 1e-6
            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale

        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)

        pos_x = x_embed[:, :, :, None] / dim_t
        pos_y = y_embed[:, :, :, None] / dim_t
        pos_x = torch.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos_y = torch.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
        return pos
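Shape note (illustrative): the embedding depends only on the spatial size of its input and returns 2 * num_pos_feats channels.

import torch
from basicsr.archs.ddcolor_arch_utils.position_encoding import PositionEmbeddingSine

pe = PositionEmbeddingSine(num_pos_feats=128, normalize=True)
pos = pe(torch.rand(2, 512, 16, 16))    # -> (2, 256, 16, 16); the input channel count is ignored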
basicsr/archs/ddcolor_arch_utils/transformer.py ADDED
@@ -0,0 +1,368 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified from: https://github.com/facebookresearch/detr/blob/master/models/transformer.py
"""
Transformer class.
Copy-paste from torch.nn.Transformer with modifications:
    * positional encodings are passed in MHattention
    * extra LN at the end of encoder is removed
    * decoder returns a stack of activations from all decoding layers
"""
import copy
from typing import List, Optional

import torch
import torch.nn.functional as F
from torch import Tensor, nn


class Transformer(nn.Module):
    def __init__(
        self,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        num_decoder_layers=6,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
        return_intermediate_dec=False,
    ):
        super().__init__()

        encoder_layer = TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
        )
        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        decoder_layer = TransformerDecoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
        )
        decoder_norm = nn.LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer,
            num_decoder_layers,
            decoder_norm,
            return_intermediate=return_intermediate_dec,
        )

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src, mask, query_embed, pos_embed):
        # flatten NxCxHxW to HWxNxC
        bs, c, h, w = src.shape
        src = src.flatten(2).permute(2, 0, 1)
        pos_embed = pos_embed.flatten(2).permute(2, 0, 1)
        query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1)
        if mask is not None:
            mask = mask.flatten(1)

        tgt = torch.zeros_like(query_embed)
        memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
        hs = self.decoder(
            tgt, memory, memory_key_padding_mask=mask, pos=pos_embed, query_pos=query_embed
        )
        return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)


class TransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(
        self,
        src,
        mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        output = src

        for layer in self.layers:
            output = layer(
                output, src_mask=mask, src_key_padding_mask=src_key_padding_mask, pos=pos
            )

        if self.norm is not None:
            output = self.norm(output)

        return output


class TransformerDecoder(nn.Module):
    def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.return_intermediate = return_intermediate

    def forward(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
    ):
        output = tgt

        intermediate = []

        for layer in self.layers:
            output = layer(
                output,
                memory,
                tgt_mask=tgt_mask,
                memory_mask=memory_mask,
                tgt_key_padding_mask=tgt_key_padding_mask,
                memory_key_padding_mask=memory_key_padding_mask,
                pos=pos,
                query_pos=query_pos,
            )
            if self.return_intermediate:
                intermediate.append(self.norm(output))

        if self.norm is not None:
            output = self.norm(output)
            if self.return_intermediate:
                intermediate.pop()
                intermediate.append(output)

        if self.return_intermediate:
            return torch.stack(intermediate)

        return output.unsqueeze(0)


class TransformerEncoderLayer(nn.Module):
    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        q = k = self.with_pos_embed(src, pos)
        src2 = self.self_attn(
            q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
        )[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

    def forward_pre(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
        src2 = self.self_attn(
            q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
        )[0]
        src = src + self.dropout1(src2)
        src2 = self.norm2(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
        src = src + self.dropout2(src2)
        return src

    def forward(
        self,
        src,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
    ):
        if self.normalize_before:
            return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
        return self.forward_post(src, src_mask, src_key_padding_mask, pos)


class TransformerDecoderLayer(nn.Module):
    def __init__(
        self,
        d_model,
        nhead,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
    ):
        q = k = self.with_pos_embed(tgt, query_pos)
        tgt2 = self.self_attn(
            q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
        )[0]
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)
        tgt2 = self.multihead_attn(
            query=self.with_pos_embed(tgt, query_pos),
            key=self.with_pos_embed(memory, pos),
            value=memory,
            attn_mask=memory_mask,
            key_padding_mask=memory_key_padding_mask,
        )[0]
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout3(tgt2)
        tgt = self.norm3(tgt)
        return tgt

    def forward_pre(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
    ):
        tgt2 = self.norm1(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
        tgt2 = self.self_attn(
            q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
        )[0]
        tgt = tgt + self.dropout1(tgt2)
        tgt2 = self.norm2(tgt)
        tgt2 = self.multihead_attn(
            query=self.with_pos_embed(tgt2, query_pos),
            key=self.with_pos_embed(memory, pos),
            value=memory,
            attn_mask=memory_mask,
            key_padding_mask=memory_key_padding_mask,
        )[0]
        tgt = tgt + self.dropout2(tgt2)
        tgt2 = self.norm3(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout3(tgt2)
        return tgt

    def forward(
        self,
        tgt,
        memory,
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        pos: Optional[Tensor] = None,
        query_pos: Optional[Tensor] = None,
    ):
        if self.normalize_before:
            return self.forward_pre(
                tgt,
                memory,
                tgt_mask,
                memory_mask,
                tgt_key_padding_mask,
                memory_key_padding_mask,
                pos,
                query_pos,
            )
        return self.forward_post(
            tgt,
            memory,
            tgt_mask,
            memory_mask,
            tgt_key_padding_mask,
            memory_key_padding_mask,
            pos,
            query_pos,
        )


def _get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])


def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
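Shape note (illustrative, roughly how SingleColorDecoder drives this class): with return_intermediate_dec=True the decoder returns one activation stack per layer; the queries here are random stand-ins for the nn.Embedding weights used in practice.

import torch
from basicsr.archs.ddcolor_arch_utils.transformer import Transformer

t = Transformer(d_model=256, nhead=8, num_encoder_layers=0, num_decoder_layers=6, return_intermediate_dec=True)
src = torch.rand(1, 256, 8, 8)          # (B, C, H, W) feature map
pos = torch.rand(1, 256, 8, 8)          # positional embedding of the same shape
queries = torch.rand(100, 256)          # (num_queries, d_model)
hs, memory = t(src, None, queries, pos)
print(hs.shape, memory.shape)           # (6, 1, 100, 256) and (1, 256, 8, 8)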
basicsr/archs/ddcolor_arch_utils/transformer_utils.py ADDED
@@ -0,0 +1,192 @@
from typing import Optional
from torch import nn, Tensor
from torch.nn import functional as F

class SelfAttentionLayer(nn.Module):

    def __init__(self, d_model, nhead, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt,
                     tgt_mask: Optional[Tensor] = None,
                     tgt_key_padding_mask: Optional[Tensor] = None,
                     query_pos: Optional[Tensor] = None):
        q = k = self.with_pos_embed(tgt, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)

        return tgt

    def forward_pre(self, tgt,
                    tgt_mask: Optional[Tensor] = None,
                    tgt_key_padding_mask: Optional[Tensor] = None,
                    query_pos: Optional[Tensor] = None):
        tgt2 = self.norm(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        return tgt

    def forward(self, tgt,
                tgt_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        if self.normalize_before:
            return self.forward_pre(tgt, tgt_mask,
                                    tgt_key_padding_mask, query_pos)
        return self.forward_post(tgt, tgt_mask,
                                 tgt_key_padding_mask, query_pos)


class CrossAttentionLayer(nn.Module):

    def __init__(self, d_model, nhead, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt, memory,
                     memory_mask: Optional[Tensor] = None,
                     memory_key_padding_mask: Optional[Tensor] = None,
                     pos: Optional[Tensor] = None,
                     query_pos: Optional[Tensor] = None):
        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
                                   key=self.with_pos_embed(memory, pos),
                                   value=memory, attn_mask=memory_mask,
                                   key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)

        return tgt

    def forward_pre(self, tgt, memory,
                    memory_mask: Optional[Tensor] = None,
                    memory_key_padding_mask: Optional[Tensor] = None,
                    pos: Optional[Tensor] = None,
                    query_pos: Optional[Tensor] = None):
        tgt2 = self.norm(tgt)
        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt2, query_pos),
                                   key=self.with_pos_embed(memory, pos),
                                   value=memory, attn_mask=memory_mask,
                                   key_padding_mask=memory_key_padding_mask)[0]
        tgt = tgt + self.dropout(tgt2)

        return tgt

    def forward(self, tgt, memory,
                memory_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        if self.normalize_before:
            return self.forward_pre(tgt, memory, memory_mask,
                                    memory_key_padding_mask, pos, query_pos)
        return self.forward_post(tgt, memory, memory_mask,
                                 memory_key_padding_mask, pos, query_pos)


class FFNLayer(nn.Module):

    def __init__(self, d_model, dim_feedforward=2048, dropout=0.0,
                 activation="relu", normalize_before=False):
        super().__init__()
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm = nn.LayerNorm(d_model)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward_post(self, tgt):
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout(tgt2)
        tgt = self.norm(tgt)
        return tgt

    def forward_pre(self, tgt):
        tgt2 = self.norm(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout(tgt2)
        return tgt

    def forward(self, tgt):
        if self.normalize_before:
            return self.forward_pre(tgt)
        return self.forward_post(tgt)


def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    raise RuntimeError(F"activation should be relu/gelu, not {activation}.")


class MLP(nn.Module):
    """ Very simple multi-layer perceptron (also called FFN)"""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x
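Shape note (illustrative): the color_embed head in MultiScaleColorDecoder is exactly this MLP; it maps decoded queries to color embeddings without changing the query dimension.

import torch
from basicsr.archs.ddcolor_arch_utils.transformer_utils import MLP

mlp = MLP(input_dim=256, hidden_dim=256, output_dim=256, num_layers=3)
q = torch.rand(4, 100, 256)             # (batch, queries, hidden_dim)
print(mlp(q).shape)                     # torch.Size([4, 100, 256])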
basicsr/archs/ddcolor_arch_utils/unet.py ADDED
@@ -0,0 +1,208 @@
from enum import Enum
import torch
import torch.nn as nn
from torch.nn import functional as F
import collections


NormType = Enum('NormType', 'Batch BatchZero Weight Spectral')


class Hook:
    feature = None

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        if isinstance(output, torch.Tensor):
            self.feature = output
        elif isinstance(output, collections.OrderedDict):
            self.feature = output['out']

    def remove(self):
        self.hook.remove()


class SelfAttention(nn.Module):
    "Self attention layer for nd."

    def __init__(self, n_channels: int):
        super().__init__()
        self.query = conv1d(n_channels, n_channels // 8)
        self.key = conv1d(n_channels, n_channels // 8)
        self.value = conv1d(n_channels, n_channels)
        self.gamma = nn.Parameter(torch.tensor([0.]))

    def forward(self, x):
        # Notation from https://arxiv.org/pdf/1805.08318.pdf
        size = x.size()
        x = x.view(*size[:2], -1)
        f, g, h = self.query(x), self.key(x), self.value(x)
        beta = F.softmax(torch.bmm(f.permute(0, 2, 1).contiguous(), g), dim=1)
        o = self.gamma * torch.bmm(h, beta) + x
        return o.view(*size).contiguous()


def batchnorm_2d(nf: int, norm_type: NormType = NormType.Batch):
    "A batchnorm2d layer with `nf` features initialized depending on `norm_type`."
    bn = nn.BatchNorm2d(nf)
    with torch.no_grad():
        bn.bias.fill_(1e-3)
        bn.weight.fill_(0. if norm_type == NormType.BatchZero else 1.)
    return bn


def init_default(m: nn.Module, func=nn.init.kaiming_normal_) -> None:
    "Initialize `m` weights with `func` and set `bias` to 0."
    if func:
        if hasattr(m, 'weight'): func(m.weight)
        if hasattr(m, 'bias') and hasattr(m.bias, 'data'): m.bias.data.fill_(0.)
    return m


def icnr(x, scale=2, init=nn.init.kaiming_normal_):
    "ICNR init of `x`, with `scale` and `init` function."
    ni, nf, h, w = x.shape
    ni2 = int(ni / (scale**2))
    k = init(torch.zeros([ni2, nf, h, w])).transpose(0, 1)
    k = k.contiguous().view(ni2, nf, -1)
    k = k.repeat(1, 1, scale**2)
    k = k.contiguous().view([nf, ni, h, w]).transpose(0, 1)
    x.data.copy_(k)


def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False):
    "Create and initialize a `nn.Conv1d` layer with spectral normalization."
    conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias)
    nn.init.kaiming_normal_(conv.weight)
    if bias: conv.bias.data.zero_()
    return nn.utils.spectral_norm(conv)


def custom_conv_layer(
    ni: int,
    nf: int,
    ks: int = 3,
    stride: int = 1,
    padding: int = None,
    bias: bool = None,
    is_1d: bool = False,
    norm_type=NormType.Batch,
    use_activ: bool = True,
    transpose: bool = False,
    init=nn.init.kaiming_normal_,
    self_attention: bool = False,
    extra_bn: bool = False,
):
    "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
    if padding is None:
        padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero) or extra_bn == True
    if bias is None:
        bias = not bn
    conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
    conv = init_default(
        conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding),
        init,
    )

    if norm_type == NormType.Weight:
        conv = nn.utils.weight_norm(conv)
    elif norm_type == NormType.Spectral:
        conv = nn.utils.spectral_norm(conv)
    layers = [conv]
    if use_activ:
        layers.append(nn.ReLU(True))
    if bn:
        layers.append((nn.BatchNorm1d if is_1d else nn.BatchNorm2d)(nf))
    if self_attention:
        layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)


def conv_layer(ni: int,
               nf: int,
               ks: int = 3,
               stride: int = 1,
               padding: int = None,
               bias: bool = None,
               is_1d: bool = False,
               norm_type=NormType.Batch,
               use_activ: bool = True,
               transpose: bool = False,
               init=nn.init.kaiming_normal_,
               self_attention: bool = False):
    "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
    if padding is None: padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero)
    if bias is None: bias = not bn
    conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
    conv = init_default(conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding), init)
    if norm_type == NormType.Weight: conv = nn.utils.weight_norm(conv)
    elif norm_type == NormType.Spectral: conv = nn.utils.spectral_norm(conv)
    layers = [conv]
    if use_activ: layers.append(nn.ReLU(True))
    if bn: layers.append((nn.BatchNorm1d if is_1d else nn.BatchNorm2d)(nf))
    if self_attention: layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)


def _conv(ni: int, nf: int, ks: int = 3, stride: int = 1, **kwargs):
    return conv_layer(ni, nf, ks=ks, stride=stride, norm_type=NormType.Spectral, **kwargs)


class CustomPixelShuffle_ICNR(nn.Module):
    "Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`."

    def __init__(self,
                 ni: int,
                 nf: int = None,
                 scale: int = 2,
                 blur: bool = True,
                 norm_type=NormType.Spectral,
                 extra_bn=False):
        super().__init__()
        self.conv = custom_conv_layer(
            ni, nf * (scale**2), ks=1, use_activ=False, norm_type=norm_type, extra_bn=extra_bn)
        icnr(self.conv[0].weight)
        self.shuf = nn.PixelShuffle(scale)
        self.do_blur = blur
        # Blurring over (h*w) kernel
        # "Super-Resolution using Convolutional Neural Networks without Any Checkerboard Artifacts"
        # - https://arxiv.org/abs/1806.02658
        self.pad = nn.ReplicationPad2d((1, 0, 1, 0))
        self.blur = nn.AvgPool2d(2, stride=1)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        x = self.shuf(self.relu(self.conv(x)))
        return self.blur(self.pad(x)) if self.do_blur else x


class UnetBlockWide(nn.Module):
    "A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."

    def __init__(self,
                 up_in_c: int,
                 x_in_c: int,
                 n_out: int,
                 hook,
                 blur: bool = False,
                 self_attention: bool = False,
                 norm_type=NormType.Spectral):
        super().__init__()

        self.hook = hook
        up_out = n_out
        self.shuf = CustomPixelShuffle_ICNR(up_in_c, up_out, blur=blur, norm_type=norm_type, extra_bn=True)
        self.bn = batchnorm_2d(x_in_c)
        ni = up_out + x_in_c
        self.conv = custom_conv_layer(ni, n_out, norm_type=norm_type, self_attention=self_attention, extra_bn=True)
        self.relu = nn.ReLU()

    def forward(self, up_in):
        s = self.hook.feature
        up_out = self.shuf(up_in)
        cat_x = self.relu(torch.cat([up_out, self.bn(s)], dim=1))
        return self.conv(cat_x)
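Usage note (illustrative channel and size values): the decoder is built almost entirely from custom_conv_layer and CustomPixelShuffle_ICNR, e.g.

import torch
from basicsr.archs.ddcolor_arch_utils.unet import NormType, CustomPixelShuffle_ICNR, custom_conv_layer

conv = custom_conv_layer(64, 32, ks=3, norm_type=NormType.Spectral)   # spectral-norm conv + ReLU
up = CustomPixelShuffle_ICNR(32, 32, scale=2)                         # x2 pixel-shuffle with ICNR init and blur
x = torch.rand(1, 64, 32, 32)
print(up(conv(x)).shape)                                              # torch.Size([1, 32, 64, 64])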
basicsr/archs/ddcolor_arch_utils/util.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from skimage import color
4
+
5
+
6
+ def rgb2lab(img_rgb):
7
+ img_lab = color.rgb2lab(img_rgb)
8
+ return img_lab[:, :, :1], img_lab[:, :, 1:]
9
+
10
+
11
+ def tensor_lab2rgb(labs, illuminant="D65", observer="2"):
12
+ """
13
+ Args:
14
+ lab : (B, C, H, W)
15
+ Returns:
16
+ tuple : (B, C, H, W)
17
+ """
18
+ illuminants = \
19
+ {"A": {'2': (1.098466069456375, 1, 0.3558228003436005),
20
+ '10': (1.111420406956693, 1, 0.3519978321919493)},
21
+ "D50": {'2': (0.9642119944211994, 1, 0.8251882845188288),
22
+ '10': (0.9672062750333777, 1, 0.8142801513128616)},
23
+ "D55": {'2': (0.956797052643698, 1, 0.9214805860173273),
24
+ '10': (0.9579665682254781, 1, 0.9092525159847462)},
25
+ "D65": {'2': (0.95047, 1., 1.08883), # This was: `lab_ref_white`
26
+ '10': (0.94809667673716, 1, 1.0730513595166162)},
27
+ "D75": {'2': (0.9497220898840717, 1, 1.226393520724154),
28
+ '10': (0.9441713925645873, 1, 1.2064272211720228)},
29
+ "E": {'2': (1.0, 1.0, 1.0),
30
+ '10': (1.0, 1.0, 1.0)}}
31
+ xyz_from_rgb = np.array([[0.412453, 0.357580, 0.180423], [0.212671, 0.715160, 0.072169],
32
+ [0.019334, 0.119193, 0.950227]])
33
+
34
+ rgb_from_xyz = np.array([[3.240481340, -0.96925495, 0.055646640], [-1.53715152, 1.875990000, -0.20404134],
35
+ [-0.49853633, 0.041555930, 1.057311070]])
36
+ B, C, H, W = labs.shape
37
+ arrs = labs.permute((0, 2, 3, 1)).contiguous() # (B, 3, H, W) -> (B, H, W, 3)
38
+ L, a, b = arrs[:, :, :, 0:1], arrs[:, :, :, 1:2], arrs[:, :, :, 2:]
39
+ y = (L + 16.) / 116.
40
+ x = (a / 500.) + y
41
+ z = y - (b / 200.)
42
+ invalid = z.data < 0
43
+ z[invalid] = 0
44
+ xyz = torch.cat([x, y, z], dim=3)
45
+ mask = xyz.data > 0.2068966
46
+ mask_xyz = xyz.clone()
47
+ mask_xyz[mask] = torch.pow(xyz[mask], 3.0)
48
+ mask_xyz[~mask] = (xyz[~mask] - 16.0 / 116.) / 7.787
49
+ xyz_ref_white = illuminants[illuminant][observer]
50
+ for i in range(C):
51
+ mask_xyz[:, :, :, i] = mask_xyz[:, :, :, i] * xyz_ref_white[i]
52
+
53
+ rgb_trans = torch.mm(mask_xyz.view(-1, 3), torch.from_numpy(rgb_from_xyz).type_as(xyz)).view(B, H, W, C)
54
+ rgb = rgb_trans.permute((0, 3, 1, 2)).contiguous()
55
+ mask = rgb.data > 0.0031308
56
+ mask_rgb = rgb.clone()
57
+ mask_rgb[mask] = 1.055 * torch.pow(rgb[mask], 1 / 2.4) - 0.055
58
+ mask_rgb[~mask] = rgb[~mask] * 12.92
59
+ neg_mask = mask_rgb.data < 0
60
+ large_mask = mask_rgb.data > 1
61
+ mask_rgb[neg_mask] = 0
62
+ mask_rgb[large_mask] = 1
63
+ return mask_rgb
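
A quick round-trip sketch (not part of the commit) for the Lab helpers above, using placeholder data:

    import numpy as np
    import torch
    from basicsr.archs.ddcolor_arch_utils.util import rgb2lab, tensor_lab2rgb

    rgb = np.random.rand(64, 64, 3).astype(np.float32)                   # RGB image in [0, 1]
    img_l, img_ab = rgb2lab(rgb)                                         # (64, 64, 1) L and (64, 64, 2) ab
    lab = np.concatenate([img_l, img_ab], axis=2)                        # back to (64, 64, 3)
    lab_t = torch.from_numpy(lab).permute(2, 0, 1).unsqueeze(0).float()  # (1, 3, 64, 64)
    rgb_t = tensor_lab2rgb(lab_t)                                        # (1, 3, 64, 64), clipped to [0, 1]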
basicsr/archs/discriminator_arch.py ADDED
@@ -0,0 +1,28 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models
4
+ import numpy as np
5
+
6
+ from basicsr.archs.ddcolor_arch_utils.unet import _conv
7
+ from basicsr.utils.registry import ARCH_REGISTRY
8
+
9
+
10
+ @ARCH_REGISTRY.register()
11
+ class DynamicUNetDiscriminator(nn.Module):
12
+
13
+ def __init__(self, n_channels: int = 3, nf: int = 256, n_blocks: int = 3):
14
+ super().__init__()
15
+ layers = [_conv(n_channels, nf, ks=4, stride=2)]
16
+ for i in range(n_blocks):
17
+ layers += [
18
+ _conv(nf, nf, ks=3, stride=1),
19
+ _conv(nf, nf * 2, ks=4, stride=2, self_attention=(i == 0)),
20
+ ]
21
+ nf *= 2
22
+ layers += [_conv(nf, nf, ks=3, stride=1), _conv(nf, 1, ks=4, bias=False, padding=0, use_activ=False)]
23
+ self.layers = nn.Sequential(*layers)
24
+
25
+ def forward(self, x):
26
+ out = self.layers(x)
27
+ out = out.view(out.size(0), -1)
28
+ return out
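
A shape-check sketch (not part of the commit); it assumes the unet helpers imported above build correctly:

    import torch
    from basicsr.archs.discriminator_arch import DynamicUNetDiscriminator

    net = DynamicUNetDiscriminator(n_channels=3, nf=256, n_blocks=3)
    logits = net(torch.randn(1, 3, 256, 256))
    print(logits.shape)  # (1, N): one logit per spatial location of the final conv, flattened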
basicsr/archs/vgg_arch.py ADDED
@@ -0,0 +1,165 @@
1
+ import os
2
+ import torch
3
+ from collections import OrderedDict
4
+ from torch import nn as nn
5
+ from torchvision.models import vgg as vgg
6
+
7
+ from basicsr.utils.registry import ARCH_REGISTRY
8
+
9
+ VGG_PRETRAIN_PATH = {
10
+ 'vgg19': './pretrain/vgg19-dcbb9e9d.pth',
11
+ 'vgg16_bn': './pretrain/vgg16_bn-6c64b313.pth'
12
+ }
13
+
14
+ NAMES = {
15
+ 'vgg11': [
16
+ 'conv1_1', 'relu1_1', 'pool1', 'conv2_1', 'relu2_1', 'pool2', 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
17
+ 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
18
+ 'pool5'
19
+ ],
20
+ 'vgg13': [
21
+ 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
22
+ 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'pool4',
23
+ 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'pool5'
24
+ ],
25
+ 'vgg16': [
26
+ 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
27
+ 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3', 'conv4_1', 'relu4_1', 'conv4_2',
28
+ 'relu4_2', 'conv4_3', 'relu4_3', 'pool4', 'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
29
+ 'pool5'
30
+ ],
31
+ 'vgg19': [
32
+ 'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1', 'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
33
+ 'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3', 'conv4_1',
34
+ 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4', 'conv5_1', 'relu5_1',
35
+ 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5'
36
+ ]
37
+ }
38
+
39
+
40
+ def insert_bn(names):
41
+ """Insert bn layer after each conv.
42
+
43
+ Args:
44
+ names (list): The list of layer names.
45
+
46
+ Returns:
47
+ list: The list of layer names with bn layers.
48
+ """
49
+ names_bn = []
50
+ for name in names:
51
+ names_bn.append(name)
52
+ if 'conv' in name:
53
+ position = name.replace('conv', '')
54
+ names_bn.append('bn' + position)
55
+ return names_bn
56
+
57
+
58
+ @ARCH_REGISTRY.register()
59
+ class VGGFeatureExtractor(nn.Module):
60
+ """VGG network for feature extraction.
61
+
62
+ In this implementation, we allow users to choose whether to use normalization
63
+ in the input feature and the type of vgg network. Note that the pretrained
64
+ path must fit the vgg type.
65
+
66
+ Args:
67
+ layer_name_list (list[str]): Forward function returns the corresponding
68
+ features according to the layer_name_list.
69
+ Example: {'relu1_1', 'relu2_1', 'relu3_1'}.
70
+ vgg_type (str): Set the type of vgg network. Default: 'vgg19'.
71
+ use_input_norm (bool): If True, normalize the input image. Importantly,
72
+ the input feature must be in the range [0, 1]. Default: True.
73
+ range_norm (bool): If True, norm images with range [-1, 1] to [0, 1].
74
+ Default: False.
75
+ requires_grad (bool): If true, the parameters of VGG network will be
76
+ optimized. Default: False.
77
+ remove_pooling (bool): If true, the max pooling operations in VGG net
78
+ will be removed. Default: False.
79
+ pooling_stride (int): The stride of max pooling operation. Default: 2.
80
+ """
81
+
82
+ def __init__(self,
83
+ layer_name_list,
84
+ vgg_type='vgg19',
85
+ use_input_norm=True,
86
+ range_norm=False,
87
+ requires_grad=False,
88
+ remove_pooling=False,
89
+ pooling_stride=2):
90
+ super(VGGFeatureExtractor, self).__init__()
91
+
92
+ self.layer_name_list = layer_name_list
93
+ self.use_input_norm = use_input_norm
94
+ self.range_norm = range_norm
95
+
96
+ self.names = NAMES[vgg_type.replace('_bn', '')]
97
+ if 'bn' in vgg_type:
98
+ self.names = insert_bn(self.names)
99
+
100
+ # only borrow layers that will be used to avoid unused params
101
+ max_idx = 0
102
+ for v in layer_name_list:
103
+ idx = self.names.index(v)
104
+ if idx > max_idx:
105
+ max_idx = idx
106
+
107
+ if os.path.exists(VGG_PRETRAIN_PATH[vgg_type]):
108
+ vgg_net = getattr(vgg, vgg_type)(pretrained=False)
109
+ state_dict = torch.load(VGG_PRETRAIN_PATH[vgg_type], map_location=lambda storage, loc: storage)
110
+ vgg_net.load_state_dict(state_dict)
111
+ else:
112
+ vgg_net = getattr(vgg, vgg_type)(pretrained=True)
113
+
114
+ features = vgg_net.features[:max_idx + 1]
115
+
116
+ modified_net = OrderedDict()
117
+ for k, v in zip(self.names, features):
118
+ if 'pool' in k:
119
+ # if remove_pooling is true, pooling operation will be removed
120
+ if remove_pooling:
121
+ continue
122
+ else:
123
+ # in some cases, we may want to change the default stride
124
+ modified_net[k] = nn.MaxPool2d(kernel_size=2, stride=pooling_stride)
125
+ else:
126
+ modified_net[k] = v
127
+
128
+ self.vgg_net = nn.Sequential(modified_net)
129
+
130
+ if not requires_grad:
131
+ self.vgg_net.eval()
132
+ for param in self.parameters():
133
+ param.requires_grad = False
134
+ else:
135
+ self.vgg_net.train()
136
+ for param in self.parameters():
137
+ param.requires_grad = True
138
+
139
+ if self.use_input_norm:
140
+ # the mean is for image with range [0, 1]
141
+ self.register_buffer('mean', torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
142
+ # the std is for image with range [0, 1]
143
+ self.register_buffer('std', torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
144
+
145
+ def forward(self, x):
146
+ """Forward function.
147
+
148
+ Args:
149
+ x (Tensor): Input tensor with shape (n, c, h, w).
150
+
151
+ Returns:
152
+ Tensor: Forward results.
153
+ """
154
+ if self.range_norm:
155
+ x = (x + 1) / 2
156
+ if self.use_input_norm:
157
+ x = (x - self.mean) / self.std
158
+
159
+ output = {}
160
+ for key, layer in self.vgg_net._modules.items():
161
+ x = layer(x)
162
+ if key in self.layer_name_list:
163
+ output[key] = x.clone()
164
+
165
+ return output
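
A usage sketch (not part of the commit) for the feature extractor above; if the local ./pretrain weights are missing, torchvision downloads the pretrained VGG instead:

    import torch
    from basicsr.archs.vgg_arch import VGGFeatureExtractor

    extractor = VGGFeatureExtractor(layer_name_list=['relu1_1', 'relu2_1'], vgg_type='vgg19')
    feats = extractor(torch.rand(1, 3, 224, 224))  # input expected in [0, 1]
    print({k: v.shape for k, v in feats.items()})  # features at relu1_1 and relu2_1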
basicsr/data/__init__.py ADDED
@@ -0,0 +1,101 @@
1
+ import importlib
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import torch.utils.data
6
+ from copy import deepcopy
7
+ from functools import partial
8
+ from os import path as osp
9
+
10
+ from basicsr.data.prefetch_dataloader import PrefetchDataLoader
11
+ from basicsr.utils import get_root_logger, scandir
12
+ from basicsr.utils.dist_util import get_dist_info
13
+ from basicsr.utils.registry import DATASET_REGISTRY
14
+
15
+ __all__ = ['build_dataset', 'build_dataloader']
16
+
17
+ # automatically scan and import dataset modules for registry
18
+ # scan all the files under the data folder with '_dataset' in file names
19
+ data_folder = osp.dirname(osp.abspath(__file__))
20
+ dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
21
+ # import all the dataset modules
22
+ _dataset_modules = [importlib.import_module(f'basicsr.data.{file_name}') for file_name in dataset_filenames]
23
+
24
+
25
+ def build_dataset(dataset_opt):
26
+ """Build dataset from options.
27
+
28
+ Args:
29
+ dataset_opt (dict): Configuration for dataset. It must contain:
30
+ name (str): Dataset name.
31
+ type (str): Dataset type.
32
+ """
33
+ dataset_opt = deepcopy(dataset_opt)
34
+ dataset = DATASET_REGISTRY.get(dataset_opt['type'])(dataset_opt)
35
+ logger = get_root_logger()
36
+ logger.info(f'Dataset [{dataset.__class__.__name__}] - {dataset_opt["name"]} ' 'is built.')
37
+ return dataset
38
+
39
+
40
+ def build_dataloader(dataset, dataset_opt, num_gpu=1, dist=False, sampler=None, seed=None):
41
+ """Build dataloader.
42
+
43
+ Args:
44
+ dataset (torch.utils.data.Dataset): Dataset.
45
+ dataset_opt (dict): Dataset options. It contains the following keys:
46
+ phase (str): 'train' or 'val'.
47
+ num_worker_per_gpu (int): Number of workers for each GPU.
48
+ batch_size_per_gpu (int): Training batch size for each GPU.
49
+ num_gpu (int): Number of GPUs. Used only in the train phase.
50
+ Default: 1.
51
+ dist (bool): Whether in distributed training. Used only in the train
52
+ phase. Default: False.
53
+ sampler (torch.utils.data.sampler): Data sampler. Default: None.
54
+ seed (int | None): Seed. Default: None
55
+ """
56
+ phase = dataset_opt['phase']
57
+ rank, _ = get_dist_info()
58
+ if phase == 'train':
59
+ if dist: # distributed training
60
+ batch_size = dataset_opt['batch_size_per_gpu']
61
+ num_workers = dataset_opt['num_worker_per_gpu']
62
+ else: # non-distributed training
63
+ multiplier = 1 if num_gpu == 0 else num_gpu
64
+ batch_size = dataset_opt['batch_size_per_gpu'] * multiplier
65
+ num_workers = dataset_opt['num_worker_per_gpu'] * multiplier
66
+ dataloader_args = dict(
67
+ dataset=dataset,
68
+ batch_size=batch_size,
69
+ shuffle=False,
70
+ num_workers=num_workers,
71
+ sampler=sampler,
72
+ drop_last=True)
73
+ if sampler is None:
74
+ dataloader_args['shuffle'] = True
75
+ dataloader_args['worker_init_fn'] = partial(
76
+ worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None
77
+ elif phase in ['val', 'test']: # validation
78
+ dataloader_args = dict(dataset=dataset, batch_size=1, shuffle=False, num_workers=0)
79
+ else:
80
+ raise ValueError(f'Wrong dataset phase: {phase}. ' "Supported ones are 'train', 'val' and 'test'.")
81
+
82
+ dataloader_args['pin_memory'] = dataset_opt.get('pin_memory', False)
83
+ dataloader_args['persistent_workers'] = dataset_opt.get('persistent_workers', False)
84
+
85
+ prefetch_mode = dataset_opt.get('prefetch_mode')
86
+ if prefetch_mode == 'cpu': # CPUPrefetcher
87
+ num_prefetch_queue = dataset_opt.get('num_prefetch_queue', 1)
88
+ logger = get_root_logger()
89
+ logger.info(f'Use {prefetch_mode} prefetch dataloader: num_prefetch_queue = {num_prefetch_queue}')
90
+ return PrefetchDataLoader(num_prefetch_queue=num_prefetch_queue, **dataloader_args)
91
+ else:
92
+ # prefetch_mode=None: Normal dataloader
93
+ # prefetch_mode='cuda': dataloader for CUDAPrefetcher
94
+ return torch.utils.data.DataLoader(**dataloader_args)
95
+
96
+
97
+ def worker_init_fn(worker_id, num_workers, rank, seed):
98
+ # Set the worker seed to num_workers * rank + worker_id + seed
99
+ worker_seed = num_workers * rank + worker_id + seed
100
+ np.random.seed(worker_seed)
101
+ random.seed(worker_seed)
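
A build sketch (not part of the commit); the paths and option values below are placeholders, only the keys come from the dataset and dataloader code above:

    from basicsr.data import build_dataset, build_dataloader

    dataset_opt = {
        'name': 'example', 'type': 'LabDataset', 'phase': 'train',
        'dataroot_gt': 'datasets/gt', 'meta_info_file': 'datasets/meta_info.txt',
        'io_backend': {'type': 'disk'}, 'gt_size': 256,
        'do_fmix': False, 'fmix_p': 0.5, 'do_cutmix': False, 'cutmix_p': 0.5,
        'batch_size_per_gpu': 8, 'num_worker_per_gpu': 4,
    }
    dataset = build_dataset(dataset_opt)
    loader = build_dataloader(dataset, dataset_opt, num_gpu=1, dist=False, sampler=None, seed=0)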
basicsr/data/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (3.58 kB).
basicsr/data/__pycache__/data_sampler.cpython-39.pyc ADDED
Binary file (2.14 kB).
basicsr/data/__pycache__/fmix.cpython-39.pyc ADDED
Binary file (7.01 kB).
basicsr/data/__pycache__/lab_dataset.cpython-39.pyc ADDED
Binary file (4.77 kB).
basicsr/data/__pycache__/prefetch_dataloader.cpython-39.pyc ADDED
Binary file (4.38 kB).
basicsr/data/__pycache__/transforms.cpython-39.pyc ADDED
Binary file (6.4 kB).
basicsr/data/data_sampler.py ADDED
@@ -0,0 +1,48 @@
1
+ import math
2
+ import torch
3
+ from torch.utils.data.sampler import Sampler
4
+
5
+
6
+ class EnlargedSampler(Sampler):
7
+ """Sampler that restricts data loading to a subset of the dataset.
8
+
9
+ Modified from torch.utils.data.distributed.DistributedSampler
10
+ Supports enlarging the dataset for iteration-based training, saving
11
+ time when restarting the dataloader after each epoch.
12
+
13
+ Args:
14
+ dataset (torch.utils.data.Dataset): Dataset used for sampling.
15
+ num_replicas (int | None): Number of processes participating in
16
+ the training. It is usually the world_size.
17
+ rank (int | None): Rank of the current process within num_replicas.
18
+ ratio (int): Enlarging ratio. Default: 1.
19
+ """
20
+
21
+ def __init__(self, dataset, num_replicas, rank, ratio=1):
22
+ self.dataset = dataset
23
+ self.num_replicas = num_replicas
24
+ self.rank = rank
25
+ self.epoch = 0
26
+ self.num_samples = math.ceil(len(self.dataset) * ratio / self.num_replicas)
27
+ self.total_size = self.num_samples * self.num_replicas
28
+
29
+ def __iter__(self):
30
+ # deterministically shuffle based on epoch
31
+ g = torch.Generator()
32
+ g.manual_seed(self.epoch)
33
+ indices = torch.randperm(self.total_size, generator=g).tolist()
34
+
35
+ dataset_size = len(self.dataset)
36
+ indices = [v % dataset_size for v in indices]
37
+
38
+ # subsample
39
+ indices = indices[self.rank:self.total_size:self.num_replicas]
40
+ assert len(indices) == self.num_samples
41
+
42
+ return iter(indices)
43
+
44
+ def __len__(self):
45
+ return self.num_samples
46
+
47
+ def set_epoch(self, epoch):
48
+ self.epoch = epoch
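
A small sketch (not part of the commit) showing the enlarging behaviour; anything with __len__ works for the sampler:

    from basicsr.data.data_sampler import EnlargedSampler

    dummy_dataset = list(range(100))
    sampler = EnlargedSampler(dummy_dataset, num_replicas=1, rank=0, ratio=10)
    sampler.set_epoch(0)
    print(len(sampler))          # 1000: one "epoch" now covers the dataset ten times
    print(next(iter(sampler)))   # an index in [0, 100), since indices are taken modulo len(dataset)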
basicsr/data/data_util.py ADDED
@@ -0,0 +1,313 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from os import path as osp
5
+ from torch.nn import functional as F
6
+
7
+ from basicsr.data.transforms import mod_crop
8
+ from basicsr.utils import img2tensor, scandir
9
+
10
+
11
+ def read_img_seq(path, require_mod_crop=False, scale=1, return_imgname=False):
12
+ """Read a sequence of images from a given folder path.
13
+
14
+ Args:
15
+ path (list[str] | str): List of image paths or image folder path.
16
+ require_mod_crop (bool): Require mod crop for each image.
17
+ Default: False.
18
+ scale (int): Scale factor for mod_crop. Default: 1.
19
+ return_imgname(bool): Whether return image names. Default False.
20
+
21
+ Returns:
22
+ Tensor: size (t, c, h, w), RGB, [0, 1].
23
+ list[str]: Returned image name list.
24
+ """
25
+ if isinstance(path, list):
26
+ img_paths = path
27
+ else:
28
+ img_paths = sorted(list(scandir(path, full_path=True)))
29
+ imgs = [cv2.imread(v).astype(np.float32) / 255. for v in img_paths]
30
+
31
+ if require_mod_crop:
32
+ imgs = [mod_crop(img, scale) for img in imgs]
33
+ imgs = img2tensor(imgs, bgr2rgb=True, float32=True)
34
+ imgs = torch.stack(imgs, dim=0)
35
+
36
+ if return_imgname:
37
+ imgnames = [osp.splitext(osp.basename(path))[0] for path in img_paths]
38
+ return imgs, imgnames
39
+ else:
40
+ return imgs
41
+
42
+
43
+ def generate_frame_indices(crt_idx, max_frame_num, num_frames, padding='reflection'):
44
+ """Generate an index list for reading `num_frames` frames from a sequence
45
+ of images.
46
+
47
+ Args:
48
+ crt_idx (int): Current center index.
49
+ max_frame_num (int): Max number of the sequence of images (from 1).
50
+ num_frames (int): Reading num_frames frames.
51
+ padding (str): Padding mode, one of
52
+ 'replicate' | 'reflection' | 'reflection_circle' | 'circle'
53
+ Examples: current_idx = 0, num_frames = 5
54
+ The generated frame indices under different padding mode:
55
+ replicate: [0, 0, 0, 1, 2]
56
+ reflection: [2, 1, 0, 1, 2]
57
+ reflection_circle: [4, 3, 0, 1, 2]
58
+ circle: [3, 4, 0, 1, 2]
59
+
60
+ Returns:
61
+ list[int]: A list of indices.
62
+ """
63
+ assert num_frames % 2 == 1, 'num_frames should be an odd number.'
64
+ assert padding in ('replicate', 'reflection', 'reflection_circle', 'circle'), f'Wrong padding mode: {padding}.'
65
+
66
+ max_frame_num = max_frame_num - 1 # start from 0
67
+ num_pad = num_frames // 2
68
+
69
+ indices = []
70
+ for i in range(crt_idx - num_pad, crt_idx + num_pad + 1):
71
+ if i < 0:
72
+ if padding == 'replicate':
73
+ pad_idx = 0
74
+ elif padding == 'reflection':
75
+ pad_idx = -i
76
+ elif padding == 'reflection_circle':
77
+ pad_idx = crt_idx + num_pad - i
78
+ else:
79
+ pad_idx = num_frames + i
80
+ elif i > max_frame_num:
81
+ if padding == 'replicate':
82
+ pad_idx = max_frame_num
83
+ elif padding == 'reflection':
84
+ pad_idx = max_frame_num * 2 - i
85
+ elif padding == 'reflection_circle':
86
+ pad_idx = (crt_idx - num_pad) - (i - max_frame_num)
87
+ else:
88
+ pad_idx = i - num_frames
89
+ else:
90
+ pad_idx = i
91
+ indices.append(pad_idx)
92
+ return indices
93
+
94
+
95
+ def paired_paths_from_lmdb(folders, keys):
96
+ """Generate paired paths from lmdb files.
97
+
98
+ Contents of lmdb. Taking the `lq.lmdb` for example, the file structure is:
99
+
100
+ lq.lmdb
101
+ ├── data.mdb
102
+ ├── lock.mdb
103
+ ├── meta_info.txt
104
+
105
+ The data.mdb and lock.mdb are standard lmdb files and you can refer to
106
+ https://lmdb.readthedocs.io/en/release/ for more details.
107
+
108
+ The meta_info.txt is a specified txt file to record the meta information
109
+ of our datasets. It will be automatically created when preparing
110
+ datasets by our provided dataset tools.
111
+ Each line in the txt file records
112
+ 1)image name (with extension),
113
+ 2)image shape,
114
+ 3)compression level, separated by a white space.
115
+ Example: `baboon.png (120,125,3) 1`
116
+
117
+ We use the image name without extension as the lmdb key.
118
+ Note that we use the same key for the corresponding lq and gt images.
119
+
120
+ Args:
121
+ folders (list[str]): A list of folder path. The order of list should
122
+ be [input_folder, gt_folder].
123
+ keys (list[str]): A list of keys identifying folders. The order should
124
+ be consistent with folders, e.g., ['lq', 'gt'].
125
+ Note that this key is different from lmdb keys.
126
+
127
+ Returns:
128
+ list[str]: Returned path list.
129
+ """
130
+ assert len(folders) == 2, ('The len of folders should be 2 with [input_folder, gt_folder]. '
131
+ f'But got {len(folders)}')
132
+ assert len(keys) == 2, f'The len of keys should be 2 with [input_key, gt_key]. But got {len(keys)}'
133
+ input_folder, gt_folder = folders
134
+ input_key, gt_key = keys
135
+
136
+ if not (input_folder.endswith('.lmdb') and gt_folder.endswith('.lmdb')):
137
+ raise ValueError(f'{input_key} folder and {gt_key} folder should both in lmdb '
138
+ f'formats. But received {input_key}: {input_folder}; '
139
+ f'{gt_key}: {gt_folder}')
140
+ # ensure that the two meta_info files are the same
141
+ with open(osp.join(input_folder, 'meta_info.txt')) as fin:
142
+ input_lmdb_keys = [line.split('.')[0] for line in fin]
143
+ with open(osp.join(gt_folder, 'meta_info.txt')) as fin:
144
+ gt_lmdb_keys = [line.split('.')[0] for line in fin]
145
+ if set(input_lmdb_keys) != set(gt_lmdb_keys):
146
+ raise ValueError(f'Keys in {input_key}_folder and {gt_key}_folder are different.')
147
+ else:
148
+ paths = []
149
+ for lmdb_key in sorted(input_lmdb_keys):
150
+ paths.append(dict([(f'{input_key}_path', lmdb_key), (f'{gt_key}_path', lmdb_key)]))
151
+ return paths
152
+
153
+
154
+ def paired_paths_from_meta_info_file(folders, keys, meta_info_file, filename_tmpl):
155
+ """Generate paired paths from an meta information file.
156
+
157
+ Each line in the meta information file contains the image names and
158
+ image shape (usually for gt), separated by a white space.
159
+
160
+ Example of a meta information file:
161
+ ```
162
+ 0001_s001.png (480,480,3)
163
+ 0001_s002.png (480,480,3)
164
+ ```
165
+
166
+ Args:
167
+ folders (list[str]): A list of folder path. The order of list should
168
+ be [input_folder, gt_folder].
169
+ keys (list[str]): A list of keys identifying folders. The order should
170
+ be consistent with folders, e.g., ['lq', 'gt'].
171
+ meta_info_file (str): Path to the meta information file.
172
+ filename_tmpl (str): Template for each filename. Note that the
173
+ template excludes the file extension. Usually the filename_tmpl is
174
+ for files in the input folder.
175
+
176
+ Returns:
177
+ list[str]: Returned path list.
178
+ """
179
+ assert len(folders) == 2, ('The len of folders should be 2 with [input_folder, gt_folder]. '
180
+ f'But got {len(folders)}')
181
+ assert len(keys) == 2, f'The len of keys should be 2 with [input_key, gt_key]. But got {len(keys)}'
182
+ input_folder, gt_folder = folders
183
+ input_key, gt_key = keys
184
+
185
+ with open(meta_info_file, 'r') as fin:
186
+ gt_names = [line.split(' ')[0] for line in fin]
187
+
188
+ paths = []
189
+ for gt_name in gt_names:
190
+ basename, ext = osp.splitext(osp.basename(gt_name))
191
+ input_name = f'{filename_tmpl.format(basename)}{ext}'
192
+ input_path = osp.join(input_folder, input_name)
193
+ gt_path = osp.join(gt_folder, gt_name)
194
+ paths.append(dict([(f'{input_key}_path', input_path), (f'{gt_key}_path', gt_path)]))
195
+ return paths
196
+
197
+
198
+ def paired_paths_from_folder(folders, keys, filename_tmpl):
199
+ """Generate paired paths from folders.
200
+
201
+ Args:
202
+ folders (list[str]): A list of folder path. The order of list should
203
+ be [input_folder, gt_folder].
204
+ keys (list[str]): A list of keys identifying folders. The order should
205
+ be consistent with folders, e.g., ['lq', 'gt'].
206
+ filename_tmpl (str): Template for each filename. Note that the
207
+ template excludes the file extension. Usually the filename_tmpl is
208
+ for files in the input folder.
209
+
210
+ Returns:
211
+ list[str]: Returned path list.
212
+ """
213
+ assert len(folders) == 2, ('The len of folders should be 2 with [input_folder, gt_folder]. '
214
+ f'But got {len(folders)}')
215
+ assert len(keys) == 2, f'The len of keys should be 2 with [input_key, gt_key]. But got {len(keys)}'
216
+ input_folder, gt_folder = folders
217
+ input_key, gt_key = keys
218
+
219
+ input_paths = list(scandir(input_folder))
220
+ gt_paths = list(scandir(gt_folder))
221
+ assert len(input_paths) == len(gt_paths), (f'{input_key} and {gt_key} datasets have different number of images: '
222
+ f'{len(input_paths)}, {len(gt_paths)}.')
223
+ paths = []
224
+ for gt_path in gt_paths:
225
+ basename, ext = osp.splitext(osp.basename(gt_path))
226
+ input_name = f'{filename_tmpl.format(basename)}{ext}'
227
+ input_path = osp.join(input_folder, input_name)
228
+ assert input_name in input_paths, f'{input_name} is not in {input_key}_paths.'
229
+ gt_path = osp.join(gt_folder, gt_path)
230
+ paths.append(dict([(f'{input_key}_path', input_path), (f'{gt_key}_path', gt_path)]))
231
+ return paths
232
+
233
+
234
+ def paths_from_folder(folder):
235
+ """Generate paths from folder.
236
+
237
+ Args:
238
+ folder (str): Folder path.
239
+
240
+ Returns:
241
+ list[str]: Returned path list.
242
+ """
243
+
244
+ paths = list(scandir(folder))
245
+ paths = [osp.join(folder, path) for path in paths]
246
+ return paths
247
+
248
+
249
+ def paths_from_lmdb(folder):
250
+ """Generate paths from lmdb.
251
+
252
+ Args:
253
+ folder (str): Folder path.
254
+
255
+ Returns:
256
+ list[str]: Returned path list.
257
+ """
258
+ if not folder.endswith('.lmdb'):
259
+ raise ValueError(f'Folder {folder} should be in lmdb format.')
260
+ with open(osp.join(folder, 'meta_info.txt')) as fin:
261
+ paths = [line.split('.')[0] for line in fin]
262
+ return paths
263
+
264
+
265
+ def generate_gaussian_kernel(kernel_size=13, sigma=1.6):
266
+ """Generate Gaussian kernel used in `duf_downsample`.
267
+
268
+ Args:
269
+ kernel_size (int): Kernel size. Default: 13.
270
+ sigma (float): Sigma of the Gaussian kernel. Default: 1.6.
271
+
272
+ Returns:
273
+ np.array: The Gaussian kernel.
274
+ """
275
+ from scipy.ndimage import filters as filters
276
+ kernel = np.zeros((kernel_size, kernel_size))
277
+ # set element at the middle to one, a dirac delta
278
+ kernel[kernel_size // 2, kernel_size // 2] = 1
279
+ # gaussian-smooth the dirac, resulting in a gaussian filter
280
+ return filters.gaussian_filter(kernel, sigma)
281
+
282
+
283
+ def duf_downsample(x, kernel_size=13, scale=4):
284
+ """Downsamping with Gaussian kernel used in the DUF official code.
285
+
286
+ Args:
287
+ x (Tensor): Frames to be downsampled, with shape (b, t, c, h, w).
288
+ kernel_size (int): Kernel size. Default: 13.
289
+ scale (int): Downsampling factor. Supported scale: (2, 3, 4).
290
+ Default: 4.
291
+
292
+ Returns:
293
+ Tensor: DUF downsampled frames.
294
+ """
295
+ assert scale in (2, 3, 4), f'Only support scale (2, 3, 4), but got {scale}.'
296
+
297
+ squeeze_flag = False
298
+ if x.ndim == 4:
299
+ squeeze_flag = True
300
+ x = x.unsqueeze(0)
301
+ b, t, c, h, w = x.size()
302
+ x = x.view(-1, 1, h, w)
303
+ pad_w, pad_h = kernel_size // 2 + scale * 2, kernel_size // 2 + scale * 2
304
+ x = F.pad(x, (pad_w, pad_w, pad_h, pad_h), 'reflect')
305
+
306
+ gaussian_filter = generate_gaussian_kernel(kernel_size, 0.4 * scale)
307
+ gaussian_filter = torch.from_numpy(gaussian_filter).type_as(x).unsqueeze(0).unsqueeze(0)
308
+ x = F.conv2d(x, gaussian_filter, stride=scale)
309
+ x = x[:, :, 2:-2, 2:-2]
310
+ x = x.view(b, t, c, x.size(2), x.size(3))
311
+ if squeeze_flag:
312
+ x = x.squeeze(0)
313
+ return x
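
A sketch (not part of the commit) reproducing the padding examples from the generate_frame_indices docstring above:

    from basicsr.data.data_util import generate_frame_indices

    for mode in ('replicate', 'reflection', 'reflection_circle', 'circle'):
        print(mode, generate_frame_indices(crt_idx=0, max_frame_num=100, num_frames=5, padding=mode))
    # replicate          [0, 0, 0, 1, 2]
    # reflection         [2, 1, 0, 1, 2]
    # reflection_circle  [4, 3, 0, 1, 2]
    # circle             [3, 4, 0, 1, 2]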
basicsr/data/fmix.py ADDED
@@ -0,0 +1,206 @@
1
+ '''
2
+ Fmix paper from arxiv: https://arxiv.org/abs/2002.12047
3
+ Fmix code from github : https://github.com/ecs-vlc/FMix
4
+ '''
5
+ import math
6
+ import random
7
+ import numpy as np
8
+ from scipy.stats import beta
9
+
10
+
11
+ def fftfreqnd(h, w=None, z=None):
12
+ """ Get bin values for discrete fourier transform of size (h, w, z)
13
+ :param h: Required, first dimension size
14
+ :param w: Optional, second dimension size
15
+ :param z: Optional, third dimension size
16
+ """
17
+ fz = fx = 0
18
+ fy = np.fft.fftfreq(h)
19
+
20
+ if w is not None:
21
+ fy = np.expand_dims(fy, -1)
22
+
23
+ if w % 2 == 1:
24
+ fx = np.fft.fftfreq(w)[: w // 2 + 2]
25
+ else:
26
+ fx = np.fft.fftfreq(w)[: w // 2 + 1]
27
+
28
+ if z is not None:
29
+ fy = np.expand_dims(fy, -1)
30
+ if z % 2 == 1:
31
+ fz = np.fft.fftfreq(z)[:, None]
32
+ else:
33
+ fz = np.fft.fftfreq(z)[:, None]
34
+
35
+ return np.sqrt(fx * fx + fy * fy + fz * fz)
36
+
37
+
38
+ def get_spectrum(freqs, decay_power, ch, h, w=0, z=0):
39
+ """ Samples a fourier image with given size and frequencies decayed by decay power
40
+ :param freqs: Bin values for the discrete fourier transform
41
+ :param decay_power: Decay power for frequency decay prop 1/f**d
42
+ :param ch: Number of channels for the resulting mask
43
+ :param h: Required, first dimension size
44
+ :param w: Optional, second dimension size
45
+ :param z: Optional, third dimension size
46
+ """
47
+ scale = np.ones(1) / (np.maximum(freqs, np.array([1. / max(w, h, z)])) ** decay_power)
48
+
49
+ param_size = [ch] + list(freqs.shape) + [2]
50
+ param = np.random.randn(*param_size)
51
+
52
+ scale = np.expand_dims(scale, -1)[None, :]
53
+
54
+ return scale * param
55
+
56
+
57
+ def make_low_freq_image(decay, shape, ch=1):
58
+ """ Sample a low frequency image from fourier space
59
+ :param decay_power: Decay power for frequency decay prop 1/f**d
60
+ :param shape: Shape of desired mask, list up to 3 dims
61
+ :param ch: Number of channels for desired mask
62
+ """
63
+ freqs = fftfreqnd(*shape)
64
+ spectrum = get_spectrum(freqs, decay, ch, *shape)#.reshape((1, *shape[:-1], -1))
65
+ spectrum = spectrum[:, 0] + 1j * spectrum[:, 1]
66
+ mask = np.real(np.fft.irfftn(spectrum, shape))
67
+
68
+ if len(shape) == 1:
69
+ mask = mask[:1, :shape[0]]
70
+ if len(shape) == 2:
71
+ mask = mask[:1, :shape[0], :shape[1]]
72
+ if len(shape) == 3:
73
+ mask = mask[:1, :shape[0], :shape[1], :shape[2]]
74
+
75
+ mask = mask
76
+ mask = (mask - mask.min())
77
+ mask = mask / mask.max()
78
+ return mask
79
+
80
+
81
+ def sample_lam(alpha, reformulate=False):
82
+ """ Sample a lambda from symmetric beta distribution with given alpha
83
+ :param alpha: Alpha value for beta distribution
84
+ :param reformulate: If True, uses the reformulation of [1].
85
+ """
86
+ if reformulate:
87
+ lam = beta.rvs(alpha+1, alpha)  # rvs(a, b, loc=mean, scale=std, size=n) draws n samples from the distribution
88
+ else:
89
+ lam = beta.rvs(alpha, alpha)  # rvs(a, b, loc=mean, scale=std, size=n) draws n samples from the distribution
90
+
91
+ return lam
92
+
93
+
94
+ def binarise_mask(mask, lam, in_shape, max_soft=0.0):
95
+ """ Binarises a given low frequency image such that it has mean lambda.
96
+ :param mask: Low frequency image, usually the result of `make_low_freq_image`
97
+ :param lam: Mean value of final mask
98
+ :param in_shape: Shape of inputs
99
+ :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
100
+ :return:
101
+ """
102
+ idx = mask.reshape(-1).argsort()[::-1]
103
+ mask = mask.reshape(-1)
104
+ num = math.ceil(lam * mask.size) if random.random() > 0.5 else math.floor(lam * mask.size)
105
+
106
+ eff_soft = max_soft
107
+ if max_soft > lam or max_soft > (1-lam):
108
+ eff_soft = min(lam, 1-lam)
109
+
110
+ soft = int(mask.size * eff_soft)
111
+ num_low = num - soft
112
+ num_high = num + soft
113
+
114
+ mask[idx[:num_high]] = 1
115
+ mask[idx[num_low:]] = 0
116
+ mask[idx[num_low:num_high]] = np.linspace(1, 0, (num_high - num_low))
117
+
118
+ mask = mask.reshape((1, *in_shape))
119
+ return mask
120
+
121
+
122
+ def sample_mask(alpha, decay_power, shape, max_soft=0.0, reformulate=False):
123
+ """ Samples a mean lambda from beta distribution parametrised by alpha, creates a low frequency image and binarises
124
+ it based on this lambda
125
+ :param alpha: Alpha value for beta distribution from which to sample mean of mask
126
+ :param decay_power: Decay power for frequency decay prop 1/f**d
127
+ :param shape: Shape of desired mask, list up to 3 dims
128
+ :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
129
+ :param reformulate: If True, uses the reformulation of [1].
130
+ """
131
+ if isinstance(shape, int):
132
+ shape = (shape,)
133
+
134
+ # Choose lambda
135
+ lam = sample_lam(alpha, reformulate)
136
+
137
+ # Make mask, get mean / std
138
+ mask = make_low_freq_image(decay_power, shape)
139
+ mask = binarise_mask(mask, lam, shape, max_soft)
140
+
141
+ return lam, mask
142
+
143
+
144
+ def sample_and_apply(x, alpha, decay_power, shape, max_soft=0.0, reformulate=False):
145
+ """
146
+ :param x: Image batch on which to apply fmix of shape [b, c, shape*]
147
+ :param alpha: Alpha value for beta distribution from which to sample mean of mask
148
+ :param decay_power: Decay power for frequency decay prop 1/f**d
149
+ :param shape: Shape of desired mask, list up to 3 dims
150
+ :param max_soft: Softening value between 0 and 0.5 which smooths hard edges in the mask.
151
+ :param reformulate: If True, uses the reformulation of [1].
152
+ :return: mixed input, permutation indices, lambda value of mix,
153
+ """
154
+ lam, mask = sample_mask(alpha, decay_power, shape, max_soft, reformulate)
155
+ index = np.random.permutation(x.shape[0])
156
+
157
+ x1, x2 = x * mask, x[index] * (1-mask)
158
+ return x1+x2, index, lam
159
+
160
+
161
+ class FMixBase:
162
+ """ FMix augmentation
163
+ Args:
164
+ decay_power (float): Decay power for frequency decay prop 1/f**d
165
+ alpha (float): Alpha value for beta distribution from which to sample mean of mask
166
+ size ([int] | [int, int] | [int, int, int]): Shape of desired mask, list up to 3 dims
167
+ max_soft (float): Softening value between 0 and 0.5 which smooths hard edges in the mask.
168
+ reformulate (bool): If True, uses the reformulation of [1].
169
+ """
170
+
171
+ def __init__(self, decay_power=3, alpha=1, size=(32, 32), max_soft=0.0, reformulate=False):
172
+ super().__init__()
173
+ self.decay_power = decay_power
174
+ self.reformulate = reformulate
175
+ self.size = size
176
+ self.alpha = alpha
177
+ self.max_soft = max_soft
178
+ self.index = None
179
+ self.lam = None
180
+
181
+ def __call__(self, x):
182
+ raise NotImplementedError
183
+
184
+ def loss(self, *args, **kwargs):
185
+ raise NotImplementedError
186
+
187
+
188
+ if __name__ == '__main__':
189
+ # para = {'alpha':1.,'decay_power':3.,'shape':(10,10),'max_soft':0.0,'reformulate':False}
190
+ # lam, mask = sample_mask(**para)
191
+ # mask = mask.transpose(1, 2, 0)
192
+ # img1 = np.zeros((10, 10, 3))
193
+ # img2 = np.ones((10, 10, 3))
194
+ # img_gt = mask * img1 + (1. - mask) * img2
195
+ # import ipdb; ipdb.set_trace()
196
+
197
+ # test
198
+ import cv2
199
+ i1 = cv2.imread('output/ILSVRC2012_val_00000001.JPEG')
200
+ i2 = cv2.imread('output/ILSVRC2012_val_00000002.JPEG')
201
+ para = {'alpha':1.,'decay_power':3.,'shape':(256, 256),'max_soft':0.0,'reformulate':False}
202
+ lam, mask = sample_mask(**para)
203
+ mask = mask.transpose(1, 2, 0)
204
+ i = mask * i1 + (1. - mask) * i2
205
+ #i = i.astype(np.uint8)
206
+ cv2.imwrite('fmix.jpg', i)
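
A self-contained sketch (not part of the commit) of FMix on a random batch, instead of the file-based test above:

    import numpy as np
    from basicsr.data.fmix import sample_and_apply

    x = np.random.rand(4, 3, 64, 64)    # batch of 4 images
    mixed, perm, lam = sample_and_apply(x, alpha=1., decay_power=3., shape=(64, 64))
    print(mixed.shape, perm, lam)       # (4, 3, 64, 64), permutation indices, mixing ratio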
basicsr/data/lab_dataset.py ADDED
@@ -0,0 +1,159 @@
1
+ import cv2
2
+ import random
3
+ import time
4
+ import numpy as np
5
+ import torch
6
+ from torch.utils import data as data
7
+
8
+ from basicsr.data.transforms import rgb2lab
9
+ from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
10
+ from basicsr.utils.registry import DATASET_REGISTRY
11
+ from basicsr.data.fmix import sample_mask
12
+
13
+
14
+ @DATASET_REGISTRY.register()
15
+ class LabDataset(data.Dataset):
16
+ """
17
+ Dataset used for Lab colorization
18
+ """
19
+
20
+ def __init__(self, opt):
21
+ super(LabDataset, self).__init__()
22
+ self.opt = opt
23
+ # file client (io backend)
24
+ self.file_client = None
25
+ self.io_backend_opt = opt['io_backend']
26
+ self.gt_folder = opt['dataroot_gt']
27
+
28
+ meta_info_file = self.opt['meta_info_file']
29
+ assert meta_info_file is not None
30
+ if not isinstance(meta_info_file, list):
31
+ meta_info_file = [meta_info_file]
32
+ self.paths = []
33
+ for meta_info in meta_info_file:
34
+ with open(meta_info, 'r') as fin:
35
+ self.paths.extend([line.strip() for line in fin])
36
+
37
+ self.min_ab, self.max_ab = -128, 128
38
+ self.interval_ab = 4
39
+ self.ab_palette = [i for i in range(self.min_ab, self.max_ab + self.interval_ab, self.interval_ab)]
40
+ # print(self.ab_palette)
41
+
42
+ self.do_fmix = opt['do_fmix']
43
+ self.fmix_params = {'alpha':1.,'decay_power':3.,'shape':(256,256),'max_soft':0.0,'reformulate':False}
44
+ self.fmix_p = opt['fmix_p']
45
+ self.do_cutmix = opt['do_cutmix']
46
+ self.cutmix_params = {'alpha':1.}
47
+ self.cutmix_p = opt['cutmix_p']
48
+
49
+
50
+ def __getitem__(self, index):
51
+ if self.file_client is None:
52
+ self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
53
+
54
+ # -------------------------------- Load gt images -------------------------------- #
55
+ # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
56
+ gt_path = self.paths[index]
57
+ gt_size = self.opt['gt_size']
58
+ # avoid errors caused by high latency in reading files
59
+ retry = 3
60
+ while retry > 0:
61
+ try:
62
+ img_bytes = self.file_client.get(gt_path, 'gt')
63
+ except Exception as e:
64
+ logger = get_root_logger()
65
+ logger.warning(f'File client error: {e}, remaining retry times: {retry - 1}')
66
+ # switch to another file to read
67
+ index = random.randint(0, self.__len__() - 1)
68
+ gt_path = self.paths[index]
69
+ time.sleep(1) # sleep 1s for occasional server congestion
70
+ else:
71
+ break
72
+ finally:
73
+ retry -= 1
74
+ img_gt = imfrombytes(img_bytes, float32=True)
75
+ img_gt = cv2.resize(img_gt, (gt_size, gt_size))  # TODO: is a plain resize the best option here?
76
+
77
+ # -------------------------------- (Optional) CutMix & FMix -------------------------------- #
78
+ if self.do_fmix and np.random.uniform(0., 1., size=1)[0] > self.fmix_p:
79
+ with torch.no_grad():
80
+ lam, mask = sample_mask(**self.fmix_params)
81
+
82
+ fmix_index = random.randint(0, self.__len__() - 1)
83
+ fmix_img_path = self.paths[fmix_index]
84
+ fmix_img_bytes = self.file_client.get(fmix_img_path, 'gt')
85
+ fmix_img = imfrombytes(fmix_img_bytes, float32=True)
86
+ fmix_img = cv2.resize(fmix_img, (gt_size, gt_size))
87
+
88
+ mask = mask.transpose(1, 2, 0) # (1, 256, 256) -> # (256, 256, 1)
89
+ img_gt = mask * img_gt + (1. - mask) * fmix_img
90
+ img_gt = img_gt.astype(np.float32)
91
+
92
+ if self.do_cutmix and np.random.uniform(0., 1., size=1)[0] > self.cutmix_p:
93
+ with torch.no_grad():
94
+ cmix_index = random.randint(0, self.__len__() - 1)
95
+ cmix_img_path = self.paths[cmix_index]
96
+ cmix_img_bytes = self.file_client.get(cmix_img_path, 'gt')
97
+ cmix_img = imfrombytes(cmix_img_bytes, float32=True)
98
+ cmix_img = cv2.resize(cmix_img, (gt_size, gt_size))
99
+
100
+ lam = np.clip(np.random.beta(self.cutmix_params['alpha'], self.cutmix_params['alpha']), 0.3, 0.4)
101
+ bbx1, bby1, bbx2, bby2 = rand_bbox(cmix_img.shape[:2], lam)
102
+
103
+ img_gt[bbx1:bbx2, bby1:bby2, :] = cmix_img[bbx1:bbx2, bby1:bby2, :]  # img_gt is (h, w, c) at this point
104
+
105
+
106
+ # ----------------------------- Get gray lq, to tensor ----------------------------- #
107
+ # convert to gray
108
+ img_gt = cv2.cvtColor(img_gt, cv2.COLOR_BGR2RGB)
109
+ img_l, img_ab = rgb2lab(img_gt)
110
+
111
+ target_a, target_b = self.ab2int(img_ab)
112
+
113
+ # numpy to tensor
114
+ img_l, img_ab = img2tensor([img_l, img_ab], bgr2rgb=False, float32=True)
115
+ target_a, target_b = torch.LongTensor(target_a), torch.LongTensor(target_b)
116
+ return_d = {
117
+ 'lq': img_l,
118
+ 'gt': img_ab,
119
+ 'target_a': target_a,
120
+ 'target_b': target_b,
121
+ 'lq_path': gt_path,
122
+ 'gt_path': gt_path
123
+ }
124
+ return return_d
125
+
126
+ def ab2int(self, img_ab):
127
+ img_a, img_b = img_ab[:, :, 0], img_ab[:, :, 1]
128
+ int_a = (img_a - self.min_ab) / self.interval_ab
129
+ int_b = (img_b - self.min_ab) / self.interval_ab
130
+
131
+ return np.round(int_a), np.round(int_b)
132
+
133
+ def __len__(self):
134
+ return len(self.paths)
135
+
136
+
137
+ def rand_bbox(size, lam):
138
+ '''Bounding-box sampling for CutMix.
139
+ Args:
140
+ size : tuple, image size, e.g. (256, 256)
141
+ lam : float, mixing ratio
142
+ Returns:
143
+ top-left and bottom-right coordinates of the bbox
144
+ int, int, int, int
145
+ '''
146
+ W = size[0] # width of the source image
147
+ H = size[1] # height of the source image
148
+ cut_rat = np.sqrt(1. - lam) # side ratio of the bbox to cut
149
+ cut_w = int(W * cut_rat) # bbox width (np.int is removed in recent NumPy)
150
+ cut_h = int(H * cut_rat) # bbox height
151
+
152
+ cx = np.random.randint(W) # uniformly sample the bbox centre x coordinate
153
+ cy = np.random.randint(H) # uniformly sample the bbox centre y coordinate
154
+
155
+ bbx1 = np.clip(cx - cut_w // 2, 0, W) # top-left x
156
+ bby1 = np.clip(cy - cut_h // 2, 0, H) # top-left y
157
+ bbx2 = np.clip(cx + cut_w // 2, 0, W) # bottom-right x
158
+ bby2 = np.clip(cy + cut_h // 2, 0, H) # bottom-right y
159
+ return bbx1, bby1, bbx2, bby2
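
A worked example (not part of the commit) of the ab quantisation used by LabDataset.ab2int: with min_ab = -128 and interval_ab = 4 the palette has 65 bins, and an ab value v maps to round((v + 128) / 4):

    import numpy as np

    min_ab, interval_ab = -128, 4
    for v in (-128., 0., 128.):
        print(v, np.round((v - min_ab) / interval_ab))   # bins 0, 32, 64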
basicsr/data/prefetch_dataloader.py ADDED
@@ -0,0 +1,125 @@
1
+ import queue as Queue
2
+ import threading
3
+ import torch
4
+ from torch.utils.data import DataLoader
5
+
6
+
7
+ class PrefetchGenerator(threading.Thread):
8
+ """A general prefetch generator.
9
+
10
+ Ref:
11
+ https://stackoverflow.com/questions/7323664/python-generator-pre-fetch
12
+
13
+ Args:
14
+ generator: Python generator.
15
+ num_prefetch_queue (int): Number of prefetch queue.
16
+ """
17
+
18
+ def __init__(self, generator, num_prefetch_queue):
19
+ threading.Thread.__init__(self)
20
+ self.queue = Queue.Queue(num_prefetch_queue)
21
+ self.generator = generator
22
+ self.daemon = True
23
+ self.start()
24
+
25
+ def run(self):
26
+ for item in self.generator:
27
+ self.queue.put(item)
28
+ self.queue.put(None)
29
+
30
+ def __next__(self):
31
+ next_item = self.queue.get()
32
+ if next_item is None:
33
+ raise StopIteration
34
+ return next_item
35
+
36
+ def __iter__(self):
37
+ return self
38
+
39
+
40
+ class PrefetchDataLoader(DataLoader):
41
+ """Prefetch version of dataloader.
42
+
43
+ Ref:
44
+ https://github.com/IgorSusmelj/pytorch-styleguide/issues/5#
45
+
46
+ TODO:
47
+ Need to test on single gpu and ddp (multi-gpu). There is a known issue in
48
+ ddp.
49
+
50
+ Args:
51
+ num_prefetch_queue (int): Number of prefetch queue.
52
+ kwargs (dict): Other arguments for dataloader.
53
+ """
54
+
55
+ def __init__(self, num_prefetch_queue, **kwargs):
56
+ self.num_prefetch_queue = num_prefetch_queue
57
+ super(PrefetchDataLoader, self).__init__(**kwargs)
58
+
59
+ def __iter__(self):
60
+ return PrefetchGenerator(super().__iter__(), self.num_prefetch_queue)
61
+
62
+
63
+ class CPUPrefetcher():
64
+ """CPU prefetcher.
65
+
66
+ Args:
67
+ loader: Dataloader.
68
+ """
69
+
70
+ def __init__(self, loader):
71
+ self.ori_loader = loader
72
+ self.loader = iter(loader)
73
+
74
+ def next(self):
75
+ try:
76
+ return next(self.loader)
77
+ except StopIteration:
78
+ return None
79
+
80
+ def reset(self):
81
+ self.loader = iter(self.ori_loader)
82
+
83
+
84
+ class CUDAPrefetcher():
85
+ """CUDA prefetcher.
86
+
87
+ Ref:
88
+ https://github.com/NVIDIA/apex/issues/304#
89
+
90
+ It may consume more GPU memory.
91
+
92
+ Args:
93
+ loader: Dataloader.
94
+ opt (dict): Options.
95
+ """
96
+
97
+ def __init__(self, loader, opt):
98
+ self.ori_loader = loader
99
+ self.loader = iter(loader)
100
+ self.opt = opt
101
+ self.stream = torch.cuda.Stream()
102
+ self.device = torch.device('cuda' if opt['num_gpu'] != 0 else 'cpu')
103
+ self.preload()
104
+
105
+ def preload(self):
106
+ try:
107
+ self.batch = next(self.loader) # self.batch is a dict
108
+ except StopIteration:
109
+ self.batch = None
110
+ return None
111
+ # put tensors to gpu
112
+ with torch.cuda.stream(self.stream):
113
+ for k, v in self.batch.items():
114
+ if torch.is_tensor(v):
115
+ self.batch[k] = self.batch[k].to(device=self.device, non_blocking=True)
116
+
117
+ def next(self):
118
+ torch.cuda.current_stream().wait_stream(self.stream)
119
+ batch = self.batch
120
+ self.preload()
121
+ return batch
122
+
123
+ def reset(self):
124
+ self.loader = iter(self.ori_loader)
125
+ self.preload()
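
A training-loop sketch (not part of the commit) showing how the prefetchers replace direct iteration over a DataLoader; `loader` stands for any torch DataLoader:

    from basicsr.data.prefetch_dataloader import CPUPrefetcher

    prefetcher = CPUPrefetcher(loader)
    batch = prefetcher.next()
    while batch is not None:
        # ... run one training step on `batch` ...
        batch = prefetcher.next()
    prefetcher.reset()           # rewind for the next epoch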
basicsr/data/transforms.py ADDED
@@ -0,0 +1,192 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import random
5
+ from scipy import special
6
+ from skimage import color
7
+
8
+
9
+ def mod_crop(img, scale):
10
+ """Mod crop images, used during testing.
11
+
12
+ Args:
13
+ img (ndarray): Input image.
14
+ scale (int): Scale factor.
15
+
16
+ Returns:
17
+ ndarray: Result image.
18
+ """
19
+ img = img.copy()
20
+ if img.ndim in (2, 3):
21
+ h, w = img.shape[0], img.shape[1]
22
+ h_remainder, w_remainder = h % scale, w % scale
23
+ img = img[:h - h_remainder, :w - w_remainder, ...]
24
+ else:
25
+ raise ValueError(f'Wrong img ndim: {img.ndim}.')
26
+ return img
27
+
28
+
29
+ def paired_random_crop(img_gts, img_lqs, gt_patch_size, scale, gt_path=None):
30
+ """Paired random crop. Support Numpy array and Tensor inputs.
31
+
32
+ It crops lists of lq and gt images with corresponding locations.
33
+
34
+ Args:
35
+ img_gts (list[ndarray] | ndarray | list[Tensor] | Tensor): GT images. Note that all images
36
+ should have the same shape. If the input is an ndarray, it will
37
+ be transformed to a list containing itself.
38
+ img_lqs (list[ndarray] | ndarray): LQ images. Note that all images
39
+ should have the same shape. If the input is an ndarray, it will
40
+ be transformed to a list containing itself.
41
+ gt_patch_size (int): GT patch size.
42
+ scale (int): Scale factor.
43
+ gt_path (str): Path to ground-truth. Default: None.
44
+
45
+ Returns:
46
+ list[ndarray] | ndarray: GT images and LQ images. If returned results
47
+ only have one element, just return ndarray.
48
+ """
49
+
50
+ if not isinstance(img_gts, list):
51
+ img_gts = [img_gts]
52
+ if not isinstance(img_lqs, list):
53
+ img_lqs = [img_lqs]
54
+
55
+ # determine input type: Numpy array or Tensor
56
+ input_type = 'Tensor' if torch.is_tensor(img_gts[0]) else 'Numpy'
57
+
58
+ if input_type == 'Tensor':
59
+ h_lq, w_lq = img_lqs[0].size()[-2:]
60
+ h_gt, w_gt = img_gts[0].size()[-2:]
61
+ else:
62
+ h_lq, w_lq = img_lqs[0].shape[0:2]
63
+ h_gt, w_gt = img_gts[0].shape[0:2]
64
+ lq_patch_size = gt_patch_size // scale
65
+
66
+ if h_gt != h_lq * scale or w_gt != w_lq * scale:
67
+ raise ValueError(f'Scale mismatches. GT ({h_gt}, {w_gt}) is not {scale}x '
68
+ f'multiplication of LQ ({h_lq}, {w_lq}).')
69
+ if h_lq < lq_patch_size or w_lq < lq_patch_size:
70
+ raise ValueError(f'LQ ({h_lq}, {w_lq}) is smaller than patch size '
71
+ f'({lq_patch_size}, {lq_patch_size}). '
72
+ f'Please remove {gt_path}.')
73
+
74
+ # randomly choose top and left coordinates for lq patch
75
+ top = random.randint(0, h_lq - lq_patch_size)
76
+ left = random.randint(0, w_lq - lq_patch_size)
77
+
78
+ # crop lq patch
79
+ if input_type == 'Tensor':
80
+ img_lqs = [v[:, :, top:top + lq_patch_size, left:left + lq_patch_size] for v in img_lqs]
81
+ else:
82
+ img_lqs = [v[top:top + lq_patch_size, left:left + lq_patch_size, ...] for v in img_lqs]
83
+
84
+ # crop corresponding gt patch
85
+ top_gt, left_gt = int(top * scale), int(left * scale)
86
+ if input_type == 'Tensor':
87
+ img_gts = [v[:, :, top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size] for v in img_gts]
88
+ else:
89
+ img_gts = [v[top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size, ...] for v in img_gts]
90
+ if len(img_gts) == 1:
91
+ img_gts = img_gts[0]
92
+ if len(img_lqs) == 1:
93
+ img_lqs = img_lqs[0]
94
+ return img_gts, img_lqs
95
+
96
+
97
+ def augment(imgs, hflip=True, rotation=True, flows=None, return_status=False):
98
+ """Augment: horizontal flips OR rotate (0, 90, 180, 270 degrees).
99
+
100
+ We use vertical flip and transpose for rotation implementation.
101
+ All the images in the list use the same augmentation.
102
+
103
+ Args:
104
+ imgs (list[ndarray] | ndarray): Images to be augmented. If the input
105
+ is an ndarray, it will be transformed to a list.
106
+ hflip (bool): Horizontal flip. Default: True.
107
+ rotation (bool): Ratotation. Default: True.
108
+ flows (list[ndarray]: Flows to be augmented. If the input is an
109
+ ndarray, it will be transformed to a list.
110
+ Dimension is (h, w, 2). Default: None.
111
+ return_status (bool): Return the status of flip and rotation.
112
+ Default: False.
113
+
114
+ Returns:
115
+ list[ndarray] | ndarray: Augmented images and flows. If returned
116
+ results only have one element, just return ndarray.
117
+
118
+ """
119
+ hflip = hflip and random.random() < 0.5
120
+ vflip = rotation and random.random() < 0.5
121
+ rot90 = rotation and random.random() < 0.5
122
+
123
+ def _augment(img):
124
+ if hflip: # horizontal
125
+ cv2.flip(img, 1, img)
126
+ if vflip: # vertical
127
+ cv2.flip(img, 0, img)
128
+ if rot90:
129
+ img = img.transpose(1, 0, 2)
130
+ return img
131
+
132
+ def _augment_flow(flow):
133
+ if hflip: # horizontal
134
+ cv2.flip(flow, 1, flow)
135
+ flow[:, :, 0] *= -1
136
+ if vflip: # vertical
137
+ cv2.flip(flow, 0, flow)
138
+ flow[:, :, 1] *= -1
139
+ if rot90:
140
+ flow = flow.transpose(1, 0, 2)
141
+ flow = flow[:, :, [1, 0]]
142
+ return flow
143
+
144
+ if not isinstance(imgs, list):
145
+ imgs = [imgs]
146
+ imgs = [_augment(img) for img in imgs]
147
+ if len(imgs) == 1:
148
+ imgs = imgs[0]
149
+
150
+ if flows is not None:
151
+ if not isinstance(flows, list):
152
+ flows = [flows]
153
+ flows = [_augment_flow(flow) for flow in flows]
154
+ if len(flows) == 1:
155
+ flows = flows[0]
156
+ return imgs, flows
157
+ else:
158
+ if return_status:
159
+ return imgs, (hflip, vflip, rot90)
160
+ else:
161
+ return imgs
162
+
163
+
164
+ def img_rotate(img, angle, center=None, scale=1.0, borderMode=cv2.BORDER_CONSTANT, borderValue=0.):
165
+ """Rotate image.
166
+
167
+ Args:
168
+ img (ndarray): Image to be rotated.
169
+ angle (float): Rotation angle in degrees. Positive values mean
170
+ counter-clockwise rotation.
171
+ center (tuple[int]): Rotation center. If the center is None,
172
+ initialize it as the center of the image. Default: None.
173
+ scale (float): Isotropic scale factor. Default: 1.0.
174
+ """
175
+ (h, w) = img.shape[:2]
176
+
177
+ if center is None:
178
+ center = (w // 2, h // 2)
179
+
180
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
181
+ rotated_img = cv2.warpAffine(img, matrix, (w, h), borderMode=borderMode, borderValue=borderValue)
182
+ return rotated_img
183
+
184
+
185
+ def rgb2lab(img_rgb):
186
+ img_lab = color.rgb2lab(img_rgb)
187
+ img_l = img_lab[:, :, :1]
188
+ img_ab = img_lab[:, :, 1:]
189
+ return img_l, img_ab
190
+
191
+
192
+
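
A sketch (not part of the commit) chaining paired_random_crop and augment on placeholder arrays:

    import numpy as np
    from basicsr.data.transforms import paired_random_crop, augment

    img_gt = np.random.rand(128, 128, 3).astype(np.float32)    # GT at 4x the LQ resolution
    img_lq = np.random.rand(32, 32, 3).astype(np.float32)
    gt_patch, lq_patch = paired_random_crop(img_gt, img_lq, gt_patch_size=64, scale=4)
    gt_patch, lq_patch = augment([gt_patch, lq_patch], hflip=True, rotation=True)
    print(gt_patch.shape, lq_patch.shape)                       # (64, 64, 3) (16, 16, 3)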
basicsr/losses/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ from copy import deepcopy
2
+
3
+ from basicsr.utils import get_root_logger
4
+ from basicsr.utils.registry import LOSS_REGISTRY
5
+ from .losses import (CharbonnierLoss, GANLoss, L1Loss, MSELoss, PerceptualLoss, WeightedTVLoss, g_path_regularize,
6
+ gradient_penalty_loss, r1_penalty)
7
+
8
+ __all__ = [
9
+ 'L1Loss', 'MSELoss', 'CharbonnierLoss', 'WeightedTVLoss', 'PerceptualLoss', 'GANLoss', 'gradient_penalty_loss',
10
+ 'r1_penalty', 'g_path_regularize'
11
+ ]
12
+
13
+
14
+ def build_loss(opt):
15
+ """Build loss from options.
16
+
17
+ Args:
18
+ opt (dict): Configuration. It must contain:
19
+ type (str): Model type.
20
+ """
21
+ opt = deepcopy(opt)
22
+ loss_type = opt.pop('type')
23
+ loss = LOSS_REGISTRY.get(loss_type)(**opt)
24
+ logger = get_root_logger()
25
+ logger.info(f'Loss [{loss.__class__.__name__}] is created.')
26
+ return loss
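
A sketch (not part of the commit) of build_loss with the L1Loss defined in losses.py:

    import torch
    from basicsr.losses import build_loss

    cri_pix = build_loss({'type': 'L1Loss', 'loss_weight': 1.0, 'reduction': 'mean'})
    loss = cri_pix(torch.rand(1, 2, 8, 8), torch.rand(1, 2, 8, 8))
    print(loss)                  # scalar tensor: mean absolute error scaled by loss_weight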
basicsr/losses/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.02 kB).
basicsr/losses/__pycache__/loss_util.cpython-39.pyc ADDED
Binary file (2.7 kB).
basicsr/losses/__pycache__/losses.cpython-39.pyc ADDED
Binary file (17.9 kB).
basicsr/losses/loss_util.py ADDED
1
+ import functools
2
+ from torch.nn import functional as F
3
+
4
+
5
+ def reduce_loss(loss, reduction):
6
+ """Reduce loss as specified.
7
+
8
+ Args:
9
+ loss (Tensor): Elementwise loss tensor.
10
+ reduction (str): Options are 'none', 'mean' and 'sum'.
11
+
12
+ Returns:
13
+ Tensor: Reduced loss tensor.
14
+ """
15
+ reduction_enum = F._Reduction.get_enum(reduction)
16
+ # none: 0, elementwise_mean:1, sum: 2
17
+ if reduction_enum == 0:
18
+ return loss
19
+ elif reduction_enum == 1:
20
+ return loss.mean()
21
+ else:
22
+ return loss.sum()
23
+
24
+
25
+ def weight_reduce_loss(loss, weight=None, reduction='mean'):
26
+ """Apply element-wise weight and reduce loss.
27
+
28
+ Args:
29
+ loss (Tensor): Element-wise loss.
30
+ weight (Tensor): Element-wise weights. Default: None.
31
+ reduction (str): Same as built-in losses of PyTorch. Options are
32
+ 'none', 'mean' and 'sum'. Default: 'mean'.
33
+
34
+ Returns:
35
+ Tensor: Loss values.
36
+ """
37
+ # if weight is specified, apply element-wise weight
38
+ if weight is not None:
39
+ assert weight.dim() == loss.dim()
40
+ assert weight.size(1) == 1 or weight.size(1) == loss.size(1)
41
+ loss = loss * weight
42
+
43
+ # if weight is not specified or reduction is sum, just reduce the loss
44
+ if weight is None or reduction == 'sum':
45
+ loss = reduce_loss(loss, reduction)
46
+ # if reduction is mean, then compute mean over weight region
47
+ elif reduction == 'mean':
48
+ if weight.size(1) > 1:
49
+ weight = weight.sum()
50
+ else:
51
+ weight = weight.sum() * loss.size(1)
52
+ loss = loss.sum() / weight
53
+
54
+ return loss
55
+
56
+
57
+ def weighted_loss(loss_func):
58
+ """Create a weighted version of a given loss function.
59
+
60
+ To use this decorator, the loss function must have the signature like
61
+ `loss_func(pred, target, **kwargs)`. The function only needs to compute
62
+ element-wise loss without any reduction. This decorator will add weight
63
+ and reduction arguments to the function. The decorated function will have
64
+ the signature like `loss_func(pred, target, weight=None, reduction='mean',
65
+ **kwargs)`.
66
+
67
+ :Example:
68
+
69
+ >>> import torch
70
+ >>> @weighted_loss
71
+ >>> def l1_loss(pred, target):
72
+ >>> return (pred - target).abs()
73
+
74
+ >>> pred = torch.Tensor([0, 2, 3])
75
+ >>> target = torch.Tensor([1, 1, 1])
76
+ >>> weight = torch.Tensor([1, 0, 1])
77
+
78
+ >>> l1_loss(pred, target)
79
+ tensor(1.3333)
80
+ >>> l1_loss(pred, target, weight)
81
+ tensor(1.5000)
82
+ >>> l1_loss(pred, target, reduction='none')
83
+ tensor([1., 1., 2.])
84
+ >>> l1_loss(pred, target, weight, reduction='sum')
85
+ tensor(3.)
86
+ """
87
+
88
+ @functools.wraps(loss_func)
89
+ def wrapper(pred, target, weight=None, reduction='mean', **kwargs):
90
+ # get element-wise loss
91
+ loss = loss_func(pred, target, **kwargs)
92
+ loss = weight_reduce_loss(loss, weight, reduction)
93
+ return loss
94
+
95
+ return wrapper
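The weighted_loss decorator above is what gives the element-wise criteria in losses.py their weight/reduction arguments. A small sketch of wrapping a new element-wise criterion the same way; the masked_charbonnier name and the mask values are illustrative only:

    import torch
    from basicsr.losses.loss_util import weighted_loss

    @weighted_loss
    def masked_charbonnier(pred, target, eps=1e-6):
        # element-wise only; weight and reduction are applied by the decorator
        return torch.sqrt((pred - target) ** 2 + eps)

    pred = torch.rand(2, 3, 8, 8)
    target = torch.rand(2, 3, 8, 8)
    # per-pixel weights with a single channel, broadcast over C in the decorator
    mask = (torch.rand(2, 1, 8, 8) > 0.5).float()
    print(masked_charbonnier(pred, target, weight=mask, reduction='mean'))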
basicsr/losses/losses.py ADDED
@@ -0,0 +1,551 @@
1
+ import math
2
+ import torch
3
+ from torch import autograd as autograd
4
+ from torch import nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ from basicsr.archs.vgg_arch import VGGFeatureExtractor
8
+ from basicsr.utils.registry import LOSS_REGISTRY
9
+ from .loss_util import weighted_loss
10
+
11
+ _reduction_modes = ['none', 'mean', 'sum']
12
+
13
+
14
+ @weighted_loss
15
+ def l1_loss(pred, target):
16
+ return F.l1_loss(pred, target, reduction='none')
17
+
18
+
19
+ @weighted_loss
20
+ def mse_loss(pred, target):
21
+ return F.mse_loss(pred, target, reduction='none')
22
+
23
+
24
+ @weighted_loss
25
+ def charbonnier_loss(pred, target, eps=1e-12):
26
+ return torch.sqrt((pred - target)**2 + eps)
27
+
28
+
29
+ @LOSS_REGISTRY.register()
30
+ class L1Loss(nn.Module):
31
+ """L1 (mean absolute error, MAE) loss.
32
+
33
+ Args:
34
+ loss_weight (float): Loss weight for L1 loss. Default: 1.0.
35
+ reduction (str): Specifies the reduction to apply to the output.
36
+ Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
37
+ """
38
+
39
+ def __init__(self, loss_weight=1.0, reduction='mean'):
40
+ super(L1Loss, self).__init__()
41
+ if reduction not in ['none', 'mean', 'sum']:
42
+ raise ValueError(f'Unsupported reduction mode: {reduction}. Supported ones are: {_reduction_modes}')
43
+
44
+ self.loss_weight = loss_weight
45
+ self.reduction = reduction
46
+
47
+ def forward(self, pred, target, weight=None, **kwargs):
48
+ """
49
+ Args:
50
+ pred (Tensor): of shape (N, C, H, W). Predicted tensor.
51
+ target (Tensor): of shape (N, C, H, W). Ground truth tensor.
52
+ weight (Tensor, optional): of shape (N, C, H, W). Element-wise
53
+ weights. Default: None.
54
+ """
55
+ return self.loss_weight * l1_loss(pred, target, weight, reduction=self.reduction)
56
+
57
+
58
+ @LOSS_REGISTRY.register()
59
+ class MSELoss(nn.Module):
60
+ """MSE (L2) loss.
61
+
62
+ Args:
63
+ loss_weight (float): Loss weight for MSE loss. Default: 1.0.
64
+ reduction (str): Specifies the reduction to apply to the output.
65
+ Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
66
+ """
67
+
68
+ def __init__(self, loss_weight=1.0, reduction='mean'):
69
+ super(MSELoss, self).__init__()
70
+ if reduction not in ['none', 'mean', 'sum']:
71
+ raise ValueError(f'Unsupported reduction mode: {reduction}. Supported ones are: {_reduction_modes}')
72
+
73
+ self.loss_weight = loss_weight
74
+ self.reduction = reduction
75
+
76
+ def forward(self, pred, target, weight=None, **kwargs):
77
+ """
78
+ Args:
79
+ pred (Tensor): of shape (N, C, H, W). Predicted tensor.
80
+ target (Tensor): of shape (N, C, H, W). Ground truth tensor.
81
+ weight (Tensor, optional): of shape (N, C, H, W). Element-wise
82
+ weights. Default: None.
83
+ """
84
+ return self.loss_weight * mse_loss(pred, target, weight, reduction=self.reduction)
85
+
86
+
87
+ @LOSS_REGISTRY.register()
88
+ class CharbonnierLoss(nn.Module):
89
+ """Charbonnier loss (one variant of Robust L1Loss, a differentiable
90
+ variant of L1Loss).
91
+
92
+ Described in "Deep Laplacian Pyramid Networks for Fast and Accurate
93
+ Super-Resolution".
94
+
95
+ Args:
96
+ loss_weight (float): Loss weight for L1 loss. Default: 1.0.
97
+ reduction (str): Specifies the reduction to apply to the output.
98
+ Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
99
+ eps (float): A value used to control the curvature near zero.
100
+ Default: 1e-12.
101
+ """
102
+
103
+ def __init__(self, loss_weight=1.0, reduction='mean', eps=1e-12):
104
+ super(CharbonnierLoss, self).__init__()
105
+ if reduction not in ['none', 'mean', 'sum']:
106
+ raise ValueError(f'Unsupported reduction mode: {reduction}. Supported ones are: {_reduction_modes}')
107
+
108
+ self.loss_weight = loss_weight
109
+ self.reduction = reduction
110
+ self.eps = eps
111
+
112
+ def forward(self, pred, target, weight=None, **kwargs):
113
+ """
114
+ Args:
115
+ pred (Tensor): of shape (N, C, H, W). Predicted tensor.
116
+ target (Tensor): of shape (N, C, H, W). Ground truth tensor.
117
+ weight (Tensor, optional): of shape (N, C, H, W). Element-wise
118
+ weights. Default: None.
119
+ """
120
+ return self.loss_weight * charbonnier_loss(pred, target, weight, eps=self.eps, reduction=self.reduction)
121
+
122
+
123
+ @LOSS_REGISTRY.register()
124
+ class WeightedTVLoss(L1Loss):
125
+ """Weighted TV loss.
126
+
127
+ Args:
128
+ loss_weight (float): Loss weight. Default: 1.0.
129
+ """
130
+
131
+ def __init__(self, loss_weight=1.0):
132
+ super(WeightedTVLoss, self).__init__(loss_weight=loss_weight)
133
+
134
+ def forward(self, pred, weight=None):
135
+ if weight is None:
136
+ y_weight = None
137
+ x_weight = None
138
+ else:
139
+ y_weight = weight[:, :, :-1, :]
140
+ x_weight = weight[:, :, :, :-1]
141
+
142
+ y_diff = super(WeightedTVLoss, self).forward(pred[:, :, :-1, :], pred[:, :, 1:, :], weight=y_weight)
143
+ x_diff = super(WeightedTVLoss, self).forward(pred[:, :, :, :-1], pred[:, :, :, 1:], weight=x_weight)
144
+
145
+ loss = x_diff + y_diff
146
+
147
+ return loss
148
+
149
+
150
+ @LOSS_REGISTRY.register()
151
+ class PerceptualLoss(nn.Module):
152
+ """Perceptual loss with commonly used style loss.
153
+
154
+ Args:
155
+ layer_weights (dict): The weight for each layer of vgg feature.
156
+ Here is an example: {'conv5_4': 1.}, which means the conv5_4
157
+ feature layer (before relu5_4) will be extracted with weight
158
+ 1.0 in calculating losses.
159
+ vgg_type (str): The type of vgg network used as feature extractor.
160
+ Default: 'vgg19'.
161
+ use_input_norm (bool): If True, normalize the input image in vgg.
162
+ Default: True.
163
+ range_norm (bool): If True, norm images with range [-1, 1] to [0, 1].
164
+ Default: False.
165
+ perceptual_weight (float): If `perceptual_weight > 0`, the perceptual
166
+ loss will be calculated and the loss will be multiplied by the
167
+ weight. Default: 1.0.
168
+ style_weight (float): If `style_weight > 0`, the style loss will be
169
+ calculated and the loss will be multiplied by the weight.
170
+ Default: 0.
171
+ criterion (str): Criterion used for perceptual loss. Default: 'l1'.
172
+ """
173
+
174
+ def __init__(self,
175
+ layer_weights,
176
+ vgg_type='vgg19',
177
+ use_input_norm=True,
178
+ range_norm=False,
179
+ perceptual_weight=1.0,
180
+ style_weight=0.,
181
+ criterion='l1'):
182
+ super(PerceptualLoss, self).__init__()
183
+ self.perceptual_weight = perceptual_weight
184
+ self.style_weight = style_weight
185
+ self.layer_weights = layer_weights
186
+ self.vgg = VGGFeatureExtractor(
187
+ layer_name_list=list(layer_weights.keys()),
188
+ vgg_type=vgg_type,
189
+ use_input_norm=use_input_norm,
190
+ range_norm=range_norm)
191
+
192
+ self.criterion_type = criterion
193
+ if self.criterion_type == 'l1':
194
+ self.criterion = torch.nn.L1Loss()
195
+ elif self.criterion_type == 'l2':
196
+ self.criterion = torch.nn.MSELoss()
197
+ elif self.criterion_type == 'fro':
198
+ self.criterion = None
199
+ else:
200
+ raise NotImplementedError(f'{criterion} criterion has not been supported.')
201
+
202
+ def forward(self, x, gt):
203
+ """Forward function.
204
+
205
+ Args:
206
+ x (Tensor): Input tensor with shape (n, c, h, w).
207
+ gt (Tensor): Ground-truth tensor with shape (n, c, h, w).
208
+
209
+ Returns:
210
+ Tensor: Forward results.
211
+ """
212
+ # extract vgg features
213
+ x_features = self.vgg(x)
214
+ gt_features = self.vgg(gt.detach())
215
+
216
+ # calculate perceptual loss
217
+ if self.perceptual_weight > 0:
218
+ percep_loss = 0
219
+ for k in x_features.keys():
220
+ if self.criterion_type == 'fro':
221
+ percep_loss += torch.norm(x_features[k] - gt_features[k], p='fro') * self.layer_weights[k]
222
+ else:
223
+ percep_loss += self.criterion(x_features[k], gt_features[k]) * self.layer_weights[k]
224
+ percep_loss *= self.perceptual_weight
225
+ else:
226
+ percep_loss = None
227
+
228
+ # calculate style loss
229
+ if self.style_weight > 0:
230
+ style_loss = 0
231
+ for k in x_features.keys():
232
+ if self.criterion_type == 'fro':
233
+ style_loss += torch.norm(
234
+ self._gram_mat(x_features[k]) - self._gram_mat(gt_features[k]), p='fro') * self.layer_weights[k]
235
+ else:
236
+ style_loss += self.criterion(self._gram_mat(x_features[k]), self._gram_mat(
237
+ gt_features[k])) * self.layer_weights[k]
238
+ style_loss *= self.style_weight
239
+ else:
240
+ style_loss = None
241
+
242
+ return percep_loss, style_loss
243
+
244
+ def _gram_mat(self, x):
245
+ """Calculate Gram matrix.
246
+
247
+ Args:
248
+ x (torch.Tensor): Tensor with shape of (n, c, h, w).
249
+
250
+ Returns:
251
+ torch.Tensor: Gram matrix.
252
+ """
253
+ n, c, h, w = x.size()
254
+ features = x.view(n, c, w * h)
255
+ features_t = features.transpose(1, 2)
256
+ gram = features.bmm(features_t) / (c * h * w)
257
+ return gram
258
+
259
+
260
+ @LOSS_REGISTRY.register()
261
+ class GANLoss(nn.Module):
262
+ """Define GAN loss.
263
+
264
+ Args:
265
+ gan_type (str): Support 'vanilla', 'lsgan', 'wgan', 'wgan_softplus', 'hinge'.
266
+ real_label_val (float): The value for real label. Default: 1.0.
267
+ fake_label_val (float): The value for fake label. Default: 0.0.
268
+ loss_weight (float): Loss weight. Default: 1.0.
269
+ Note that loss_weight is only for generators; it is always 1.0
270
+ for discriminators.
271
+ """
272
+
273
+ def __init__(self, gan_type, real_label_val=1.0, fake_label_val=0.0, loss_weight=1.0):
274
+ super(GANLoss, self).__init__()
275
+ self.gan_type = gan_type
276
+ self.loss_weight = loss_weight
277
+ self.real_label_val = real_label_val
278
+ self.fake_label_val = fake_label_val
279
+
280
+ if self.gan_type == 'vanilla':
281
+ self.loss = nn.BCEWithLogitsLoss()
282
+ elif self.gan_type == 'lsgan':
283
+ self.loss = nn.MSELoss()
284
+ elif self.gan_type == 'wgan':
285
+ self.loss = self._wgan_loss
286
+ elif self.gan_type == 'wgan_softplus':
287
+ self.loss = self._wgan_softplus_loss
288
+ elif self.gan_type == 'hinge':
289
+ self.loss = nn.ReLU()
290
+ else:
291
+ raise NotImplementedError(f'GAN type {self.gan_type} is not implemented.')
292
+
293
+ def _wgan_loss(self, input, target):
294
+ """wgan loss.
295
+
296
+ Args:
297
+ input (Tensor): Input tensor.
298
+ target (bool): Target label.
299
+
300
+ Returns:
301
+ Tensor: wgan loss.
302
+ """
303
+ return -input.mean() if target else input.mean()
304
+
305
+ def _wgan_softplus_loss(self, input, target):
306
+ """wgan loss with soft plus. softplus is a smooth approximation to the
307
+ ReLU function.
308
+
309
+ In StyleGAN2, it is called:
310
+ Logistic loss for discriminator;
311
+ Non-saturating loss for generator.
312
+
313
+ Args:
314
+ input (Tensor): Input tensor.
315
+ target (bool): Target label.
316
+
317
+ Returns:
318
+ Tensor: wgan loss.
319
+ """
320
+ return F.softplus(-input).mean() if target else F.softplus(input).mean()
321
+
322
+ def get_target_label(self, input, target_is_real):
323
+ """Get target label.
324
+
325
+ Args:
326
+ input (Tensor): Input tensor.
327
+ target_is_real (bool): Whether the target is real or fake.
328
+
329
+ Returns:
330
+ (bool | Tensor): Target tensor. Return bool for wgan, otherwise,
331
+ return Tensor.
332
+ """
333
+
334
+ if self.gan_type in ['wgan', 'wgan_softplus']:
335
+ return target_is_real
336
+ target_val = (self.real_label_val if target_is_real else self.fake_label_val)
337
+ return input.new_ones(input.size()) * target_val
338
+
339
+ def forward(self, input, target_is_real, is_disc=False):
340
+ """
341
+ Args:
342
+ input (Tensor): The input for the loss module, i.e., the network
343
+ prediction.
344
+ target_is_real (bool): Whether the target is real or fake.
345
+ is_disc (bool): Whether the loss for discriminators or not.
346
+ Default: False.
347
+
348
+ Returns:
349
+ Tensor: GAN loss value.
350
+ """
351
+ target_label = self.get_target_label(input, target_is_real)
352
+ if self.gan_type == 'hinge':
353
+ if is_disc: # for discriminators in hinge-gan
354
+ input = -input if target_is_real else input
355
+ loss = self.loss(1 + input).mean()
356
+ else: # for generators in hinge-gan
357
+ loss = -input.mean()
358
+ else: # other gan types
359
+ loss = self.loss(input, target_label)
360
+
361
+ # loss_weight is always 1.0 for discriminators
362
+ return loss if is_disc else loss * self.loss_weight
363
+
364
+
365
+ @LOSS_REGISTRY.register()
366
+ class MultiScaleGANLoss(GANLoss):
367
+ """
368
+ MultiScaleGANLoss accepts a list of predictions
369
+ """
370
+
371
+ def __init__(self, gan_type, real_label_val=1.0, fake_label_val=0.0, loss_weight=1.0):
372
+ super(MultiScaleGANLoss, self).__init__(gan_type, real_label_val, fake_label_val, loss_weight)
373
+
374
+ def forward(self, input, target_is_real, is_disc=False):
375
+ """
376
+ The input is a list of tensors, or a list of (a list of tensors)
377
+ """
378
+ if isinstance(input, list):
379
+ loss = 0
380
+ for pred_i in input:
381
+ if isinstance(pred_i, list):
382
+ # Only compute GAN loss for the last layer
383
+ # in case of multiscale feature matching
384
+ pred_i = pred_i[-1]
385
+ # Safe operation: calling .mean() on a 0-dim tensor is a no-op
386
+ loss_tensor = super().forward(pred_i, target_is_real, is_disc).mean()
387
+ loss += loss_tensor
388
+ return loss / len(input)
389
+ else:
390
+ return super().forward(input, target_is_real, is_disc)
391
+
392
+
393
+ def r1_penalty(real_pred, real_img):
394
+ """R1 regularization for discriminator. The core idea is to
395
+ penalize the gradient on real data alone: when the
396
+ generator distribution produces the true data distribution
397
+ and the discriminator is equal to 0 on the data manifold, the
398
+ gradient penalty ensures that the discriminator cannot create
399
+ a non-zero gradient orthogonal to the data manifold without
400
+ suffering a loss in the GAN game.
401
+
402
+ Ref:
403
+ Eq. 9 in Which training methods for GANs do actually converge.
404
+ """
405
+ grad_real = autograd.grad(outputs=real_pred.sum(), inputs=real_img, create_graph=True)[0]
406
+ grad_penalty = grad_real.pow(2).view(grad_real.shape[0], -1).sum(1).mean()
407
+ return grad_penalty
408
+
409
+
410
+ def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
411
+ noise = torch.randn_like(fake_img) / math.sqrt(fake_img.shape[2] * fake_img.shape[3])
412
+ grad = autograd.grad(outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True)[0]
413
+ path_lengths = torch.sqrt(grad.pow(2).sum(2).mean(1))
414
+
415
+ path_mean = mean_path_length + decay * (path_lengths.mean() - mean_path_length)
416
+
417
+ path_penalty = (path_lengths - path_mean).pow(2).mean()
418
+
419
+ return path_penalty, path_lengths.detach().mean(), path_mean.detach()
420
+
421
+
422
+ def gradient_penalty_loss(discriminator, real_data, fake_data, weight=None):
423
+ """Calculate gradient penalty for wgan-gp.
424
+
425
+ Args:
426
+ discriminator (nn.Module): Network for the discriminator.
427
+ real_data (Tensor): Real input data.
428
+ fake_data (Tensor): Fake input data.
429
+ weight (Tensor): Weight tensor. Default: None.
430
+
431
+ Returns:
432
+ Tensor: A tensor for gradient penalty.
433
+ """
434
+
435
+ batch_size = real_data.size(0)
436
+ alpha = real_data.new_tensor(torch.rand(batch_size, 1, 1, 1))
437
+
438
+ # interpolate between real_data and fake_data
439
+ interpolates = alpha * real_data + (1. - alpha) * fake_data
440
+ interpolates = autograd.Variable(interpolates, requires_grad=True)
441
+
442
+ disc_interpolates = discriminator(interpolates)
443
+ gradients = autograd.grad(
444
+ outputs=disc_interpolates,
445
+ inputs=interpolates,
446
+ grad_outputs=torch.ones_like(disc_interpolates),
447
+ create_graph=True,
448
+ retain_graph=True,
449
+ only_inputs=True)[0]
450
+
451
+ if weight is not None:
452
+ gradients = gradients * weight
453
+
454
+ gradients_penalty = ((gradients.norm(2, dim=1) - 1)**2).mean()
455
+ if weight is not None:
456
+ gradients_penalty /= torch.mean(weight)
457
+
458
+ return gradients_penalty
459
+
460
+
461
+ @LOSS_REGISTRY.register()
462
+ class GANFeatLoss(nn.Module):
463
+ """Define feature matching loss for gans
464
+
465
+ Args:
466
+ criterion (str): Support 'l1', 'l2', 'charbonnier'.
467
+ loss_weight (float): Loss weight. Default: 1.0.
468
+ reduction (str): Specifies the reduction to apply to the output.
469
+ Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
470
+ """
471
+
472
+ def __init__(self, criterion='l1', loss_weight=1.0, reduction='mean'):
473
+ super(GANFeatLoss, self).__init__()
474
+ if criterion == 'l1':
475
+ self.loss_op = L1Loss(loss_weight, reduction)
476
+ elif criterion == 'l2':
477
+ self.loss_op = MSELoss(loss_weight, reduction)
478
+ elif criterion == 'charbonnier':
479
+ self.loss_op = CharbonnierLoss(loss_weight, reduction)
480
+ else:
481
+ raise ValueError(f'Unsupported loss mode: {criterion}. Supported ones are: l1|l2|charbonnier')
482
+
483
+ self.loss_weight = loss_weight
484
+
485
+ def forward(self, pred_fake, pred_real):
486
+ num_d = len(pred_fake)
487
+ loss = 0
488
+ for i in range(num_d): # for each discriminator
489
+ # last output is the final prediction, exclude it
490
+ num_intermediate_outputs = len(pred_fake[i]) - 1
491
+ for j in range(num_intermediate_outputs): # for each layer output
492
+ unweighted_loss = self.loss_op(pred_fake[i][j], pred_real[i][j].detach())
493
+ loss += unweighted_loss / num_d
494
+ return loss * self.loss_weight
495
+
496
+
497
+ class sobel_loss(nn.Module):
498
+ def __init__(self, weight=1.0):
499
+ super().__init__()
500
+ kernel_x = torch.Tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]])
501
+ kernel_y = torch.Tensor([[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]])
502
+ kernel = torch.stack([kernel_x, kernel_y])
503
+ kernel.requires_grad = False
504
+ kernel = kernel.unsqueeze(1)
505
+ self.register_buffer('sobel_kernel', kernel)
506
+ self.weight = weight
507
+
508
+ def forward(self, input_tensor, target_tensor):
509
+ b, c, h, w = input_tensor.size()
510
+ input_tensor = input_tensor.view(b * c, 1, h, w)
511
+ input_edge = F.conv2d(input_tensor, self.sobel_kernel, padding=1)
512
+ input_edge = input_edge.view(b, 2*c, h, w)
513
+
514
+ target_tensor = target_tensor.view(-1, 1, h, w)
515
+ target_edge = F.conv2d(target_tensor, self.sobel_kernel, padding=1)
516
+ target_edge = target_edge.view(b, 2*c, h, w)
517
+
518
+ return self.weight * F.l1_loss(input_edge, target_edge)
519
+
520
+
521
+ @LOSS_REGISTRY.register()
522
+ class ColorfulnessLoss(nn.Module):
523
+ """Colorfulness loss.
524
+
525
+ Args:
526
+ loss_weight (float): Loss weight for Colorfulness loss. Default: 1.0.
527
+
528
+ """
529
+
530
+ def __init__(self, loss_weight=1.0):
531
+ super(ColorfulnessLoss, self).__init__()
532
+
533
+ self.loss_weight = loss_weight
534
+
535
+ def forward(self, pred, **kwargs):
536
+ """
537
+ Args:
538
+ pred (Tensor): of shape (N, C, H, W). Predicted tensor.
539
+ """
540
+ colorfulness_loss = 0
541
+ for i in range(pred.shape[0]):
542
+ (R, G, B) = pred[i][0], pred[i][1], pred[i][2]
543
+ rg = torch.abs(R - G)
544
+ yb = torch.abs(0.5 * (R+G) - B)
545
+ (rbMean, rbStd) = (torch.mean(rg), torch.std(rg))
546
+ (ybMean, ybStd) = (torch.mean(yb), torch.std(yb))
547
+ stdRoot = torch.sqrt((rbStd ** 2) + (ybStd ** 2))
548
+ meanRoot = torch.sqrt((rbMean ** 2) + (ybMean ** 2))
549
+ colorfulness = stdRoot + (0.3 * meanRoot)
550
+ colorfulness_loss += (1 - colorfulness)
551
+ return self.loss_weight * colorfulness_loss
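Of the classes above, GANLoss has the easiest calling convention to get wrong, since the same module serves both the generator and discriminator steps. A brief sketch with random tensors standing in for discriminator logits; the shapes and the 0.1 weight are illustrative:

    import torch
    from basicsr.losses.losses import GANLoss

    cri_gan = GANLoss(gan_type='vanilla', real_label_val=1.0,
                      fake_label_val=0.0, loss_weight=0.1)

    real_pred = torch.randn(4, 1)  # stand-in for D(real)
    fake_pred = torch.randn(4, 1)  # stand-in for D(fake)

    # generator step: push fake predictions towards the real label; loss_weight applies
    l_g_gan = cri_gan(fake_pred, target_is_real=True, is_disc=False)

    # discriminator step: is_disc=True, so loss_weight is ignored (always 1.0)
    l_d_real = cri_gan(real_pred, target_is_real=True, is_disc=True)
    l_d_fake = cri_gan(fake_pred.detach(), target_is_real=False, is_disc=True)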
basicsr/metrics/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ from copy import deepcopy
2
+
3
+ from basicsr.utils.registry import METRIC_REGISTRY
4
+ from .psnr_ssim import calculate_psnr, calculate_ssim
5
+ from .colorfulness import calculate_cf
6
+
7
+ __all__ = ['calculate_psnr', 'calculate_ssim', 'calculate_cf']
8
+
9
+
10
+ def calculate_metric(data, opt):
11
+ """Calculate metric from data and options.
12
+
13
+ Args:
14
+ opt (dict): Configuration. It must contain:
15
+ type (str): Metric type.
16
+ """
17
+ opt = deepcopy(opt)
18
+ metric_type = opt.pop('type')
19
+ metric = METRIC_REGISTRY.get(metric_type)(**data, **opt)
20
+ return metric
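calculate_metric mirrors build_loss: 'type' selects the registered function, and the remaining option keys, together with the entries of data, are forwarded as keyword arguments. A small sketch with synthetic uint8 images; the keyword names img1/img2 and crop_border are assumptions based on common BasicSR versions of calculate_psnr, whose signature in psnr_ssim.py is not shown in this diff:

    import numpy as np
    from basicsr.metrics import calculate_metric

    img1 = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
    img2 = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

    # the keys of `data` must match the argument names of calculate_psnr
    data = {'img1': img1, 'img2': img2}
    opt = {'type': 'calculate_psnr', 'crop_border': 0}
    print(calculate_metric(data, opt))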
basicsr/metrics/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (776 Bytes). View file