yinwentao committed
Commit 8d34f50
Parent(s): 4ceb8ab
DockerFile
This view is limited to 50 files because the commit contains too many changes.
- .gitattributes +2 -0
- Dockerfile +16 -0
- LICENSE +13 -0
- app.py +16 -0
- assets/image/synctalk.png +3 -0
- data_utils/UNFaceFlow/core/__init__.py +0 -0
- data_utils/UNFaceFlow/core/corr.py +91 -0
- data_utils/UNFaceFlow/core/datasets.py +235 -0
- data_utils/UNFaceFlow/core/extractor.py +266 -0
- data_utils/UNFaceFlow/core/nnutils.py +233 -0
- data_utils/UNFaceFlow/core/raft.py +259 -0
- data_utils/UNFaceFlow/core/update.py +169 -0
- data_utils/UNFaceFlow/core/utils_core/__init__.py +0 -0
- data_utils/UNFaceFlow/core/utils_core/augmentor.py +246 -0
- data_utils/UNFaceFlow/core/utils_core/flow_viz.py +132 -0
- data_utils/UNFaceFlow/core/utils_core/frame_utils.py +137 -0
- data_utils/UNFaceFlow/core/utils_core/utils.py +86 -0
- data_utils/UNFaceFlow/core/warp_utils.py +118 -0
- data_utils/UNFaceFlow/data_test_flow/__init__.py +94 -0
- data_utils/UNFaceFlow/data_test_flow/base_dataset.py +98 -0
- data_utils/UNFaceFlow/data_test_flow/dd_dataset.py +108 -0
- data_utils/UNFaceFlow/data_test_flow/dd_dataset_bak.py +107 -0
- data_utils/UNFaceFlow/models/network_test_flow.py +88 -0
- data_utils/UNFaceFlow/options_test_flow.py +123 -0
- data_utils/UNFaceFlow/pretrain_model/raft-small.pth +3 -0
- data_utils/UNFaceFlow/sgd_NNRT_model_epoch19008_50000.pth +3 -0
- data_utils/UNFaceFlow/test_flow.py +62 -0
- data_utils/UNFaceFlow/utils.py +84 -0
- data_utils/blendshape_capture/face_landmarker.task +3 -0
- data_utils/blendshape_capture/main.py +86 -0
- data_utils/deepspeech_features/README.md +20 -0
- data_utils/deepspeech_features/deepspeech_features.py +275 -0
- data_utils/deepspeech_features/deepspeech_store.py +172 -0
- data_utils/deepspeech_features/extract_ds_features.py +132 -0
- data_utils/deepspeech_features/extract_wav.py +87 -0
- data_utils/deepspeech_features/fea_win.py +11 -0
- data_utils/face_parsing/logger.py +23 -0
- data_utils/face_parsing/model.py +285 -0
- data_utils/face_parsing/resnet.py +109 -0
- data_utils/face_parsing/test.py +148 -0
- data_utils/face_tracking/3DMM/lands_info.txt +403 -0
- data_utils/face_tracking/3DMM/tris.txt +0 -0
- data_utils/face_tracking/3DMM/vert_tris.txt +0 -0
- data_utils/face_tracking/__init__.py +0 -0
- data_utils/face_tracking/bundle_adjustment.py +63 -0
- data_utils/face_tracking/convert_BFM.py +39 -0
- data_utils/face_tracking/data_loader.py +23 -0
- data_utils/face_tracking/face_tracker.py +146 -0
- data_utils/face_tracking/facemodel.py +60 -0
- data_utils/face_tracking/geo_transform.py +60 -0
.gitattributes
CHANGED
@@ -32,4 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.task filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,16 @@
FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04

RUN apt-get update && apt-get install -y \
    python3.8 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
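Usage note (not part of the commit; image name assumed): the image would typically be built with "docker build -t synctalk ." and started with "docker run --gpus all -p 7860:7860 synctalk", after which the CMD above serves the uvicorn app on port 7860.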
LICENSE
ADDED
@@ -0,0 +1,13 @@
Copyright (c) 2024 Peng Ziqiao

This work is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0). To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, and distribute the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

1. Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.

2. NonCommercial — You may not use the material for commercial purposes.

3. No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
app.py
ADDED
@@ -0,0 +1,16 @@
import os

os.system(f"pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html")
os.system(f"pip install tensorflow-gpu==2.8.1")
os.system(f"pip install ./freqencoder")
os.system(f"pip install ./shencoder")
os.system(f"pip install ./gridencoder")
os.system(f"pip install ./raymarching")

#os.system(f"pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers")
assets/image/synctalk.png
ADDED
Git LFS Details
data_utils/UNFaceFlow/core/__init__.py
ADDED
File without changes
data_utils/UNFaceFlow/core/corr.py
ADDED
@@ -0,0 +1,91 @@
import torch
import torch.nn.functional as F
from utils_core.utils import bilinear_sampler, coords_grid

try:
    import alt_cuda_corr
except:
    # alt_cuda_corr is not compiled
    pass


class CorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        corr = CorrBlock.corr(fmap1, fmap2)

        batch, h1, w1, dim, h2, w2 = corr.shape
        corr = corr.reshape(batch*h1*w1, dim, h2, w2)

        self.corr_pyramid.append(corr)
        for i in range(self.num_levels-1):
            corr = F.avg_pool2d(corr, 2, stride=2)
            self.corr_pyramid.append(corr)

    def __call__(self, coords):
        r = self.radius
        coords = coords.permute(0, 2, 3, 1)
        batch, h1, w1, _ = coords.shape

        out_pyramid = []
        for i in range(self.num_levels):
            corr = self.corr_pyramid[i]
            dx = torch.linspace(-r, r, 2*r+1)
            dy = torch.linspace(-r, r, 2*r+1)
            delta = torch.stack(torch.meshgrid(dy, dx), axis=-1).to(coords.device)

            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
            coords_lvl = centroid_lvl + delta_lvl

            corr = bilinear_sampler(corr, coords_lvl)
            corr = corr.view(batch, h1, w1, -1)
            out_pyramid.append(corr)

        out = torch.cat(out_pyramid, dim=-1)
        return out.permute(0, 3, 1, 2).contiguous().float()

    @staticmethod
    def corr(fmap1, fmap2):
        batch, dim, ht, wd = fmap1.shape
        fmap1 = fmap1.view(batch, dim, ht*wd)
        fmap2 = fmap2.view(batch, dim, ht*wd)

        corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
        corr = corr.view(batch, ht, wd, 1, ht, wd)
        return corr / torch.sqrt(torch.tensor(dim).float())


class AlternateCorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        self.num_levels = num_levels
        self.radius = radius

        self.pyramid = [(fmap1, fmap2)]
        for i in range(self.num_levels):
            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
            self.pyramid.append((fmap1, fmap2))

    def __call__(self, coords):
        coords = coords.permute(0, 2, 3, 1)
        B, H, W, _ = coords.shape
        dim = self.pyramid[0][0].shape[1]

        corr_list = []
        for i in range(self.num_levels):
            r = self.radius
            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()

            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
            corr_list.append(corr.squeeze(1))

        corr = torch.stack(corr_list, dim=1)
        corr = corr.reshape(B, -1, H, W)
        return corr / torch.sqrt(torch.tensor(dim).float())
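Usage note (a minimal sketch, not part of the commit; tensor sizes are assumed and coords_grid follows the RAFT convention used in raft.py below, run from the core directory):

import torch
from corr import CorrBlock
from utils_core.utils import coords_grid

# Feature maps at 1/8 input resolution, as produced by the encoders in extractor.py.
fmap1 = torch.randn(1, 256, 40, 40)
fmap2 = torch.randn(1, 256, 40, 40)

# Build the 4-level correlation pyramid and query it at an initial coordinate grid.
corr_fn = CorrBlock(fmap1, fmap2, num_levels=4, radius=4)
coords = coords_grid(1, 40, 40)          # (N, 2, H/8, W/8)
corr = corr_fn(coords)
print(corr.shape)                        # (1, 4*(2*4+1)**2, 40, 40) == (1, 324, 40, 40)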
data_utils/UNFaceFlow/core/datasets.py
ADDED
@@ -0,0 +1,235 @@
# Data loading based on https://github.com/NVIDIA/flownet2-pytorch

import numpy as np
import torch
import torch.utils.data as data
import torch.nn.functional as F

import os
import math
import random
from glob import glob
import os.path as osp

from utils import frame_utils
from utils.augmentor import FlowAugmentor, SparseFlowAugmentor


class FlowDataset(data.Dataset):
    def __init__(self, aug_params=None, sparse=False):
        self.augmentor = None
        self.sparse = sparse
        if aug_params is not None:
            if sparse:
                self.augmentor = SparseFlowAugmentor(**aug_params)
            else:
                self.augmentor = FlowAugmentor(**aug_params)

        self.is_test = False
        self.init_seed = False
        self.flow_list = []
        self.image_list = []
        self.extra_info = []

    def __getitem__(self, index):

        if self.is_test:
            img1 = frame_utils.read_gen(self.image_list[index][0])
            img2 = frame_utils.read_gen(self.image_list[index][1])
            img1 = np.array(img1).astype(np.uint8)[..., :3]
            img2 = np.array(img2).astype(np.uint8)[..., :3]
            img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
            img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
            return img1, img2, self.extra_info[index]

        if not self.init_seed:
            worker_info = torch.utils.data.get_worker_info()
            if worker_info is not None:
                torch.manual_seed(worker_info.id)
                np.random.seed(worker_info.id)
                random.seed(worker_info.id)
                self.init_seed = True

        index = index % len(self.image_list)
        valid = None
        if self.sparse:
            flow, valid = frame_utils.readFlowKITTI(self.flow_list[index])
        else:
            flow = frame_utils.read_gen(self.flow_list[index])

        img1 = frame_utils.read_gen(self.image_list[index][0])
        img2 = frame_utils.read_gen(self.image_list[index][1])

        flow = np.array(flow).astype(np.float32)
        img1 = np.array(img1).astype(np.uint8)
        img2 = np.array(img2).astype(np.uint8)

        # grayscale images
        if len(img1.shape) == 2:
            img1 = np.tile(img1[..., None], (1, 1, 3))
            img2 = np.tile(img2[..., None], (1, 1, 3))
        else:
            img1 = img1[..., :3]
            img2 = img2[..., :3]

        if self.augmentor is not None:
            if self.sparse:
                img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid)
            else:
                img1, img2, flow = self.augmentor(img1, img2, flow)

        img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
        img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
        flow = torch.from_numpy(flow).permute(2, 0, 1).float()

        if valid is not None:
            valid = torch.from_numpy(valid)
        else:
            valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000)

        return img1, img2, flow, valid.float()

    def __rmul__(self, v):
        self.flow_list = v * self.flow_list
        self.image_list = v * self.image_list
        return self

    def __len__(self):
        return len(self.image_list)


class MpiSintel(FlowDataset):
    def __init__(self, aug_params=None, split='training', root='datasets/Sintel', dstype='clean'):
        super(MpiSintel, self).__init__(aug_params)
        flow_root = osp.join(root, split, 'flow')
        image_root = osp.join(root, split, dstype)

        if split == 'test':
            self.is_test = True

        for scene in os.listdir(image_root):
            image_list = sorted(glob(osp.join(image_root, scene, '*.png')))
            for i in range(len(image_list)-1):
                self.image_list += [ [image_list[i], image_list[i+1]] ]
                self.extra_info += [ (scene, i) ]  # scene and frame_id

            if split != 'test':
                self.flow_list += sorted(glob(osp.join(flow_root, scene, '*.flo')))


class FlyingChairs(FlowDataset):
    def __init__(self, aug_params=None, split='train', root='datasets/FlyingChairs_release/data'):
        super(FlyingChairs, self).__init__(aug_params)

        images = sorted(glob(osp.join(root, '*.ppm')))
        flows = sorted(glob(osp.join(root, '*.flo')))
        assert (len(images)//2 == len(flows))

        split_list = np.loadtxt('chairs_split.txt', dtype=np.int32)
        for i in range(len(flows)):
            xid = split_list[i]
            if (split == 'training' and xid == 1) or (split == 'validation' and xid == 2):
                self.flow_list += [ flows[i] ]
                self.image_list += [ [images[2*i], images[2*i+1]] ]


class FlyingThings3D(FlowDataset):
    def __init__(self, aug_params=None, root='datasets/FlyingThings3D', dstype='frames_cleanpass'):
        super(FlyingThings3D, self).__init__(aug_params)

        for cam in ['left']:
            for direction in ['into_future', 'into_past']:
                image_dirs = sorted(glob(osp.join(root, dstype, 'TRAIN/*/*')))
                image_dirs = sorted([osp.join(f, cam) for f in image_dirs])

                flow_dirs = sorted(glob(osp.join(root, 'optical_flow/TRAIN/*/*')))
                flow_dirs = sorted([osp.join(f, direction, cam) for f in flow_dirs])

                for idir, fdir in zip(image_dirs, flow_dirs):
                    images = sorted(glob(osp.join(idir, '*.png')))
                    flows = sorted(glob(osp.join(fdir, '*.pfm')))
                    for i in range(len(flows)-1):
                        if direction == 'into_future':
                            self.image_list += [ [images[i], images[i+1]] ]
                            self.flow_list += [ flows[i] ]
                        elif direction == 'into_past':
                            self.image_list += [ [images[i+1], images[i]] ]
                            self.flow_list += [ flows[i+1] ]


class KITTI(FlowDataset):
    def __init__(self, aug_params=None, split='training', root='datasets/KITTI'):
        super(KITTI, self).__init__(aug_params, sparse=True)
        if split == 'testing':
            self.is_test = True

        root = osp.join(root, split)
        images1 = sorted(glob(osp.join(root, 'image_2/*_10.png')))
        images2 = sorted(glob(osp.join(root, 'image_2/*_11.png')))

        for img1, img2 in zip(images1, images2):
            frame_id = img1.split('/')[-1]
            self.extra_info += [ [frame_id] ]
            self.image_list += [ [img1, img2] ]

        if split == 'training':
            self.flow_list = sorted(glob(osp.join(root, 'flow_occ/*_10.png')))


class HD1K(FlowDataset):
    def __init__(self, aug_params=None, root='datasets/HD1k'):
        super(HD1K, self).__init__(aug_params, sparse=True)

        seq_ix = 0
        while 1:
            flows = sorted(glob(os.path.join(root, 'hd1k_flow_gt', 'flow_occ/%06d_*.png' % seq_ix)))
            images = sorted(glob(os.path.join(root, 'hd1k_input', 'image_2/%06d_*.png' % seq_ix)))

            if len(flows) == 0:
                break

            for i in range(len(flows)-1):
                self.flow_list += [flows[i]]
                self.image_list += [ [images[i], images[i+1]] ]

            seq_ix += 1


def fetch_dataloader(args, TRAIN_DS='C+T+K+S+H'):
    """ Create the data loader for the corresponding training set """

    if args.stage == 'chairs':
        aug_params = {'crop_size': args.image_size, 'min_scale': -0.1, 'max_scale': 1.0, 'do_flip': True}
        train_dataset = FlyingChairs(aug_params, split='training')

    elif args.stage == 'things':
        aug_params = {'crop_size': args.image_size, 'min_scale': -0.4, 'max_scale': 0.8, 'do_flip': True}
        clean_dataset = FlyingThings3D(aug_params, dstype='frames_cleanpass')
        final_dataset = FlyingThings3D(aug_params, dstype='frames_finalpass')
        train_dataset = clean_dataset + final_dataset

    elif args.stage == 'sintel':
        aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.6, 'do_flip': True}
        things = FlyingThings3D(aug_params, dstype='frames_cleanpass')
        sintel_clean = MpiSintel(aug_params, split='training', dstype='clean')
        sintel_final = MpiSintel(aug_params, split='training', dstype='final')

        if TRAIN_DS == 'C+T+K+S+H':
            kitti = KITTI({'crop_size': args.image_size, 'min_scale': -0.3, 'max_scale': 0.5, 'do_flip': True})
            hd1k = HD1K({'crop_size': args.image_size, 'min_scale': -0.5, 'max_scale': 0.2, 'do_flip': True})
            train_dataset = 100*sintel_clean + 100*sintel_final + 200*kitti + 5*hd1k + things

        elif TRAIN_DS == 'C+T+K/S':
            train_dataset = 100*sintel_clean + 100*sintel_final + things

    elif args.stage == 'kitti':
        aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.4, 'do_flip': False}
        train_dataset = KITTI(aug_params, split='training')

    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size,
        pin_memory=False, shuffle=True, num_workers=4, drop_last=True)

    print('Training with %d image pairs' % len(train_dataset))
    return train_loader
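Usage note (a minimal sketch, not part of the commit; the args fields are exactly the ones fetch_dataloader reads above, values assumed, and the upstream RAFT utils package plus the FlyingChairs data are assumed to be importable/present):

from argparse import Namespace
from datasets import fetch_dataloader

# Hypothetical training arguments for the 'chairs' stage.
args = Namespace(stage='chairs', image_size=[368, 496], batch_size=4)

train_loader = fetch_dataloader(args)
for img1, img2, flow, valid in train_loader:
    # img1/img2: (B, 3, H, W) floats in 0-255; flow: (B, 2, H, W); valid: (B, H, W)
    break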
data_utils/UNFaceFlow/core/extractor.py
ADDED
@@ -0,0 +1,266 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class ResidualBlock(nn.Module):
    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
        super(ResidualBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

        num_groups = planes // 8

        if norm_fn == 'group':
            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
            if not stride == 1:
                self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)

        elif norm_fn == 'batch':
            self.norm1 = nn.BatchNorm2d(planes)
            self.norm2 = nn.BatchNorm2d(planes)
            if not stride == 1:
                self.norm3 = nn.BatchNorm2d(planes)

        elif norm_fn == 'instance':
            self.norm1 = nn.InstanceNorm2d(planes)
            self.norm2 = nn.InstanceNorm2d(planes)
            if not stride == 1:
                self.norm3 = nn.InstanceNorm2d(planes)

        elif norm_fn == 'none':
            self.norm1 = nn.Sequential()
            self.norm2 = nn.Sequential()
            if not stride == 1:
                self.norm3 = nn.Sequential()

        if stride == 1:
            self.downsample = None

        else:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)

    def forward(self, x):
        y = x
        y = self.relu(self.norm1(self.conv1(y)))
        y = self.relu(self.norm2(self.conv2(y)))

        if self.downsample is not None:
            x = self.downsample(x)

        return self.relu(x+y)


class BottleneckBlock(nn.Module):
    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
        super(BottleneckBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
        self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)

        num_groups = planes // 8

        if norm_fn == 'group':
            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
            self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
            if not stride == 1:
                self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)

        elif norm_fn == 'batch':
            self.norm1 = nn.BatchNorm2d(planes//4)
            self.norm2 = nn.BatchNorm2d(planes//4)
            self.norm3 = nn.BatchNorm2d(planes)
            if not stride == 1:
                self.norm4 = nn.BatchNorm2d(planes)

        elif norm_fn == 'instance':
            self.norm1 = nn.InstanceNorm2d(planes//4)
            self.norm2 = nn.InstanceNorm2d(planes//4)
            self.norm3 = nn.InstanceNorm2d(planes)
            if not stride == 1:
                self.norm4 = nn.InstanceNorm2d(planes)

        elif norm_fn == 'none':
            self.norm1 = nn.Sequential()
            self.norm2 = nn.Sequential()
            self.norm3 = nn.Sequential()
            if not stride == 1:
                self.norm4 = nn.Sequential()

        if stride == 1:
            self.downsample = None

        else:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)

    def forward(self, x):
        y = x
        y = self.relu(self.norm1(self.conv1(y)))
        y = self.relu(self.norm2(self.conv2(y)))
        y = self.relu(self.norm3(self.conv3(y)))

        if self.downsample is not None:
            x = self.downsample(x)

        return self.relu(x+y)


class BasicEncoder(nn.Module):
    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
        super(BasicEncoder, self).__init__()
        self.norm_fn = norm_fn

        if self.norm_fn == 'group':
            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)

        elif self.norm_fn == 'batch':
            self.norm1 = nn.BatchNorm2d(64)

        elif self.norm_fn == 'instance':
            self.norm1 = nn.InstanceNorm2d(64)

        elif self.norm_fn == 'none':
            self.norm1 = nn.Sequential()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.relu1 = nn.ReLU(inplace=True)

        self.in_planes = 64
        self.layer1 = self._make_layer(64, stride=1)
        self.layer2 = self._make_layer(96, stride=2)
        self.layer3 = self._make_layer(128, stride=2)

        # output convolution
        self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)

        self.dropout = None
        if dropout > 0:
            self.dropout = nn.Dropout2d(p=dropout)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def _make_layer(self, dim, stride=1):
        layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
        layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
        layers = (layer1, layer2)

        self.in_planes = dim
        return nn.Sequential(*layers)

    def forward(self, x):

        # if input is list, combine batch dimension
        is_list = isinstance(x, tuple) or isinstance(x, list)
        if is_list:
            batch_dim = x[0].shape[0]
            x = torch.cat(x, dim=0)

        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.conv2(x)

        if self.training and self.dropout is not None:
            x = self.dropout(x)

        if is_list:
            x = torch.split(x, [batch_dim, batch_dim], dim=0)

        return x


class SmallEncoder(nn.Module):
    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
        super(SmallEncoder, self).__init__()
        self.norm_fn = norm_fn

        if self.norm_fn == 'group':
            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32)

        elif self.norm_fn == 'batch':
            self.norm1 = nn.BatchNorm2d(32)

        elif self.norm_fn == 'instance':
            self.norm1 = nn.InstanceNorm2d(32)

        elif self.norm_fn == 'none':
            self.norm1 = nn.Sequential()

        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
        self.relu1 = nn.ReLU(inplace=True)

        self.in_planes = 32
        self.layer1 = self._make_layer(32, stride=1)
        self.layer2 = self._make_layer(64, stride=2)
        self.layer3 = self._make_layer(96, stride=2)

        self.dropout = None
        if dropout > 0:
            self.dropout = nn.Dropout2d(p=dropout)

        self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def _make_layer(self, dim, stride=1):
        layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride)
        layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1)
        layers = (layer1, layer2)

        self.in_planes = dim
        return nn.Sequential(*layers)

    def forward(self, x):

        # if input is list, combine batch dimension
        is_list = isinstance(x, tuple) or isinstance(x, list)
        if is_list:
            batch_dim = x[0].shape[0]
            x = torch.cat(x, dim=0)

        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.conv2(x)

        if self.training and self.dropout is not None:
            x = self.dropout(x)

        if is_list:
            x = torch.split(x, [batch_dim, batch_dim], dim=0)

        return x
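Usage note (a minimal shape-check sketch, not part of the commit; input size assumed). Both encoders downsample by a factor of 8:

import torch
from extractor import BasicEncoder, SmallEncoder

x = torch.randn(2, 3, 320, 320)

fnet = BasicEncoder(output_dim=256, norm_fn='instance')   # feature network in the non-small RAFT config
cnet = SmallEncoder(output_dim=96 + 64, norm_fn='none')   # context network in the small config

print(fnet(x).shape)   # torch.Size([2, 256, 40, 40])
print(cnet(x).shape)   # torch.Size([2, 160, 40, 40])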
data_utils/UNFaceFlow/core/nnutils.py
ADDED
@@ -0,0 +1,233 @@
import sys, os
import numpy as np
import torch


def make_conv(n_in, n_out, n_blocks, kernel=3, normalization=torch.nn.BatchNorm3d, activation=torch.nn.ReLU):
    blocks = []
    for i in range(n_blocks):
        in1 = n_in if i == 0 else n_out
        blocks.append(torch.nn.Sequential(
            torch.nn.Conv3d(in1, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
            activation(inplace=True)
        ))
    return torch.nn.Sequential(*blocks)


def make_conv_2d(n_in, n_out, n_blocks, kernel=3, normalization=torch.nn.BatchNorm2d, activation=torch.nn.ReLU):
    blocks = []
    for i in range(n_blocks):
        in1 = n_in if i == 0 else n_out
        blocks.append(torch.nn.Sequential(
            torch.nn.Conv2d(in1, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
            activation(inplace=True)
        ))
    return torch.nn.Sequential(*blocks)


def make_downscale(n_in, n_out, kernel=4, normalization=torch.nn.BatchNorm3d, activation=torch.nn.ReLU):
    block = torch.nn.Sequential(
        torch.nn.Conv3d(n_in, n_out, kernel_size=kernel, stride=2, padding=(kernel-2)//2),
        normalization(n_out),
        activation(inplace=True)
    )
    return block


def make_downscale_2d(n_in, n_out, kernel=4, normalization=torch.nn.BatchNorm2d, activation=torch.nn.ReLU):
    block = torch.nn.Sequential(
        torch.nn.Conv2d(n_in, n_out, kernel_size=kernel, stride=2, padding=(kernel-2)//2),
        normalization(n_out),
        activation(inplace=True)
    )
    return block


def make_upscale(n_in, n_out, normalization=torch.nn.BatchNorm3d, activation=torch.nn.ReLU):
    block = torch.nn.Sequential(
        torch.nn.ConvTranspose3d(n_in, n_out, kernel_size=6, stride=2, padding=2),
        normalization(n_out),
        activation(inplace=True)
    )
    return block


def make_upscale_2d(n_in, n_out, kernel=4, normalization=torch.nn.BatchNorm2d, activation=torch.nn.ReLU):
    block = torch.nn.Sequential(
        torch.nn.ConvTranspose2d(n_in, n_out, kernel_size=kernel, stride=2, padding=(kernel-2)//2),
        normalization(n_out),
        activation(inplace=True)
    )
    return block


class ResBlock(torch.nn.Module):
    def __init__(self, n_out, kernel=3, normalization=torch.nn.BatchNorm3d, activation=torch.nn.ReLU):
        super().__init__()
        self.block0 = torch.nn.Sequential(
            torch.nn.Conv3d(n_out, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
            activation(inplace=True)
        )

        self.block1 = torch.nn.Sequential(
            torch.nn.Conv3d(n_out, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
        )

        self.block2 = torch.nn.ReLU()

    def forward(self, x0):
        x = self.block0(x0)
        x = self.block1(x)
        x = self.block2(x + x0)
        return x


class ResBlock2d(torch.nn.Module):
    def __init__(self, n_out, kernel=3, normalization=torch.nn.BatchNorm2d, activation=torch.nn.ReLU):
        super().__init__()
        self.block0 = torch.nn.Sequential(
            torch.nn.Conv2d(n_out, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
            activation(inplace=True)
        )

        self.block1 = torch.nn.Sequential(
            torch.nn.Conv2d(n_out, n_out, kernel_size=kernel, stride=1, padding=(kernel//2)),
            normalization(n_out),
        )

        self.block2 = torch.nn.ReLU()

    def forward(self, x0):
        x = self.block0(x0)
        x = self.block1(x)
        x = self.block2(x + x0)
        return x


class Identity(torch.nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__()

    def forward(self, x):
        return x


def downscale_gt_flow(flow_gt, flow_mask, image_height, image_width):
    flow_gt_copy = flow_gt.clone()
    flow_mask_copy = flow_mask.clone()

    flow_gt_copy = flow_gt_copy / 20.0
    flow_mask_copy = flow_mask_copy.float()

    assert image_height % 64 == 0 and image_width % 64 == 0

    flow_gt2 = torch.nn.functional.interpolate(input=flow_gt_copy, size=(image_height//4, image_width//4), mode='nearest')
    flow_mask2 = torch.nn.functional.interpolate(input=flow_mask_copy, size=(image_height//4, image_width//4), mode='nearest').bool()

    flow_gt3 = torch.nn.functional.interpolate(input=flow_gt_copy, size=(image_height//8, image_width//8), mode='nearest')
    flow_mask3 = torch.nn.functional.interpolate(input=flow_mask_copy, size=(image_height//8, image_width//8), mode='nearest').bool()

    flow_gt4 = torch.nn.functional.interpolate(input=flow_gt_copy, size=(image_height//16, image_width//16), mode='nearest')
    flow_mask4 = torch.nn.functional.interpolate(input=flow_mask_copy, size=(image_height//16, image_width//16), mode='nearest').bool()

    flow_gt5 = torch.nn.functional.interpolate(input=flow_gt_copy, size=(image_height//32, image_width//32), mode='nearest')
    flow_mask5 = torch.nn.functional.interpolate(input=flow_mask_copy, size=(image_height//32, image_width//32), mode='nearest').bool()

    flow_gt6 = torch.nn.functional.interpolate(input=flow_gt_copy, size=(image_height//64, image_width//64), mode='nearest')
    flow_mask6 = torch.nn.functional.interpolate(input=flow_mask_copy, size=(image_height//64, image_width//64), mode='nearest').bool()

    return [flow_gt2, flow_gt3, flow_gt4, flow_gt5, flow_gt6], [flow_mask2, flow_mask3, flow_mask4, flow_mask5, flow_mask6]


def compute_baseline_mask_gt(
    xy_coords_warped,
    target_matches, valid_target_matches,
    source_points, valid_source_points,
    scene_flow_gt, scene_flow_mask, target_boundary_mask,
    max_pos_flowed_source_to_target_dist, min_neg_flowed_source_to_target_dist
):
    # Scene flow mask
    scene_flow_mask_0 = scene_flow_mask[:, 0].type(torch.bool)

    # Boundary correspondences mask
    # We use nearest neighbor interpolation, since the boundary computation
    # already marks any of 4 pixels as boundary.
    target_nonboundary_mask = (~target_boundary_mask).type(torch.float32)
    target_matches_nonboundary_mask = torch.nn.functional.grid_sample(target_nonboundary_mask, xy_coords_warped, padding_mode='zeros', mode='nearest', align_corners=False)
    target_matches_nonboundary_mask = target_matches_nonboundary_mask[:, 0, :, :] >= 0.999

    # Compute groundtruth mask (oracle)
    flowed_source_points = source_points + scene_flow_gt
    dist = torch.norm(flowed_source_points - target_matches, p=2, dim=1)

    # Combine all masks
    # We mark a correspondence as positive if:
    # - it is close enough to groundtruth flow
    # AND
    # - there exists groundtruth flow
    # AND
    # - the target match is valid
    # AND
    # - the source point is valid
    # AND
    # - the target match is not on the boundary
    mask_pos_gt = (dist <= max_pos_flowed_source_to_target_dist) & scene_flow_mask_0 & valid_target_matches & valid_source_points & target_matches_nonboundary_mask

    # We mark a correspondence as negative if:
    # - there exists groundtruth flow AND it is far away enough from the groundtruth flow AND source/target points are valid
    # OR
    # - the target match is on the boundary AND there exists groundtruth flow AND source/target points are valid
    mask_neg_gt = ((dist > min_neg_flowed_source_to_target_dist) & scene_flow_mask_0 & valid_source_points & valid_target_matches) \
        | (~target_matches_nonboundary_mask & scene_flow_mask_0 & valid_source_points & valid_target_matches)

    # What remains is left undecided (masked out at loss).
    # For the groundtruth mask we set it to zero.
    valid_mask_pixels = mask_pos_gt | mask_neg_gt
    mask_gt = mask_pos_gt

    mask_gt = mask_gt.type(torch.float32)

    return mask_gt, valid_mask_pixels


def compute_deformed_points_gt(
    source_points, scene_flow_gt,
    valid_solve, valid_correspondences,
    deformed_points_idxs, deformed_points_subsampled
):
    batch_size = source_points.shape[0]
    max_warped_points = deformed_points_idxs.shape[1]

    deformed_points_gt = torch.zeros((batch_size, max_warped_points, 3), dtype=source_points.dtype, device=source_points.device)
    deformed_points_mask = torch.zeros((batch_size, max_warped_points, 3), dtype=source_points.dtype, device=source_points.device)

    for i in range(batch_size):
        if valid_solve[i]:
            valid_correspondences_idxs = torch.where(valid_correspondences[i])

            # Compute deformed point groundtruth.
            deformed_points_i_gt = source_points[i] + scene_flow_gt[i]
            deformed_points_i_gt = deformed_points_i_gt.permute(1, 2, 0)
            deformed_points_i_gt = deformed_points_i_gt[valid_correspondences_idxs[0], valid_correspondences_idxs[1], :].view(-1, 3, 1)

            # Filter out points randomly, if too many are still left.
            if deformed_points_subsampled[i]:
                sampled_idxs_i = deformed_points_idxs[i]
                deformed_points_i_gt = deformed_points_i_gt[sampled_idxs_i]

            num_points = deformed_points_i_gt.shape[0]

            # Store the results.
            deformed_points_gt[i, :num_points, :] = deformed_points_i_gt.view(1, num_points, 3)
            deformed_points_mask[i, :num_points, :] = 1

    return deformed_points_gt, deformed_points_mask
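Usage note (a minimal sketch, not part of the commit; channel sizes assumed). The 2D helpers are thin factories around torch.nn, so a down/up round trip restores the input resolution:

import torch
from nnutils import make_conv_2d, make_downscale_2d, make_upscale_2d, ResBlock2d

x = torch.randn(1, 16, 64, 64)
head = make_conv_2d(16, 32, n_blocks=2)   # two conv+BN+ReLU blocks, 64x64 preserved
down = make_downscale_2d(32, 64)          # stride-2 conv, 64x64 -> 32x32
res = ResBlock2d(64)                      # residual refinement at constant size
up = make_upscale_2d(64, 32)              # transposed conv, 32x32 -> 64x64

y = up(res(down(head(x))))
print(y.shape)                            # torch.Size([1, 32, 64, 64])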
data_utils/UNFaceFlow/core/raft.py
ADDED
@@ -0,0 +1,259 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from update import BasicUpdateBlock, SmallUpdateBlock
from extractor import BasicEncoder, SmallEncoder
from corr import CorrBlock, AlternateCorrBlock
from utils_core.utils import bilinear_sampler, coords_grid, upflow8

try:
    autocast = torch.cuda.amp.autocast
except:
    # dummy autocast for PyTorch < 1.6
    class autocast:
        def __init__(self, enabled):
            pass
        def __enter__(self):
            pass
        def __exit__(self, *args):
            pass


class RAFT(nn.Module):
    def __init__(self, args):
        super(RAFT, self).__init__()
        self.args = args

        if args.small:
            self.hidden_dim = hdim = 96
            self.context_dim = cdim = 64
            args.corr_levels = 4
            args.corr_radius = 3

        else:
            self.hidden_dim = hdim = 128
            self.context_dim = cdim = 128
            args.corr_levels = 4
            args.corr_radius = 4

        if 'dropout' not in self.args:
            self.args.dropout = 0

        if 'alternate_corr' not in self.args:
            self.args.alternate_corr = False

        # feature network, context network, and update block
        if args.small:
            self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)
            self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout)
            self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)

        else:
            self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)
            self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout)
            self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def initialize_flow(self, img):
        """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0 """
        N, C, H, W = img.shape
        coords0 = coords_grid(N, H//8, W//8).to(img.device)
        coords1 = coords_grid(N, H//8, W//8).to(img.device)

        # optical flow computed as difference: flow = coords1 - coords0
        return coords0, coords1

    def upsample_flow(self, flow, mask):
        """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
        N, _, H, W = flow.shape
        mask = mask.view(N, 1, 9, 8, 8, H, W)
        mask = torch.softmax(mask, dim=2)

        up_flow = F.unfold(8 * flow, [3, 3], padding=1)
        up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)

        up_flow = torch.sum(mask * up_flow, dim=2)
        up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
        return up_flow.reshape(N, 2, 8*H, 8*W)

    def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
        """ Estimate optical flow between pair of frames """

        image1 = 2 * (image1 / 255.0) - 1.0
        image2 = 2 * (image2 / 255.0) - 1.0

        image1 = image1.contiguous()
        image2 = image2.contiguous()

        hdim = self.hidden_dim
        cdim = self.context_dim

        # run the feature network
        with autocast(enabled=self.args.mixed_precision):
            fmap1, fmap2 = self.fnet([image1, image2])

        fmap1 = fmap1.float()
        fmap2 = fmap2.float()
        # print("fmap mean: ", fmap1.mean(), fmap2.mean())
        if self.args.alternate_corr:
            corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
        else:
            corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)

        # run the context network
        with autocast(enabled=self.args.mixed_precision):
            cnet = self.cnet(image1)
            net, inp = torch.split(cnet, [hdim, cdim], dim=1)
            net = torch.tanh(net)
            inp = torch.relu(inp)

        coords0, coords1 = self.initialize_flow(image1)

        if flow_init is not None:
            coords1 = coords1 + flow_init

        flow_predictions = []
        for itr in range(iters):
            coords1 = coords1.detach()
            corr = corr_fn(coords1)  # index correlation volume

            flow = coords1 - coords0
            with autocast(enabled=self.args.mixed_precision):
                net, up_mask, delta_flow, feature = self.update_block(net, inp, corr, flow)
            # print("delta flow mean: ", delta_flow.mean())
            # F(t+1) = F(t) + \Delta(t)
            coords1 = coords1 + delta_flow

            # upsample predictions
            if up_mask is None:
                flow_up = upflow8(coords1 - coords0)
            else:
                flow_up = self.upsample_flow(coords1 - coords0, up_mask)

        return flow_up, feature


class RAFT_ALL(nn.Module):
    def __init__(self, args):
        super(RAFT_ALL, self).__init__()
        self.args = args

        if args.small:
            self.hidden_dim = hdim = 96
            self.context_dim = cdim = 64
            args.corr_levels = 4
            args.corr_radius = 3

        else:
            self.hidden_dim = hdim = 128
            self.context_dim = cdim = 128
            args.corr_levels = 4
            args.corr_radius = 4

        if 'dropout' not in self.args:
            self.args.dropout = 0

        if 'alternate_corr' not in self.args:
            self.args.alternate_corr = False

        # feature network, context network, and update block
        if args.small:
            self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)
            self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout)
            self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)

        else:
            self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)
            self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout)
            self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)

    def freeze_bn(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def initialize_flow(self, img):
        """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0 """
        N, C, H, W = img.shape
        coords0 = coords_grid(N, H//8, W//8).to(img.device)
        coords1 = coords_grid(N, H//8, W//8).to(img.device)

        # optical flow computed as difference: flow = coords1 - coords0
        return coords0, coords1

    def upsample_flow(self, flow, mask):
        """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
        N, _, H, W = flow.shape
        mask = mask.view(N, 1, 9, 8, 8, H, W)
        mask = torch.softmax(mask, dim=2)

        up_flow = F.unfold(8 * flow, [3, 3], padding=1)
        up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)

        up_flow = torch.sum(mask * up_flow, dim=2)
        up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
        return up_flow.reshape(N, 2, 8*H, 8*W)

    def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
        """ Estimate optical flow between pair of frames """

        image1 = 2 * (image1 / 255.0) - 1.0
        image2 = 2 * (image2 / 255.0) - 1.0

        image1 = image1.contiguous()
        image2 = image2.contiguous()

        hdim = self.hidden_dim
        cdim = self.context_dim

        # run the feature network
        with autocast(enabled=self.args.mixed_precision):
            fmap1, fmap2 = self.fnet([image1, image2])

        fmap1 = fmap1.float()
        fmap2 = fmap2.float()
        # print("fmap mean: ", fmap1.mean(), fmap2.mean())
        if self.args.alternate_corr:
            corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
        else:
            corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)

        # run the context network
        with autocast(enabled=self.args.mixed_precision):
            cnet = self.cnet(image1)
            net, inp = torch.split(cnet, [hdim, cdim], dim=1)
            net = torch.tanh(net)
            inp = torch.relu(inp)

        coords0, coords1 = self.initialize_flow(image1)

        if flow_init is not None:
            coords1 = coords1 + flow_init

        flow_predictions = []
        for itr in range(iters):
            coords1 = coords1.detach()
            corr = corr_fn(coords1)  # index correlation volume

            flow = coords1 - coords0
            with autocast(enabled=self.args.mixed_precision):
                net, up_mask, delta_flow, feature = self.update_block(net, inp, corr, flow)
            # print("delta flow mean: ", delta_flow.mean())
            # F(t+1) = F(t) + \Delta(t)
            coords1 = coords1 + delta_flow

            # upsample predictions
            if up_mask is None:
                flow_up = upflow8(coords1 - coords0)
            else:
                flow_up = self.upsample_flow(coords1 - coords0, up_mask)
            flow_predictions.append(flow_up)

        return flow_predictions, feature
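Usage note (a minimal inference sketch, not part of the commit; the args fields are the ones RAFT reads above, values assumed, and the small configuration matches the bundled raft-small.pth checkpoint, whose loading is omitted here):

import torch
from argparse import Namespace
from raft import RAFT

args = Namespace(small=True, mixed_precision=False)
model = RAFT(args).eval()

# Two frames in the 0-255 range with height/width divisible by 8.
image1 = torch.randint(0, 255, (1, 3, 256, 256)).float()
image2 = torch.randint(0, 255, (1, 3, 256, 256)).float()

with torch.no_grad():
    flow_up, feature = model(image1, image2, iters=12)
print(flow_up.shape)   # torch.Size([1, 2, 256, 256]), full-resolution flow
print(feature.shape)   # torch.Size([1, 128, 32, 32]), the extra feature this fork returns alongside the flow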
data_utils/UNFaceFlow/core/update.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from nnutils import make_conv_2d, make_upscale_2d, make_downscale_2d, ResBlock2d, Identity
|
5 |
+
|
6 |
+
class FlowHead(nn.Module):
|
7 |
+
def __init__(self, input_dim=128, hidden_dim=256):
|
8 |
+
super(FlowHead, self).__init__()
|
9 |
+
self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
|
10 |
+
self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1)
|
11 |
+
self.relu = nn.ReLU(inplace=True)
|
12 |
+
|
13 |
+
def forward(self, x):
|
14 |
+
x = self.relu(self.conv1(x))
|
15 |
+
return self.conv2(x), x
|
16 |
+
|
17 |
+
class ConvGRU(nn.Module):
|
18 |
+
def __init__(self, hidden_dim=128, input_dim=192+128):
|
19 |
+
super(ConvGRU, self).__init__()
|
20 |
+
self.convz = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
|
21 |
+
self.convr = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
|
22 |
+
self.convq = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
|
23 |
+
|
24 |
+
def forward(self, h, x):
|
25 |
+
hx = torch.cat([h, x], dim=1)
|
26 |
+
|
27 |
+
z = torch.sigmoid(self.convz(hx))
|
28 |
+
r = torch.sigmoid(self.convr(hx))
|
29 |
+
q = torch.tanh(self.convq(torch.cat([r*h, x], dim=1)))
|
30 |
+
|
31 |
+
h = (1-z) * h + z * q
|
32 |
+
return h
|
33 |
+
|
34 |
+
class SepConvGRU(nn.Module):
|
35 |
+
def __init__(self, hidden_dim=128, input_dim=192+128):
|
36 |
+
super(SepConvGRU, self).__init__()
|
37 |
+
self.convz1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
|
38 |
+
self.convr1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
|
39 |
+
self.convq1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
|
40 |
+
|
41 |
+
self.convz2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
|
42 |
+
self.convr2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
|
43 |
+
self.convq2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
|
44 |
+
|
45 |
+
|
46 |
+
def forward(self, h, x):
|
47 |
+
# horizontal
|
48 |
+
hx = torch.cat([h, x], dim=1)
|
49 |
+
z = torch.sigmoid(self.convz1(hx))
|
50 |
+
r = torch.sigmoid(self.convr1(hx))
|
51 |
+
q = torch.tanh(self.convq1(torch.cat([r*h, x], dim=1)))
|
52 |
+
h = (1-z) * h + z * q
|
53 |
+
|
54 |
+
# vertical
|
55 |
+
hx = torch.cat([h, x], dim=1)
|
56 |
+
z = torch.sigmoid(self.convz2(hx))
|
57 |
+
r = torch.sigmoid(self.convr2(hx))
|
58 |
+
q = torch.tanh(self.convq2(torch.cat([r*h, x], dim=1)))
|
59 |
+
h = (1-z) * h + z * q
|
60 |
+
|
61 |
+
return h
|
62 |
+
|
63 |
+
class SmallMotionEncoder(nn.Module):
|
64 |
+
def __init__(self, args):
|
65 |
+
super(SmallMotionEncoder, self).__init__()
|
66 |
+
cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
|
67 |
+
self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0)
|
68 |
+
self.convf1 = nn.Conv2d(2, 64, 7, padding=3)
|
69 |
+
self.convf2 = nn.Conv2d(64, 32, 3, padding=1)
|
70 |
+
self.conv = nn.Conv2d(128, 80, 3, padding=1)
|
71 |
+
|
72 |
+
def forward(self, flow, corr):
|
73 |
+
cor = F.relu(self.convc1(corr))
|
74 |
+
flo = F.relu(self.convf1(flow))
|
75 |
+
flo = F.relu(self.convf2(flo))
|
76 |
+
cor_flo = torch.cat([cor, flo], dim=1)
|
77 |
+
out = F.relu(self.conv(cor_flo))
|
78 |
+
return torch.cat([out, flow], dim=1)
|
79 |
+
|
80 |
+
class BasicMotionEncoder(nn.Module):
|
81 |
+
def __init__(self, args):
|
82 |
+
super(BasicMotionEncoder, self).__init__()
|
83 |
+
cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
|
84 |
+
self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0)
|
85 |
+
self.convc2 = nn.Conv2d(256, 192, 3, padding=1)
|
86 |
+
self.convf1 = nn.Conv2d(2, 128, 7, padding=3)
|
87 |
+
self.convf2 = nn.Conv2d(128, 64, 3, padding=1)
|
88 |
+
self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1)
|
89 |
+
|
90 |
+
def forward(self, flow, corr):
|
91 |
+
cor = F.relu(self.convc1(corr))
|
92 |
+
cor = F.relu(self.convc2(cor))
|
93 |
+
flo = F.relu(self.convf1(flow))
|
94 |
+
flo = F.relu(self.convf2(flo))
|
95 |
+
|
96 |
+
cor_flo = torch.cat([cor, flo], dim=1)
|
97 |
+
out = F.relu(self.conv(cor_flo))
|
98 |
+
return torch.cat([out, flow], dim=1)
|
99 |
+
|
100 |
+
class SmallUpdateBlock(nn.Module):
|
101 |
+
def __init__(self, args, hidden_dim=96):
|
102 |
+
super(SmallUpdateBlock, self).__init__()
|
103 |
+
self.encoder = SmallMotionEncoder(args)
|
104 |
+
self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64)
|
105 |
+
self.flow_head = FlowHead(hidden_dim, hidden_dim=128)
|
106 |
+
|
107 |
+
def forward(self, net, inp, corr, flow):
|
108 |
+
motion_features = self.encoder(flow, corr)
|
109 |
+
inp = torch.cat([inp, motion_features], dim=1)
|
110 |
+
net = self.gru(net, inp)
|
111 |
+
delta_flow, feature = self.flow_head(net)
|
112 |
+
|
113 |
+
return net, None, delta_flow, feature
|
114 |
+
|
115 |
+
class BasicUpdateBlock(nn.Module):
|
116 |
+
def __init__(self, args, hidden_dim=128, input_dim=128):
|
117 |
+
super(BasicUpdateBlock, self).__init__()
|
118 |
+
self.args = args
|
119 |
+
self.encoder = BasicMotionEncoder(args)
|
120 |
+
self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim)
|
121 |
+
self.flow_head = FlowHead(hidden_dim, hidden_dim=256)
|
122 |
+
|
123 |
+
self.mask = nn.Sequential(
|
124 |
+
nn.Conv2d(128, 256, 3, padding=1),
|
125 |
+
nn.ReLU(inplace=True),
|
126 |
+
nn.Conv2d(256, 64*9, 1, padding=0))
|
127 |
+
|
128 |
+
def forward(self, net, inp, corr, flow, upsample=True):
|
129 |
+
motion_features = self.encoder(flow, corr)
|
130 |
+
inp = torch.cat([inp, motion_features], dim=1)
|
131 |
+
|
132 |
+
net = self.gru(net, inp)
|
133 |
+
delta_flow, feature = self.flow_head(net)
|
134 |
+
|
135 |
+
# scale mask to balance gradients
|
136 |
+
mask = .25 * self.mask(net)
|
137 |
+
return net, mask, delta_flow, feature
|
138 |
+
|
139 |
+
class BasicWeightsNet(nn.Module):
|
140 |
+
def __init__(self, opt):
|
141 |
+
super(BasicWeightsNet, self).__init__()
|
142 |
+
if opt.small:
|
143 |
+
in_dim = 128
|
144 |
+
else:
|
145 |
+
in_dim = 256
|
146 |
+
fn_0 = 16
|
147 |
+
self.input_fn = fn_0 + 2
|
148 |
+
fn_1 = 16
|
149 |
+
self.conv1 = torch.nn.Conv2d(in_channels=in_dim, out_channels=fn_0, kernel_size=3, stride=1, padding=1)
|
150 |
+
if opt.use_batch_norm:
|
151 |
+
custom_batch_norm = torch.nn.BatchNorm2d
|
152 |
+
else:
|
153 |
+
custom_batch_norm = Identity
|
154 |
+
self.model = nn.Sequential(
|
155 |
+
make_conv_2d(self.input_fn, fn_1, n_blocks=1, normalization=custom_batch_norm),
|
156 |
+
ResBlock2d(fn_1, normalization=custom_batch_norm),
|
157 |
+
ResBlock2d(fn_1, normalization=custom_batch_norm),
|
158 |
+
ResBlock2d(fn_1, normalization=custom_batch_norm),
|
159 |
+
nn.Conv2d(fn_1, 1, kernel_size=3, padding=1),
|
160 |
+
torch.nn.Sigmoid()
|
161 |
+
)
|
162 |
+
|
163 |
+
def forward(self, flow, feature):
|
164 |
+
features = self.conv1(feature)
|
165 |
+
x = torch.cat([features, flow], 1)
|
166 |
+
return self.model(x)
|
167 |
+
|
168 |
+
|
169 |
+
|
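A minimal usage sketch for the update block above, assuming data_utils/UNFaceFlow/core is on sys.path and the usual RAFT-small correlation settings (corr_levels=4, corr_radius=3); batch size and the 1/8-resolution feature-map size are illustrative:

from argparse import Namespace
import torch
from update import SmallUpdateBlock

args = Namespace(corr_levels=4, corr_radius=3)   # assumed RAFT-small settings
block = SmallUpdateBlock(args, hidden_dim=96)
B, H, W = 1, 30, 40                              # feature-map resolution (1/8 of the input)
net = torch.zeros(B, 96, H, W)                   # GRU hidden state
inp = torch.zeros(B, 64, H, W)                   # context features
corr = torch.zeros(B, args.corr_levels * (2 * args.corr_radius + 1) ** 2, H, W)  # correlation volume
flow = torch.zeros(B, 2, H, W)                   # current flow estimate
net, up_mask, delta_flow, feature = block(net, inp, corr, flow)
print(delta_flow.shape)                          # torch.Size([1, 2, 30, 40]); up_mask is None for the small block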
data_utils/UNFaceFlow/core/utils_core/__init__.py
ADDED
File without changes
|
data_utils/UNFaceFlow/core/utils_core/augmentor.py
ADDED
@@ -0,0 +1,246 @@
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
import math
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
import cv2
|
7 |
+
cv2.setNumThreads(0)
|
8 |
+
cv2.ocl.setUseOpenCL(False)
|
9 |
+
|
10 |
+
import torch
|
11 |
+
from torchvision.transforms import ColorJitter
|
12 |
+
import torch.nn.functional as F
|
13 |
+
|
14 |
+
|
15 |
+
class FlowAugmentor:
|
16 |
+
def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True):
|
17 |
+
|
18 |
+
# spatial augmentation params
|
19 |
+
self.crop_size = crop_size
|
20 |
+
self.min_scale = min_scale
|
21 |
+
self.max_scale = max_scale
|
22 |
+
self.spatial_aug_prob = 0.8
|
23 |
+
self.stretch_prob = 0.8
|
24 |
+
self.max_stretch = 0.2
|
25 |
+
|
26 |
+
# flip augmentation params
|
27 |
+
self.do_flip = do_flip
|
28 |
+
self.h_flip_prob = 0.5
|
29 |
+
self.v_flip_prob = 0.1
|
30 |
+
|
31 |
+
# photometric augmentation params
|
32 |
+
self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14)
|
33 |
+
self.asymmetric_color_aug_prob = 0.2
|
34 |
+
self.eraser_aug_prob = 0.5
|
35 |
+
|
36 |
+
def color_transform(self, img1, img2):
|
37 |
+
""" Photometric augmentation """
|
38 |
+
|
39 |
+
# asymmetric
|
40 |
+
if np.random.rand() < self.asymmetric_color_aug_prob:
|
41 |
+
img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
|
42 |
+
img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
|
43 |
+
|
44 |
+
# symmetric
|
45 |
+
else:
|
46 |
+
image_stack = np.concatenate([img1, img2], axis=0)
|
47 |
+
image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
|
48 |
+
img1, img2 = np.split(image_stack, 2, axis=0)
|
49 |
+
|
50 |
+
return img1, img2
|
51 |
+
|
52 |
+
def eraser_transform(self, img1, img2, bounds=[50, 100]):
|
53 |
+
""" Occlusion augmentation """
|
54 |
+
|
55 |
+
ht, wd = img1.shape[:2]
|
56 |
+
if np.random.rand() < self.eraser_aug_prob:
|
57 |
+
mean_color = np.mean(img2.reshape(-1, 3), axis=0)
|
58 |
+
for _ in range(np.random.randint(1, 3)):
|
59 |
+
x0 = np.random.randint(0, wd)
|
60 |
+
y0 = np.random.randint(0, ht)
|
61 |
+
dx = np.random.randint(bounds[0], bounds[1])
|
62 |
+
dy = np.random.randint(bounds[0], bounds[1])
|
63 |
+
img2[y0:y0+dy, x0:x0+dx, :] = mean_color
|
64 |
+
|
65 |
+
return img1, img2
|
66 |
+
|
67 |
+
def spatial_transform(self, img1, img2, flow):
|
68 |
+
# randomly sample scale
|
69 |
+
ht, wd = img1.shape[:2]
|
70 |
+
min_scale = np.maximum(
|
71 |
+
(self.crop_size[0] + 8) / float(ht),
|
72 |
+
(self.crop_size[1] + 8) / float(wd))
|
73 |
+
|
74 |
+
scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
|
75 |
+
scale_x = scale
|
76 |
+
scale_y = scale
|
77 |
+
if np.random.rand() < self.stretch_prob:
|
78 |
+
scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
|
79 |
+
scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
|
80 |
+
|
81 |
+
scale_x = np.clip(scale_x, min_scale, None)
|
82 |
+
scale_y = np.clip(scale_y, min_scale, None)
|
83 |
+
|
84 |
+
if np.random.rand() < self.spatial_aug_prob:
|
85 |
+
# rescale the images
|
86 |
+
img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
|
87 |
+
img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
|
88 |
+
flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
|
89 |
+
flow = flow * [scale_x, scale_y]
|
90 |
+
|
91 |
+
if self.do_flip:
|
92 |
+
if np.random.rand() < self.h_flip_prob: # h-flip
|
93 |
+
img1 = img1[:, ::-1]
|
94 |
+
img2 = img2[:, ::-1]
|
95 |
+
flow = flow[:, ::-1] * [-1.0, 1.0]
|
96 |
+
|
97 |
+
if np.random.rand() < self.v_flip_prob: # v-flip
|
98 |
+
img1 = img1[::-1, :]
|
99 |
+
img2 = img2[::-1, :]
|
100 |
+
flow = flow[::-1, :] * [1.0, -1.0]
|
101 |
+
|
102 |
+
y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0])
|
103 |
+
x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1])
|
104 |
+
|
105 |
+
img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
106 |
+
img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
107 |
+
flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
108 |
+
|
109 |
+
return img1, img2, flow
|
110 |
+
|
111 |
+
def __call__(self, img1, img2, flow):
|
112 |
+
img1, img2 = self.color_transform(img1, img2)
|
113 |
+
img1, img2 = self.eraser_transform(img1, img2)
|
114 |
+
img1, img2, flow = self.spatial_transform(img1, img2, flow)
|
115 |
+
|
116 |
+
img1 = np.ascontiguousarray(img1)
|
117 |
+
img2 = np.ascontiguousarray(img2)
|
118 |
+
flow = np.ascontiguousarray(flow)
|
119 |
+
|
120 |
+
return img1, img2, flow
|
121 |
+
|
122 |
+
class SparseFlowAugmentor:
|
123 |
+
def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False):
|
124 |
+
# spatial augmentation params
|
125 |
+
self.crop_size = crop_size
|
126 |
+
self.min_scale = min_scale
|
127 |
+
self.max_scale = max_scale
|
128 |
+
self.spatial_aug_prob = 0.8
|
129 |
+
self.stretch_prob = 0.8
|
130 |
+
self.max_stretch = 0.2
|
131 |
+
|
132 |
+
# flip augmentation params
|
133 |
+
self.do_flip = do_flip
|
134 |
+
self.h_flip_prob = 0.5
|
135 |
+
self.v_flip_prob = 0.1
|
136 |
+
|
137 |
+
# photometric augmentation params
|
138 |
+
self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14)
|
139 |
+
self.asymmetric_color_aug_prob = 0.2
|
140 |
+
self.eraser_aug_prob = 0.5
|
141 |
+
|
142 |
+
def color_transform(self, img1, img2):
|
143 |
+
image_stack = np.concatenate([img1, img2], axis=0)
|
144 |
+
image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
|
145 |
+
img1, img2 = np.split(image_stack, 2, axis=0)
|
146 |
+
return img1, img2
|
147 |
+
|
148 |
+
def eraser_transform(self, img1, img2):
|
149 |
+
ht, wd = img1.shape[:2]
|
150 |
+
if np.random.rand() < self.eraser_aug_prob:
|
151 |
+
mean_color = np.mean(img2.reshape(-1, 3), axis=0)
|
152 |
+
for _ in range(np.random.randint(1, 3)):
|
153 |
+
x0 = np.random.randint(0, wd)
|
154 |
+
y0 = np.random.randint(0, ht)
|
155 |
+
dx = np.random.randint(50, 100)
|
156 |
+
dy = np.random.randint(50, 100)
|
157 |
+
img2[y0:y0+dy, x0:x0+dx, :] = mean_color
|
158 |
+
|
159 |
+
return img1, img2
|
160 |
+
|
161 |
+
def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
|
162 |
+
ht, wd = flow.shape[:2]
|
163 |
+
coords = np.meshgrid(np.arange(wd), np.arange(ht))
|
164 |
+
coords = np.stack(coords, axis=-1)
|
165 |
+
|
166 |
+
coords = coords.reshape(-1, 2).astype(np.float32)
|
167 |
+
flow = flow.reshape(-1, 2).astype(np.float32)
|
168 |
+
valid = valid.reshape(-1).astype(np.float32)
|
169 |
+
|
170 |
+
coords0 = coords[valid>=1]
|
171 |
+
flow0 = flow[valid>=1]
|
172 |
+
|
173 |
+
ht1 = int(round(ht * fy))
|
174 |
+
wd1 = int(round(wd * fx))
|
175 |
+
|
176 |
+
coords1 = coords0 * [fx, fy]
|
177 |
+
flow1 = flow0 * [fx, fy]
|
178 |
+
|
179 |
+
xx = np.round(coords1[:,0]).astype(np.int32)
|
180 |
+
yy = np.round(coords1[:,1]).astype(np.int32)
|
181 |
+
|
182 |
+
v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1)
|
183 |
+
xx = xx[v]
|
184 |
+
yy = yy[v]
|
185 |
+
flow1 = flow1[v]
|
186 |
+
|
187 |
+
flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32)
|
188 |
+
valid_img = np.zeros([ht1, wd1], dtype=np.int32)
|
189 |
+
|
190 |
+
flow_img[yy, xx] = flow1
|
191 |
+
valid_img[yy, xx] = 1
|
192 |
+
|
193 |
+
return flow_img, valid_img
|
194 |
+
|
195 |
+
def spatial_transform(self, img1, img2, flow, valid):
|
196 |
+
# randomly sample scale
|
197 |
+
|
198 |
+
ht, wd = img1.shape[:2]
|
199 |
+
min_scale = np.maximum(
|
200 |
+
(self.crop_size[0] + 1) / float(ht),
|
201 |
+
(self.crop_size[1] + 1) / float(wd))
|
202 |
+
|
203 |
+
scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
|
204 |
+
scale_x = np.clip(scale, min_scale, None)
|
205 |
+
scale_y = np.clip(scale, min_scale, None)
|
206 |
+
|
207 |
+
if np.random.rand() < self.spatial_aug_prob:
|
208 |
+
# rescale the images
|
209 |
+
img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
|
210 |
+
img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
|
211 |
+
flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y)
|
212 |
+
|
213 |
+
if self.do_flip:
|
214 |
+
if np.random.rand() < 0.5: # h-flip
|
215 |
+
img1 = img1[:, ::-1]
|
216 |
+
img2 = img2[:, ::-1]
|
217 |
+
flow = flow[:, ::-1] * [-1.0, 1.0]
|
218 |
+
valid = valid[:, ::-1]
|
219 |
+
|
220 |
+
margin_y = 20
|
221 |
+
margin_x = 50
|
222 |
+
|
223 |
+
y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y)
|
224 |
+
x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x)
|
225 |
+
|
226 |
+
y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0])
|
227 |
+
x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1])
|
228 |
+
|
229 |
+
img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
230 |
+
img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
231 |
+
flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
232 |
+
valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
|
233 |
+
return img1, img2, flow, valid
|
234 |
+
|
235 |
+
|
236 |
+
def __call__(self, img1, img2, flow, valid):
|
237 |
+
img1, img2 = self.color_transform(img1, img2)
|
238 |
+
img1, img2 = self.eraser_transform(img1, img2)
|
239 |
+
img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid)
|
240 |
+
|
241 |
+
img1 = np.ascontiguousarray(img1)
|
242 |
+
img2 = np.ascontiguousarray(img2)
|
243 |
+
flow = np.ascontiguousarray(flow)
|
244 |
+
valid = np.ascontiguousarray(valid)
|
245 |
+
|
246 |
+
return img1, img2, flow, valid
|
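A minimal sketch of FlowAugmentor on random data, assuming the module is importable from data_utils/UNFaceFlow/core/utils_core; image and crop sizes are illustrative:

import numpy as np
from augmentor import FlowAugmentor

aug = FlowAugmentor(crop_size=(368, 496))
img1 = np.random.randint(0, 255, (436, 1024, 3), dtype=np.uint8)
img2 = np.random.randint(0, 255, (436, 1024, 3), dtype=np.uint8)
flow = np.random.randn(436, 1024, 2).astype(np.float32)
img1, img2, flow = aug(img1, img2, flow)   # color jitter, eraser occlusion, rescale/flip/crop
print(img1.shape, flow.shape)              # (368, 496, 3) (368, 496, 2)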
data_utils/UNFaceFlow/core/utils_core/flow_viz.py
ADDED
@@ -0,0 +1,132 @@
1 |
+
# Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization
|
2 |
+
|
3 |
+
|
4 |
+
# MIT License
|
5 |
+
#
|
6 |
+
# Copyright (c) 2018 Tom Runia
|
7 |
+
#
|
8 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
9 |
+
# of this software and associated documentation files (the "Software"), to deal
|
10 |
+
# in the Software without restriction, including without limitation the rights
|
11 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12 |
+
# copies of the Software, and to permit persons to whom the Software is
|
13 |
+
# furnished to do so, subject to conditions.
|
14 |
+
#
|
15 |
+
# Author: Tom Runia
|
16 |
+
# Date Created: 2018-08-03
|
17 |
+
|
18 |
+
import numpy as np
|
19 |
+
|
20 |
+
def make_colorwheel():
|
21 |
+
"""
|
22 |
+
Generates a color wheel for optical flow visualization as presented in:
|
23 |
+
Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
|
24 |
+
URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
|
25 |
+
|
26 |
+
Code follows the original C++ source code of Daniel Scharstein.
|
27 |
+
Code follows the Matlab source code of Deqing Sun.
|
28 |
+
|
29 |
+
Returns:
|
30 |
+
np.ndarray: Color wheel
|
31 |
+
"""
|
32 |
+
|
33 |
+
RY = 15
|
34 |
+
YG = 6
|
35 |
+
GC = 4
|
36 |
+
CB = 11
|
37 |
+
BM = 13
|
38 |
+
MR = 6
|
39 |
+
|
40 |
+
ncols = RY + YG + GC + CB + BM + MR
|
41 |
+
colorwheel = np.zeros((ncols, 3))
|
42 |
+
col = 0
|
43 |
+
|
44 |
+
# RY
|
45 |
+
colorwheel[0:RY, 0] = 255
|
46 |
+
colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY)
|
47 |
+
col = col+RY
|
48 |
+
# YG
|
49 |
+
colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG)
|
50 |
+
colorwheel[col:col+YG, 1] = 255
|
51 |
+
col = col+YG
|
52 |
+
# GC
|
53 |
+
colorwheel[col:col+GC, 1] = 255
|
54 |
+
colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC)
|
55 |
+
col = col+GC
|
56 |
+
# CB
|
57 |
+
colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB)
|
58 |
+
colorwheel[col:col+CB, 2] = 255
|
59 |
+
col = col+CB
|
60 |
+
# BM
|
61 |
+
colorwheel[col:col+BM, 2] = 255
|
62 |
+
colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM)
|
63 |
+
col = col+BM
|
64 |
+
# MR
|
65 |
+
colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR)
|
66 |
+
colorwheel[col:col+MR, 0] = 255
|
67 |
+
return colorwheel
|
68 |
+
|
69 |
+
|
70 |
+
def flow_uv_to_colors(u, v, convert_to_bgr=False):
|
71 |
+
"""
|
72 |
+
Applies the flow color wheel to (possibly clipped) flow components u and v.
|
73 |
+
|
74 |
+
According to the C++ source code of Daniel Scharstein
|
75 |
+
According to the Matlab source code of Deqing Sun
|
76 |
+
|
77 |
+
Args:
|
78 |
+
u (np.ndarray): Input horizontal flow of shape [H,W]
|
79 |
+
v (np.ndarray): Input vertical flow of shape [H,W]
|
80 |
+
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
|
81 |
+
|
82 |
+
Returns:
|
83 |
+
np.ndarray: Flow visualization image of shape [H,W,3]
|
84 |
+
"""
|
85 |
+
flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
|
86 |
+
colorwheel = make_colorwheel() # shape [55x3]
|
87 |
+
ncols = colorwheel.shape[0]
|
88 |
+
rad = np.sqrt(np.square(u) + np.square(v))
|
89 |
+
a = np.arctan2(-v, -u)/np.pi
|
90 |
+
fk = (a+1) / 2*(ncols-1)
|
91 |
+
k0 = np.floor(fk).astype(np.int32)
|
92 |
+
k1 = k0 + 1
|
93 |
+
k1[k1 == ncols] = 0
|
94 |
+
f = fk - k0
|
95 |
+
for i in range(colorwheel.shape[1]):
|
96 |
+
tmp = colorwheel[:,i]
|
97 |
+
col0 = tmp[k0] / 255.0
|
98 |
+
col1 = tmp[k1] / 255.0
|
99 |
+
col = (1-f)*col0 + f*col1
|
100 |
+
idx = (rad <= 1)
|
101 |
+
col[idx] = 1 - rad[idx] * (1-col[idx])
|
102 |
+
col[~idx] = col[~idx] * 0.75 # out of range
|
103 |
+
# Note the 2-i => BGR instead of RGB
|
104 |
+
ch_idx = 2-i if convert_to_bgr else i
|
105 |
+
flow_image[:,:,ch_idx] = np.floor(255 * col)
|
106 |
+
return flow_image
|
107 |
+
|
108 |
+
|
109 |
+
def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
|
110 |
+
"""
|
111 |
+
Expects a two dimensional flow image of shape.
|
112 |
+
|
113 |
+
Args:
|
114 |
+
flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
|
115 |
+
clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
|
116 |
+
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
|
117 |
+
|
118 |
+
Returns:
|
119 |
+
np.ndarray: Flow visualization image of shape [H,W,3]
|
120 |
+
"""
|
121 |
+
assert flow_uv.ndim == 3, 'input flow must have three dimensions'
|
122 |
+
assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
|
123 |
+
if clip_flow is not None:
|
124 |
+
flow_uv = np.clip(flow_uv, 0, clip_flow)
|
125 |
+
u = flow_uv[:,:,0]
|
126 |
+
v = flow_uv[:,:,1]
|
127 |
+
rad = np.sqrt(np.square(u) + np.square(v))
|
128 |
+
rad_max = np.max(rad)
|
129 |
+
epsilon = 1e-5
|
130 |
+
u = u / (rad_max + epsilon)
|
131 |
+
v = v / (rad_max + epsilon)
|
132 |
+
return flow_uv_to_colors(u, v, convert_to_bgr)
|
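A minimal sketch of flow_to_image, assuming flow_viz is importable from utils_core:

import numpy as np
from flow_viz import flow_to_image

flow = np.zeros((128, 128, 2), dtype=np.float32)
flow[..., 0] = 5.0                  # constant rightward motion
rgb = flow_to_image(flow)           # hue encodes direction, saturation encodes magnitude
print(rgb.dtype, rgb.shape)         # uint8 (128, 128, 3)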
data_utils/UNFaceFlow/core/utils_core/frame_utils.py
ADDED
@@ -0,0 +1,137 @@
1 |
+
import numpy as np
|
2 |
+
from PIL import Image
|
3 |
+
from os.path import *
|
4 |
+
import re
|
5 |
+
|
6 |
+
import cv2
|
7 |
+
cv2.setNumThreads(0)
|
8 |
+
cv2.ocl.setUseOpenCL(False)
|
9 |
+
|
10 |
+
TAG_CHAR = np.array([202021.25], np.float32)
|
11 |
+
|
12 |
+
def readFlow(fn):
|
13 |
+
""" Read .flo file in Middlebury format"""
|
14 |
+
# Code adapted from:
|
15 |
+
# http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
|
16 |
+
|
17 |
+
# WARNING: this will work on little-endian architectures (eg Intel x86) only!
|
18 |
+
# print 'fn = %s'%(fn)
|
19 |
+
with open(fn, 'rb') as f:
|
20 |
+
magic = np.fromfile(f, np.float32, count=1)
|
21 |
+
if 202021.25 != magic:
|
22 |
+
print('Magic number incorrect. Invalid .flo file')
|
23 |
+
return None
|
24 |
+
else:
|
25 |
+
w = np.fromfile(f, np.int32, count=1)
|
26 |
+
h = np.fromfile(f, np.int32, count=1)
|
27 |
+
# print 'Reading %d x %d flo file\n' % (w, h)
|
28 |
+
data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
|
29 |
+
# Reshape data into 3D array (columns, rows, bands)
|
30 |
+
# The reshape here is for visualization, the original code is (w,h,2)
|
31 |
+
return np.resize(data, (int(h), int(w), 2))
|
32 |
+
|
33 |
+
def readPFM(file):
|
34 |
+
file = open(file, 'rb')
|
35 |
+
|
36 |
+
color = None
|
37 |
+
width = None
|
38 |
+
height = None
|
39 |
+
scale = None
|
40 |
+
endian = None
|
41 |
+
|
42 |
+
header = file.readline().rstrip()
|
43 |
+
if header == b'PF':
|
44 |
+
color = True
|
45 |
+
elif header == b'Pf':
|
46 |
+
color = False
|
47 |
+
else:
|
48 |
+
raise Exception('Not a PFM file.')
|
49 |
+
|
50 |
+
dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
|
51 |
+
if dim_match:
|
52 |
+
width, height = map(int, dim_match.groups())
|
53 |
+
else:
|
54 |
+
raise Exception('Malformed PFM header.')
|
55 |
+
|
56 |
+
scale = float(file.readline().rstrip())
|
57 |
+
if scale < 0: # little-endian
|
58 |
+
endian = '<'
|
59 |
+
scale = -scale
|
60 |
+
else:
|
61 |
+
endian = '>' # big-endian
|
62 |
+
|
63 |
+
data = np.fromfile(file, endian + 'f')
|
64 |
+
shape = (height, width, 3) if color else (height, width)
|
65 |
+
|
66 |
+
data = np.reshape(data, shape)
|
67 |
+
data = np.flipud(data)
|
68 |
+
return data
|
69 |
+
|
70 |
+
def writeFlow(filename,uv,v=None):
|
71 |
+
""" Write optical flow to file.
|
72 |
+
|
73 |
+
If v is None, uv is assumed to contain both u and v channels,
|
74 |
+
stacked in depth.
|
75 |
+
Original code by Deqing Sun, adapted from Daniel Scharstein.
|
76 |
+
"""
|
77 |
+
nBands = 2
|
78 |
+
|
79 |
+
if v is None:
|
80 |
+
assert(uv.ndim == 3)
|
81 |
+
assert(uv.shape[2] == 2)
|
82 |
+
u = uv[:,:,0]
|
83 |
+
v = uv[:,:,1]
|
84 |
+
else:
|
85 |
+
u = uv
|
86 |
+
|
87 |
+
assert(u.shape == v.shape)
|
88 |
+
height,width = u.shape
|
89 |
+
f = open(filename,'wb')
|
90 |
+
# write the header
|
91 |
+
f.write(TAG_CHAR)
|
92 |
+
np.array(width).astype(np.int32).tofile(f)
|
93 |
+
np.array(height).astype(np.int32).tofile(f)
|
94 |
+
# arrange into matrix form
|
95 |
+
tmp = np.zeros((height, width*nBands))
|
96 |
+
tmp[:,np.arange(width)*2] = u
|
97 |
+
tmp[:,np.arange(width)*2 + 1] = v
|
98 |
+
tmp.astype(np.float32).tofile(f)
|
99 |
+
f.close()
|
100 |
+
|
101 |
+
|
102 |
+
def readFlowKITTI(filename):
|
103 |
+
flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)
|
104 |
+
flow = flow[:,:,::-1].astype(np.float32)
|
105 |
+
flow, valid = flow[:, :, :2], flow[:, :, 2]
|
106 |
+
flow = (flow - 2**15) / 64.0
|
107 |
+
return flow, valid
|
108 |
+
|
109 |
+
def readDispKITTI(filename):
|
110 |
+
disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0
|
111 |
+
valid = disp > 0.0
|
112 |
+
flow = np.stack([-disp, np.zeros_like(disp)], -1)
|
113 |
+
return flow, valid
|
114 |
+
|
115 |
+
|
116 |
+
def writeFlowKITTI(filename, uv):
|
117 |
+
uv = 64.0 * uv + 2**15
|
118 |
+
valid = np.ones([uv.shape[0], uv.shape[1], 1])
|
119 |
+
uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16)
|
120 |
+
cv2.imwrite(filename, uv[..., ::-1])
|
121 |
+
|
122 |
+
|
123 |
+
def read_gen(file_name, pil=False):
|
124 |
+
ext = splitext(file_name)[-1]
|
125 |
+
if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
|
126 |
+
return Image.open(file_name)
|
127 |
+
elif ext == '.bin' or ext == '.raw':
|
128 |
+
return np.load(file_name)
|
129 |
+
elif ext == '.flo':
|
130 |
+
return readFlow(file_name).astype(np.float32)
|
131 |
+
elif ext == '.pfm':
|
132 |
+
flow = readPFM(file_name).astype(np.float32)
|
133 |
+
if len(flow.shape) == 2:
|
134 |
+
return flow
|
135 |
+
else:
|
136 |
+
return flow[:, :, :-1]
|
137 |
+
return []
|
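A minimal round-trip sketch for the Middlebury .flo helpers above (the temporary filename is illustrative):

import numpy as np
from frame_utils import writeFlow, readFlow

flow = np.random.randn(100, 200, 2).astype(np.float32)
writeFlow("tmp_example.flo", flow)               # tag + width/height header, interleaved u/v float32
loaded = readFlow("tmp_example.flo")
print(loaded.shape, np.allclose(flow, loaded))   # (100, 200, 2) True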
data_utils/UNFaceFlow/core/utils_core/utils.py
ADDED
@@ -0,0 +1,86 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import numpy as np
|
4 |
+
from scipy import interpolate
|
5 |
+
|
6 |
+
|
7 |
+
class InputPadder:
|
8 |
+
""" Pads images such that dimensions are divisible by 8 """
|
9 |
+
def __init__(self, dims, mode='sintel'):
|
10 |
+
self.ht, self.wd = dims[-2:]
|
11 |
+
pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8
|
12 |
+
pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8
|
13 |
+
if mode == 'sintel':
|
14 |
+
self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2]
|
15 |
+
else:
|
16 |
+
self._pad = [pad_wd//2, pad_wd - pad_wd//2, 0, pad_ht]
|
17 |
+
|
18 |
+
def pad(self, *inputs):
|
19 |
+
return [F.pad(x, self._pad, mode='replicate') for x in inputs]
|
20 |
+
|
21 |
+
def unpad(self,x):
|
22 |
+
ht, wd = x.shape[-2:]
|
23 |
+
c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]]
|
24 |
+
return x[..., c[0]:c[1], c[2]:c[3]]
|
25 |
+
|
26 |
+
def forward_interpolate(flow):
|
27 |
+
flow = flow.detach().cpu().numpy()
|
28 |
+
dx, dy = flow[0], flow[1]
|
29 |
+
|
30 |
+
ht, wd = dx.shape
|
31 |
+
x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht))
|
32 |
+
|
33 |
+
x1 = x0 + dx
|
34 |
+
y1 = y0 + dy
|
35 |
+
|
36 |
+
x1 = x1.reshape(-1)
|
37 |
+
y1 = y1.reshape(-1)
|
38 |
+
dx = dx.reshape(-1)
|
39 |
+
dy = dy.reshape(-1)
|
40 |
+
|
41 |
+
valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht)
|
42 |
+
x1 = x1[valid]
|
43 |
+
y1 = y1[valid]
|
44 |
+
dx = dx[valid]
|
45 |
+
dy = dy[valid]
|
46 |
+
|
47 |
+
flow_x = interpolate.griddata(
|
48 |
+
(x1, y1), dx, (x0, y0), method='nearest', fill_value=0)
|
49 |
+
|
50 |
+
flow_y = interpolate.griddata(
|
51 |
+
(x1, y1), dy, (x0, y0), method='nearest', fill_value=0)
|
52 |
+
|
53 |
+
flow = np.stack([flow_x, flow_y], axis=0)
|
54 |
+
return torch.from_numpy(flow).float()
|
55 |
+
|
56 |
+
|
57 |
+
def bilinear_sampler(img, coords, mode='bilinear', mask=False):
|
58 |
+
""" Wrapper for grid_sample, uses pixel coordinates """
|
59 |
+
H, W = img.shape[-2:]
|
60 |
+
xgrid, ygrid = coords.split([1,1], dim=-1)
|
61 |
+
xgrid = 2*xgrid/(W-1) - 1
|
62 |
+
ygrid = 2*ygrid/(H-1) - 1
|
63 |
+
|
64 |
+
grid = torch.cat([xgrid, ygrid], dim=-1)
|
65 |
+
img = F.grid_sample(img, grid, align_corners=True)
|
66 |
+
|
67 |
+
if mask:
|
68 |
+
mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
|
69 |
+
return img, mask.float()
|
70 |
+
|
71 |
+
return img
|
72 |
+
|
73 |
+
|
74 |
+
def coords_grid(batch, ht, wd):
|
75 |
+
coords = torch.meshgrid(torch.arange(ht), torch.arange(wd))
|
76 |
+
coords = torch.stack(coords[::-1], dim=0).float()
|
77 |
+
return coords[None].repeat(batch, 1, 1, 1)
|
78 |
+
|
79 |
+
|
80 |
+
def upflow8(flow, mode='bilinear'):
|
81 |
+
new_size = (8 * flow.shape[2], 8 * flow.shape[3])
|
82 |
+
return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True)
|
83 |
+
|
84 |
+
def upweights8(weights, mode='bilinear'):
|
85 |
+
new_size = (8 * weights.shape[2], 8 * weights.shape[3])
|
86 |
+
return F.interpolate(weights, size=new_size, mode=mode, align_corners=True)
|
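A minimal sketch of InputPadder, which is needed because the flow network operates on feature maps at 1/8 resolution; it assumes this utils_core/utils.py (not the top-level UNFaceFlow/utils.py) is the one found on sys.path, and the image size is illustrative:

import torch
from utils import InputPadder

image1 = torch.zeros(1, 3, 436, 1024)        # height not divisible by 8
image2 = torch.zeros(1, 3, 436, 1024)
padder = InputPadder(image1.shape)
image1, image2 = padder.pad(image1, image2)  # replicate-pad to 440 x 1024
flow = torch.zeros(1, 2, 440, 1024)          # stand-in for the network output
flow = padder.unpad(flow)                    # crop back to the original 436 x 1024
print(image1.shape, flow.shape)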
data_utils/UNFaceFlow/core/warp_utils.py
ADDED
@@ -0,0 +1,118 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
|
6 |
+
def mesh_grid(B, H, W):
|
7 |
+
# mesh grid
|
8 |
+
x_base = torch.arange(0, W).repeat(B, H, 1) # BHW
|
9 |
+
y_base = torch.arange(0, H).repeat(B, W, 1).transpose(1, 2) # BHW
|
10 |
+
|
11 |
+
base_grid = torch.stack([x_base, y_base], 1) # B2HW
|
12 |
+
return base_grid
|
13 |
+
|
14 |
+
|
15 |
+
def norm_grid(v_grid):
|
16 |
+
_, _, H, W = v_grid.size()
|
17 |
+
|
18 |
+
# scale grid to [-1,1]
|
19 |
+
v_grid_norm = torch.zeros_like(v_grid)
|
20 |
+
v_grid_norm[:, 0, :, :] = 2.0 * v_grid[:, 0, :, :] / (W - 1) - 1.0
|
21 |
+
v_grid_norm[:, 1, :, :] = 2.0 * v_grid[:, 1, :, :] / (H - 1) - 1.0
|
22 |
+
return v_grid_norm.permute(0, 2, 3, 1) # BHW2
|
23 |
+
|
24 |
+
|
25 |
+
def get_corresponding_map(data):
|
26 |
+
"""
|
27 |
+
|
28 |
+
:param data: unnormalized coordinates Bx2xHxW
|
29 |
+
:return: Bx1xHxW
|
30 |
+
"""
|
31 |
+
B, _, H, W = data.size()
|
32 |
+
|
33 |
+
# x = data[:, 0, :, :].view(B, -1).clamp(0, W - 1) # BxN (N=H*W)
|
34 |
+
# y = data[:, 1, :, :].view(B, -1).clamp(0, H - 1)
|
35 |
+
|
36 |
+
x = data[:, 0, :, :].view(B, -1) # BxN (N=H*W)
|
37 |
+
y = data[:, 1, :, :].view(B, -1)
|
38 |
+
|
39 |
+
# invalid = (x < 0) | (x > W - 1) | (y < 0) | (y > H - 1) # BxN
|
40 |
+
# invalid = invalid.repeat([1, 4])
|
41 |
+
|
42 |
+
x1 = torch.floor(x)
|
43 |
+
x_floor = x1.clamp(0, W - 1)
|
44 |
+
y1 = torch.floor(y)
|
45 |
+
y_floor = y1.clamp(0, H - 1)
|
46 |
+
x0 = x1 + 1
|
47 |
+
x_ceil = x0.clamp(0, W - 1)
|
48 |
+
y0 = y1 + 1
|
49 |
+
y_ceil = y0.clamp(0, H - 1)
|
50 |
+
|
51 |
+
x_ceil_out = x0 != x_ceil
|
52 |
+
y_ceil_out = y0 != y_ceil
|
53 |
+
x_floor_out = x1 != x_floor
|
54 |
+
y_floor_out = y1 != y_floor
|
55 |
+
invalid = torch.cat([x_ceil_out | y_ceil_out,
|
56 |
+
x_ceil_out | y_floor_out,
|
57 |
+
x_floor_out | y_ceil_out,
|
58 |
+
x_floor_out | y_floor_out], dim=1)
|
59 |
+
|
60 |
+
# encode coordinates, since the scatter function can only index along one axis
|
61 |
+
corresponding_map = torch.zeros(B, H * W).type_as(data)
|
62 |
+
indices = torch.cat([x_ceil + y_ceil * W,
|
63 |
+
x_ceil + y_floor * W,
|
64 |
+
x_floor + y_ceil * W,
|
65 |
+
x_floor + y_floor * W], 1).long() # BxN (N=4*H*W)
|
66 |
+
values = torch.cat([(1 - torch.abs(x - x_ceil)) * (1 - torch.abs(y - y_ceil)),
|
67 |
+
(1 - torch.abs(x - x_ceil)) * (1 - torch.abs(y - y_floor)),
|
68 |
+
(1 - torch.abs(x - x_floor)) * (1 - torch.abs(y - y_ceil)),
|
69 |
+
(1 - torch.abs(x - x_floor)) * (1 - torch.abs(y - y_floor))],
|
70 |
+
1)
|
71 |
+
# values = torch.ones_like(values)
|
72 |
+
|
73 |
+
values[invalid] = 0
|
74 |
+
|
75 |
+
corresponding_map.scatter_add_(1, indices, values)
|
76 |
+
# decode coordinates
|
77 |
+
corresponding_map = corresponding_map.view(B, H, W)
|
78 |
+
|
79 |
+
return corresponding_map.unsqueeze(1)
|
80 |
+
|
81 |
+
|
82 |
+
def flow_warp(x, flow12, pad='border', mode='bilinear'):
|
83 |
+
B, _, H, W = x.size()
|
84 |
+
|
85 |
+
base_grid = mesh_grid(B, H, W).type_as(x) # B2HW
|
86 |
+
|
87 |
+
v_grid = norm_grid(base_grid + flow12) # BHW2
|
88 |
+
im1_recons = nn.functional.grid_sample(x, v_grid, mode=mode, padding_mode=pad)
|
89 |
+
return im1_recons
|
90 |
+
|
91 |
+
|
92 |
+
def get_occu_mask_bidirection(flow12, flow21, mask, scale=1, bias=0.5):
|
93 |
+
flow21_warped = flow_warp(flow21, flow12, pad='zeros')
|
94 |
+
flow12_diff = flow12 + flow21_warped
|
95 |
+
mag = (flow12 * flow12).sum(1, keepdim=True) + \
|
96 |
+
(flow21_warped * flow21_warped).sum(1, keepdim=True)
|
97 |
+
occ_thresh = scale * mag + bias
|
98 |
+
occu = (flow12_diff * flow12_diff).sum(1, keepdim=True) > occ_thresh
|
99 |
+
# soft_occu = 1.0 / (1 + torch.exp(diff) / 5.0)
|
100 |
+
# print("forward:", diff.max(), diff.min())
|
101 |
+
return occu
|
102 |
+
|
103 |
+
|
104 |
+
def get_occu_mask_backward(flow21, th=0.2):
|
105 |
+
B, _, H, W = flow21.size()
|
106 |
+
base_grid = mesh_grid(B, H, W).type_as(flow21) # B2HW
|
107 |
+
|
108 |
+
corr_map = get_corresponding_map(base_grid + flow21) # BHW
|
109 |
+
occu_mask = corr_map.clamp(min=0., max=1.) < th
|
110 |
+
return occu_mask.float()
|
111 |
+
|
112 |
+
def get_ssv_weights(cycle_corres, input, mask, scale_value):
|
113 |
+
vgrid = (cycle_corres.mul(scale_value) - 1.0).permute(0,2,3,1)
|
114 |
+
new_input = nn.functional.grid_sample(input, vgrid, align_corners=True, padding_mode='border')
|
115 |
+
color_diff = (((input[:, :3, :, :] - new_input[:, :3, :, :]) / 255.0) ** 2).sum(1, keepdim=True)
|
116 |
+
depth_diff = (((input[:, 3:, :, :] - new_input[:, 3:, :, :])) ** 2).sum(1, keepdim=True)
|
117 |
+
diff = torch.mul(mask.float(), color_diff + depth_diff) #(N, 1, H, W)
|
118 |
+
return torch.exp(-diff)
|
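A minimal sketch of flow_warp and the backward occlusion mask, assuming warp_utils is importable from data_utils/UNFaceFlow/core; zero flows are used purely for illustration:

import torch
from warp_utils import flow_warp, get_occu_mask_backward

frame2 = torch.rand(1, 3, 64, 64)
flow12 = torch.zeros(1, 2, 64, 64)       # forward flow (frame 1 -> frame 2)
recon1 = flow_warp(frame2, flow12)       # backward-warp frame 2 toward frame 1
flow21 = torch.zeros(1, 2, 64, 64)       # backward flow (frame 2 -> frame 1)
occu = get_occu_mask_backward(flow21)    # 1.0 where no source pixel lands on this location
print(recon1.shape, occu.shape)          # torch.Size([1, 3, 64, 64]) torch.Size([1, 1, 64, 64])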
data_utils/UNFaceFlow/data_test_flow/__init__.py
ADDED
@@ -0,0 +1,94 @@
1 |
+
import importlib
|
2 |
+
import torch.utils.data
|
3 |
+
from data_test_flow.dd_dataset import DDDataset
|
4 |
+
|
5 |
+
def CreateDataLoader(opt):
|
6 |
+
data_loader = CustomDatasetDataLoader()
|
7 |
+
data_loader.initialize(opt)
|
8 |
+
return data_loader
|
9 |
+
|
10 |
+
# def CreateTestDataLoader(opt):
|
11 |
+
# data_loader = CustomTestDatasetDataLoader()
|
12 |
+
# data_loader.initialize(opt)
|
13 |
+
# return data_loader
|
14 |
+
|
15 |
+
class BaseDataLoader():
|
16 |
+
def __init__(self):
|
17 |
+
pass
|
18 |
+
|
19 |
+
def initialize(self, opt):
|
20 |
+
self.opt = opt
|
21 |
+
pass
|
22 |
+
|
23 |
+
def load_data(self):
|
24 |
+
return None
|
25 |
+
|
26 |
+
class CustomDatasetDataLoader(BaseDataLoader):
|
27 |
+
def name(self):
|
28 |
+
return 'CustomDatasetDataLoader'
|
29 |
+
|
30 |
+
def initialize(self, opt):
|
31 |
+
BaseDataLoader.initialize(self, opt)
|
32 |
+
self.dataset = DDDataset()
|
33 |
+
self.dataset.initialize(opt)
|
34 |
+
'''
|
35 |
+
sampler = torch.utils.data.distributed.DistributedSampler(self.dataset)
|
36 |
+
self.dataloader = torch.utils.data.DataLoader(
|
37 |
+
self.dataset,
|
38 |
+
batch_size=opt.batch_size,
|
39 |
+
shuffle=False,
|
40 |
+
sampler=sampler)
|
41 |
+
'''
|
42 |
+
self.dataloader = torch.utils.data.DataLoader(
|
43 |
+
self.dataset,
|
44 |
+
batch_size=opt.batch_size,
|
45 |
+
shuffle=opt.shuffle,
|
46 |
+
drop_last=True,
|
47 |
+
num_workers=int(opt.num_threads))
|
48 |
+
|
49 |
+
def load_data(self):
|
50 |
+
return self
|
51 |
+
|
52 |
+
def __len__(self):
|
53 |
+
return min(len(self.dataset), self.opt.max_dataset_size)
|
54 |
+
|
55 |
+
def __iter__(self):
|
56 |
+
for i, data in enumerate(self.dataloader):
|
57 |
+
if i * self.opt.batch_size >= self.opt.max_dataset_size:
|
58 |
+
break
|
59 |
+
yield data
|
60 |
+
|
61 |
+
# class CustomTestDatasetDataLoader(BaseDataLoader):
|
62 |
+
# def name(self):
|
63 |
+
# return 'CustomDatasetDataLoader'
|
64 |
+
|
65 |
+
# def initialize(self, opt):
|
66 |
+
# BaseDataLoader.initialize(self, opt)
|
67 |
+
# self.dataset = DDDatasetTest()
|
68 |
+
# self.dataset.initialize(opt)
|
69 |
+
# '''
|
70 |
+
# sampler = torch.utils.data.distributed.DistributedSampler(self.dataset)
|
71 |
+
# self.dataloader = torch.utils.data.DataLoader(
|
72 |
+
# self.dataset,
|
73 |
+
# batch_size=opt.batch_size,
|
74 |
+
# shuffle=False,
|
75 |
+
# sampler=sampler)
|
76 |
+
# '''
|
77 |
+
# self.dataloader = torch.utils.data.DataLoader(
|
78 |
+
# self.dataset,
|
79 |
+
# batch_size=opt.batch_size,
|
80 |
+
# shuffle=opt.shuffle,
|
81 |
+
# drop_last=True,
|
82 |
+
# num_workers=int(opt.num_threads))
|
83 |
+
|
84 |
+
# def load_data(self):
|
85 |
+
# return self
|
86 |
+
|
87 |
+
# def __len__(self):
|
88 |
+
# return min(len(self.dataset), self.opt.max_dataset_size)
|
89 |
+
|
90 |
+
# def __iter__(self):
|
91 |
+
# for i, data in enumerate(self.dataloader):
|
92 |
+
# if i * self.opt.batch_size >= self.opt.max_dataset_size:
|
93 |
+
# break
|
94 |
+
# yield data
|
data_utils/UNFaceFlow/data_test_flow/base_dataset.py
ADDED
@@ -0,0 +1,98 @@
1 |
+
import torch.utils.data as data
|
2 |
+
from PIL import Image
|
3 |
+
import torchvision.transforms as transforms
|
4 |
+
|
5 |
+
|
6 |
+
class BaseDataset(data.Dataset):
|
7 |
+
def __init__(self):
|
8 |
+
super(BaseDataset, self).__init__()
|
9 |
+
|
10 |
+
def name(self):
|
11 |
+
return 'BaseDataset'
|
12 |
+
|
13 |
+
def initialize(self, opt):
|
14 |
+
pass
|
15 |
+
|
16 |
+
def __len__(self):
|
17 |
+
return 0
|
18 |
+
|
19 |
+
|
20 |
+
def get_transform(opt):
|
21 |
+
transform_list = []
|
22 |
+
if opt.resize_or_crop == 'resize_and_crop':
|
23 |
+
osize = [opt.loadSize, opt.loadSize]
|
24 |
+
transform_list.append(transforms.Resize(osize, Image.BICUBIC))
|
25 |
+
transform_list.append(transforms.RandomCrop(opt.fineSize))
|
26 |
+
elif opt.resize_or_crop == 'crop':
|
27 |
+
transform_list.append(transforms.RandomCrop(opt.fineSize))
|
28 |
+
elif opt.resize_or_crop == 'scale_width':
|
29 |
+
transform_list.append(transforms.Lambda(
|
30 |
+
lambda img: __scale_width(img, opt.fineSize)))
|
31 |
+
elif opt.resize_or_crop == 'scale_width_and_crop':
|
32 |
+
transform_list.append(transforms.Lambda(
|
33 |
+
lambda img: __scale_width(img, opt.loadSize)))
|
34 |
+
transform_list.append(transforms.RandomCrop(opt.fineSize))
|
35 |
+
elif opt.resize_or_crop == 'none':
|
36 |
+
transform_list.append(transforms.Lambda(
|
37 |
+
lambda img: __adjust(img)))
|
38 |
+
else:
|
39 |
+
raise ValueError('--resize_or_crop %s is not a valid option.' % opt.resize_or_crop)
|
40 |
+
|
41 |
+
if opt.isTrain and not opt.no_flip:
|
42 |
+
transform_list.append(transforms.RandomHorizontalFlip())
|
43 |
+
|
44 |
+
transform_list += [transforms.ToTensor(),
|
45 |
+
transforms.Normalize((0.5, 0.5, 0.5),
|
46 |
+
(0.5, 0.5, 0.5))]
|
47 |
+
return transforms.Compose(transform_list)
|
48 |
+
|
49 |
+
|
50 |
+
# just modify the width and height to be multiple of 4
|
51 |
+
def __adjust(img):
|
52 |
+
ow, oh = img.size
|
53 |
+
|
54 |
+
# the size needs to be a multiple of this number,
|
55 |
+
# because going through generator network may change img size
|
56 |
+
# and eventually cause size mismatch error
|
57 |
+
mult = 4
|
58 |
+
if ow % mult == 0 and oh % mult == 0:
|
59 |
+
return img
|
60 |
+
w = (ow - 1) // mult
|
61 |
+
w = (w + 1) * mult
|
62 |
+
h = (oh - 1) // mult
|
63 |
+
h = (h + 1) * mult
|
64 |
+
|
65 |
+
if ow != w or oh != h:
|
66 |
+
__print_size_warning(ow, oh, w, h)
|
67 |
+
|
68 |
+
return img.resize((w, h), Image.BICUBIC)
|
69 |
+
|
70 |
+
|
71 |
+
def __scale_width(img, target_width):
|
72 |
+
ow, oh = img.size
|
73 |
+
|
74 |
+
# the size needs to be a multiple of this number,
|
75 |
+
# because going through generator network may change img size
|
76 |
+
# and eventually cause size mismatch error
|
77 |
+
mult = 4
|
78 |
+
assert target_width % mult == 0, "the target width needs to be multiple of %d." % mult
|
79 |
+
if (ow == target_width and oh % mult == 0):
|
80 |
+
return img
|
81 |
+
w = target_width
|
82 |
+
target_height = int(target_width * oh / ow)
|
83 |
+
m = (target_height - 1) // mult
|
84 |
+
h = (m + 1) * mult
|
85 |
+
|
86 |
+
if target_height != h:
|
87 |
+
__print_size_warning(target_width, target_height, w, h)
|
88 |
+
|
89 |
+
return img.resize((w, h), Image.BICUBIC)
|
90 |
+
|
91 |
+
|
92 |
+
def __print_size_warning(ow, oh, w, h):
|
93 |
+
if not hasattr(__print_size_warning, 'has_printed'):
|
94 |
+
print("The image size needs to be a multiple of 4. "
|
95 |
+
"The loaded image size was (%d, %d), so it was adjusted to "
|
96 |
+
"(%d, %d). This adjustment will be done to all images "
|
97 |
+
"whose sizes are not multiples of 4" % (ow, oh, w, h))
|
98 |
+
__print_size_warning.has_printed = True
|
data_utils/UNFaceFlow/data_test_flow/dd_dataset.py
ADDED
@@ -0,0 +1,108 @@
1 |
+
import os.path
|
2 |
+
import torch
|
3 |
+
import torch.utils.data as data
|
4 |
+
from PIL import Image
|
5 |
+
import random
|
6 |
+
import utils
|
7 |
+
import numpy as np
|
8 |
+
import torchvision.transforms as transforms
|
9 |
+
from utils_core import flow_viz
|
10 |
+
import cv2
|
11 |
+
|
12 |
+
class DDDataset(data.Dataset):
|
13 |
+
def __init__(self):
|
14 |
+
super(DDDataset, self).__init__()
|
15 |
+
def initialize(self, opt):
|
16 |
+
self.opt = opt
|
17 |
+
self.dir_txt = opt.datapath
|
18 |
+
self.paths = []
|
19 |
+
in_file = open(self.dir_txt, "r")
|
20 |
+
k = 0
|
21 |
+
list_paths = in_file.readlines()
|
22 |
+
for line in list_paths:
|
23 |
+
#if k>=20: break
|
24 |
+
flag = False
|
25 |
+
line = line.strip()
|
26 |
+
line = line.split()
|
27 |
+
|
28 |
+
#source data
|
29 |
+
if (not os.path.exists(line[0])):
|
30 |
+
print(line[0]+" not exists")
|
31 |
+
continue
|
32 |
+
if (not os.path.exists(line[1])):
|
33 |
+
print(line[1]+" not exists")
|
34 |
+
continue
|
35 |
+
if (not os.path.exists(line[2])):
|
36 |
+
print(line[2]+" not exists")
|
37 |
+
continue
|
38 |
+
if (not os.path.exists(line[3])):
|
39 |
+
print(line[3]+" not exists")
|
40 |
+
continue
|
41 |
+
# if (not os.path.exists(line[2])):
|
42 |
+
# print(line[2]+" not exists")
|
43 |
+
# continue
|
44 |
+
|
45 |
+
# path_list = [line[0], line[1], line[2]]
|
46 |
+
path_list = [line[0], line[1], line[2], line[3]]
|
47 |
+
self.paths.append(path_list)
|
48 |
+
k += 1
|
49 |
+
in_file.close()
|
50 |
+
self.data_size = len(self.paths)
|
51 |
+
print("num data: ", len(self.paths))
|
52 |
+
|
53 |
+
def process_data(self, color, mask):
|
54 |
+
non_zero = mask.nonzero()
|
55 |
+
bound = 10
|
56 |
+
min_x = max(0, non_zero[1].min()-bound)
|
57 |
+
max_x = min(self.opt.width-1, non_zero[1].max()+bound)
|
58 |
+
min_y = max(0, non_zero[0].min()-bound)
|
59 |
+
max_y = min(self.opt.height-1, non_zero[0].max()+bound)
|
60 |
+
color = color * (mask!=0).astype(float)[:, :, None]
|
61 |
+
crop_color = color[min_y:max_y, min_x:max_x, :]
|
62 |
+
crop_color = cv2.resize(np.ascontiguousarray(crop_color), (self.opt.crop_width, self.opt.crop_height), interpolation=cv2.INTER_LINEAR)
|
63 |
+
crop_params = [[min_x], [max_x], [min_y], [max_y]]
|
64 |
+
|
65 |
+
return crop_color, crop_params
|
66 |
+
|
67 |
+
def __getitem__(self, index):
|
68 |
+
paths = self.paths[index % self.data_size]
|
69 |
+
src_color = np.array(Image.open(paths[0]))
|
70 |
+
src_color = src_color.astype(np.uint8)
|
71 |
+
raw_src_color = src_color.copy()
|
72 |
+
src_mask = np.array(Image.open(paths[1]))[:, :, 0]
|
73 |
+
cv2.imwrite("test_mask.png", src_mask)
|
74 |
+
src_mask_copy = src_mask.copy()
|
75 |
+
src_crop_color, src_crop_params = self.process_data(src_color, src_mask)
|
76 |
+
#self.write_mesh(src_X, src_Y, src_Z, "./tmp/src.obj")
|
77 |
+
#HWC --> CHW,
|
78 |
+
raw_src_color = torch.from_numpy(raw_src_color).permute(2, 0, 1).float() / 255.0
|
79 |
+
src_crop_color = torch.from_numpy(src_crop_color).permute(2, 0, 1).float() / 255.0
|
80 |
+
|
81 |
+
src_mask_copy = (src_mask_copy!=0)
|
82 |
+
src_mask_copy = torch.tensor(src_mask_copy[np.newaxis, :, :])
|
83 |
+
|
84 |
+
tar_color = np.array(Image.open(paths[2]))
|
85 |
+
tar_color = tar_color.astype(np.uint8)
|
86 |
+
raw_tar_color = tar_color.copy()
|
87 |
+
tar_mask = np.array(Image.open(paths[3]))[:, :, 0]
|
88 |
+
tar_mask_copy = tar_mask.copy()
|
89 |
+
tar_crop_color, tar_crop_params = self.process_data(tar_color, tar_mask)
|
90 |
+
|
91 |
+
raw_tar_color = torch.from_numpy(raw_tar_color).permute(2, 0, 1).float() / 255.0
|
92 |
+
tar_crop_color = torch.from_numpy(tar_crop_color).permute(2, 0, 1).float() / 255.0
|
93 |
+
|
94 |
+
tar_mask_copy = (tar_mask_copy!=0)
|
95 |
+
tar_mask_copy = torch.tensor(tar_mask_copy[np.newaxis, :, :])
|
96 |
+
|
97 |
+
Crop_param = torch.tensor(src_crop_params+tar_crop_params)
|
98 |
+
|
99 |
+
split_ = paths[0].split("/")
|
100 |
+
path1 = split_[-1][:-4] + "_" + paths[2].split("/")[-1][:-4] +".oflow"
|
101 |
+
|
102 |
+
return {"path_flow":path1, "src_crop_color":src_crop_color, "tar_crop_color":tar_crop_color, "src_color":raw_src_color, "tar_color":raw_tar_color, "src_mask":src_mask_copy, "tar_mask":tar_mask_copy, "Crop_param":Crop_param}
|
103 |
+
|
104 |
+
def __len__(self):
|
105 |
+
return self.data_size
|
106 |
+
|
107 |
+
def name(self):
|
108 |
+
return 'DDDataset'
|
data_utils/UNFaceFlow/data_test_flow/dd_dataset_bak.py
ADDED
@@ -0,0 +1,107 @@
1 |
+
import os.path
|
2 |
+
import torch
|
3 |
+
import torch.utils.data as data
|
4 |
+
from PIL import Image
|
5 |
+
import random
|
6 |
+
import utils
|
7 |
+
import numpy as np
|
8 |
+
import torchvision.transforms as transforms
|
9 |
+
from utils_core import flow_viz
|
10 |
+
import cv2
|
11 |
+
|
12 |
+
class DDDataset(data.Dataset):
|
13 |
+
def __init__(self):
|
14 |
+
super(DDDataset, self).__init__()
|
15 |
+
def initialize(self, opt):
|
16 |
+
self.opt = opt
|
17 |
+
self.dir_txt = opt.datapath
|
18 |
+
self.paths = []
|
19 |
+
in_file = open(self.dir_txt, "r")
|
20 |
+
k = 0
|
21 |
+
list_paths = in_file.readlines()
|
22 |
+
for line in list_paths:
|
23 |
+
#if k>=20: break
|
24 |
+
flag = False
|
25 |
+
line = line.strip()
|
26 |
+
line = line.split()
|
27 |
+
|
28 |
+
#source data
|
29 |
+
if (not os.path.exists(line[0])):
|
30 |
+
print(line[0]+" not exists")
|
31 |
+
continue
|
32 |
+
if (not os.path.exists(line[1])):
|
33 |
+
print(line[1]+" not exists")
|
34 |
+
continue
|
35 |
+
if (not os.path.exists(line[2])):
|
36 |
+
print(line[2]+" not exists")
|
37 |
+
continue
|
38 |
+
if (not os.path.exists(line[3])):
|
39 |
+
print(line[3]+" not exists")
|
40 |
+
continue
|
41 |
+
# if (not os.path.exists(line[2])):
|
42 |
+
# print(line[2]+" not exists")
|
43 |
+
# continue
|
44 |
+
|
45 |
+
# path_list = [line[0], line[1], line[2]]
|
46 |
+
path_list = [line[0], line[1], line[2], line[3]]
|
47 |
+
self.paths.append(path_list)
|
48 |
+
k += 1
|
49 |
+
in_file.close()
|
50 |
+
self.data_size = len(self.paths)
|
51 |
+
print("num data: ", len(self.paths))
|
52 |
+
|
53 |
+
def process_data(self, color, mask):
|
54 |
+
non_zero = mask.nonzero()
|
55 |
+
bound = 10
|
56 |
+
min_x = max(0, non_zero[1].min()-bound)
|
57 |
+
max_x = min(self.opt.width-1, non_zero[1].max()+bound)
|
58 |
+
min_y = max(0, non_zero[0].min()-bound)
|
59 |
+
max_y = min(self.opt.height-1, non_zero[0].max()+bound)
|
60 |
+
color = color * (mask!=0).astype(float)[:, :, None]
|
61 |
+
crop_color = color[min_y:max_y, min_x:max_x, :]
|
62 |
+
crop_color = cv2.resize(np.ascontiguousarray(crop_color), (self.opt.crop_width, self.opt.crop_height), interpolation=cv2.INTER_LINEAR)
|
63 |
+
crop_params = [[min_x], [max_x], [min_y], [max_y]]
|
64 |
+
|
65 |
+
return crop_color, crop_params
|
66 |
+
|
67 |
+
def __getitem__(self, index):
|
68 |
+
paths = self.paths[index % self.data_size]
|
69 |
+
src_color = np.array(Image.open(paths[0]))
|
70 |
+
src_color = src_color.astype(np.uint8)
|
71 |
+
raw_src_color = src_color.copy()
|
72 |
+
src_mask = np.array(Image.open(paths[1]))
|
73 |
+
src_mask_copy = src_mask.copy()
|
74 |
+
src_crop_color, src_crop_params = self.process_data(src_color, src_mask)
|
75 |
+
#self.write_mesh(src_X, src_Y, src_Z, "./tmp/src.obj")
|
76 |
+
#HWC --> CHW,
|
77 |
+
raw_src_color = torch.from_numpy(raw_src_color).permute(2, 0, 1).float() / 255.0
|
78 |
+
src_crop_color = torch.from_numpy(src_crop_color).permute(2, 0, 1).float() / 255.0
|
79 |
+
|
80 |
+
src_mask_copy = (src_mask_copy!=0)
|
81 |
+
src_mask_copy = torch.tensor(src_mask_copy[np.newaxis, :, :])
|
82 |
+
|
83 |
+
tar_color = np.array(Image.open(paths[2]))
|
84 |
+
tar_color = tar_color.astype(np.uint8)
|
85 |
+
raw_tar_color = tar_color.copy()
|
86 |
+
tar_mask = np.array(Image.open(paths[3]))
|
87 |
+
tar_mask_copy = tar_mask.copy()
|
88 |
+
tar_crop_color, tar_crop_params = self.process_data(tar_color, tar_mask)
|
89 |
+
|
90 |
+
raw_tar_color = torch.from_numpy(raw_tar_color).permute(2, 0, 1).float() / 255.0
|
91 |
+
tar_crop_color = torch.from_numpy(tar_crop_color).permute(2, 0, 1).float() / 255.0
|
92 |
+
|
93 |
+
tar_mask_copy = (tar_mask_copy!=0)
|
94 |
+
tar_mask_copy = torch.tensor(tar_mask_copy[np.newaxis, :, :])
|
95 |
+
|
96 |
+
Crop_param = torch.tensor(src_crop_params+tar_crop_params)
|
97 |
+
|
98 |
+
split_ = paths[0].split("/")
|
99 |
+
path1 = split_[-1][:-4] + "_" + paths[2].split("/")[-1][:-4] +".oflow"
|
100 |
+
|
101 |
+
return {"path_flow":path1, "src_crop_color":src_crop_color, "tar_crop_color":tar_crop_color, "src_color":raw_src_color, "tar_color":raw_tar_color, "src_mask":src_mask_copy, "tar_mask":tar_mask_copy, "Crop_param":Crop_param}
|
102 |
+
|
103 |
+
def __len__(self):
|
104 |
+
return self.data_size
|
105 |
+
|
106 |
+
def name(self):
|
107 |
+
return 'DDDataset'
|
data_utils/UNFaceFlow/models/network_test_flow.py
ADDED
@@ -0,0 +1,88 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from raft import RAFT
|
5 |
+
from nnutils import make_conv_2d, make_upscale_2d, make_downscale_2d, ResBlock2d, Identity


class ImportanceWeights(torch.nn.Module):
    def __init__(self, opt):
        super().__init__()

        if opt.small:
            in_dim = 128
        else:
            in_dim = 256
        fn_0 = 16
        self.input_fn = fn_0 + 3 * 2
        fn_1 = 16
        self.conv1 = torch.nn.Conv2d(in_channels=in_dim, out_channels=fn_0, kernel_size=3, stride=1, padding=1)

        if opt.use_batch_norm:
            custom_batch_norm = torch.nn.BatchNorm2d
        else:
            custom_batch_norm = Identity

        self.model = nn.Sequential(
            make_conv_2d(self.input_fn, fn_1, n_blocks=1, normalization=custom_batch_norm),
            ResBlock2d(fn_1, normalization=custom_batch_norm),
            ResBlock2d(fn_1, normalization=custom_batch_norm),
            ResBlock2d(fn_1, normalization=custom_batch_norm),
            nn.Conv2d(fn_1, 1, kernel_size=3, padding=1)
            # torch.nn.Sigmoid()
        )

    def forward(self, x, features):
        # Reduce number of channels and upscale to highest resolution
        features = self.conv1(features)
        x = torch.cat([features, x], 1)
        assert x.shape[1] == self.input_fn
        x = self.model(x)
        print(x)
        print(x.max(), x.min(), x.mean())

        return torch.nn.Sigmoid()(x)

class NeuralNRT(nn.Module):
    def __init__(self, opt, path=None, device="cuda:0"):
        super(NeuralNRT, self).__init__()
        self.opt = opt
        self.CorresPred = RAFT(opt)
        self.ImportanceW = ImportanceWeights(opt)
        if path is not None:
            data = torch.load(path, map_location='cpu')
            if 'state_dict' in data.keys():
                self.CorresPred.load_state_dict(data['state_dict'])
                print("load done")
            else:
                self.CorresPred.load_state_dict({k.replace('module.', ''): v for k, v in data.items()})
                print("load done")

    def forward(self, src_im, tar_im, src_im_raw, tar_im_raw, Crop_param):
        N = src_im.shape[0]
        src_im = src_im * 255.0
        tar_im = tar_im * 255.0
        flow_fw_crop, feature_fw_crop = self.CorresPred(src_im, tar_im, iters=self.opt.iters)

        xx = torch.arange(0, self.opt.width).view(1, -1).repeat(self.opt.height, 1)
        yy = torch.arange(0, self.opt.height).view(-1, 1).repeat(1, self.opt.width)
        xx = xx.view(1, 1, self.opt.height, self.opt.width).repeat(N, 1, 1, 1)
        yy = yy.view(1, 1, self.opt.height, self.opt.width).repeat(N, 1, 1, 1)
        grid = torch.cat((xx, yy), 1).float()
        grid = grid.to(src_im.device)

        grid_crop = grid[:, :, :self.opt.crop_height, :self.opt.crop_width]

        flow_fw = torch.zeros((N, 2, self.opt.height, self.opt.width), device=src_im.device)

        leftup1 = torch.cat((Crop_param[:, 0:1, 0], Crop_param[:, 2:3, 0]), 1)[:, :, None, None]
        leftup2 = torch.cat((Crop_param[:, 4:5, 0], Crop_param[:, 6:7, 0]), 1)[:, :, None, None]

        scale1 = torch.cat(((Crop_param[:, 1:2, 0] - Crop_param[:, 0:1, 0]).float() / self.opt.crop_width, (Crop_param[:, 3:4, 0] - Crop_param[:, 2:3, 0]).float() / self.opt.crop_height), 1)[:, :, None, None]
        scale2 = torch.cat(((Crop_param[:, 5:6, 0] - Crop_param[:, 4:5, 0]).float() / self.opt.crop_width, (Crop_param[:, 7:8, 0] - Crop_param[:, 6:7, 0]).float() / self.opt.crop_height), 1)[:, :, None, None]

        flow_fw_crop = (scale2 - scale1) * grid_crop + scale2 * flow_fw_crop
        for i in range(N):
            flow_fw_cropi = F.interpolate(flow_fw_crop[i:(i+1)], ((Crop_param[i, 3, 0] - Crop_param[i, 2, 0]).item(), (Crop_param[i, 1, 0] - Crop_param[i, 0, 0]).item()), mode='bilinear', align_corners=True)
            flow_fw_cropi = flow_fw_cropi + (leftup2 - leftup1)[i:(i+1), :, :, :]
            flow_fw[i, :, Crop_param[i, 2, 0]:Crop_param[i, 3, 0], Crop_param[i, 0, 0]:Crop_param[i, 1, 0]] = flow_fw_cropi[0]
        return flow_fw
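For readers skimming the diff, a minimal usage sketch of `NeuralNRT` outside of `test_flow.py`. It assumes an option object exposing the fields used above (`iters`, `width`, `height`, `crop_width`, `crop_height`, `small`, `mixed_precision`) and an assumed `(N, 8, 1)` crop-parameter layout; none of these shapes or values come from the commit itself.

```
# Hypothetical usage sketch (not part of this commit); shapes and values are assumptions.
import torch
from models.network_test_flow import NeuralNRT
from options_test_flow import TestOptions

opt = TestOptions().parse()
model = NeuralNRT(opt).cuda().eval()          # path=None: RAFT weights loaded separately

N = 1
crop = torch.tensor([[[0], [opt.crop_width], [0], [opt.crop_height],
                      [0], [opt.crop_width], [0], [opt.crop_height]]])  # assumed crop boxes
src_c = torch.rand(N, 3, opt.crop_height, opt.crop_width)  # cropped source frame in [0, 1]
tar_c = torch.rand(N, 3, opt.crop_height, opt.crop_width)  # cropped target frame in [0, 1]
src = torch.rand(N, 3, opt.height, opt.width)
tar = torch.rand(N, 3, opt.height, opt.width)

with torch.no_grad():
    flow = model(src_c.cuda(), tar_c.cuda(), src.cuda(), tar.cuda(), crop.cuda())
print(flow.shape)  # (N, 2, height, width)
```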
data_utils/UNFaceFlow/options_test_flow.py
ADDED
@@ -0,0 +1,123 @@
# ref:https://github.com/ShunyuYao/DFA-NeRF
import argparse
class BaseOptions():
    def __init__(self):
        self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.initialized = False

    def initialize(self):
        # self.parser.add_argument('--model_save_path', type=str, default='snapshot/small_filter_wo_ct_wi_bn/real_data/combine/', help='path')
        self.parser.add_argument('--model_save_path', type=str, default='snapshot/version1/', help='path')
        self.parser.add_argument('--num_threads', type=int, default=2, help='number of threads')
        self.parser.add_argument('--max_dataset_size', type=int, default=150000, help='max dataset size')

        self.parser.add_argument('--n_epochs', type=int, default=40000, help='number of iterations')
        self.parser.add_argument('--dropout', type=float, default=0.0, help='dropout')
        self.parser.add_argument('--init_type', type=str, default='uniform', help='[uniform | xavier]')
        self.parser.add_argument('--frequency_print_batch', type=int, default=1000, help='print messages every set iter')
        self.parser.add_argument('--frequency_save_model', type=int, default=2000, help='save model every set iter')
        self.parser.add_argument('--small', type=bool, default=True, help='use small model')
        self.parser.add_argument('--use_batch_norm', action='store_true', help='')
        self.parser.add_argument('--smooth_2nd', type=bool, default=True, help='')

        # loss weights for Gauss-Newton optimization
        self.parser.add_argument('--lambda_2d', type=float, default=0.001, help='weight of 2D projection loss')
        self.parser.add_argument('--lambda_depth', type=float, default=1.0, help='weight of depth loss')
        self.parser.add_argument('--lambda_reg', type=float, default=1.0, help='weight of regularization loss')

        self.parser.add_argument('--num_adja', type=int, default=6, help='number of nodes who affect a point')
        self.parser.add_argument('--num_corres', type=int, default=20000, help='number of corres')
        self.parser.add_argument('--iter_num', type=int, default=3, help='GN iter num')
        self.parser.add_argument('--width', type=int, default=512, help='image width')  # 480
        self.parser.add_argument('--height', type=int, default=512, help='image height')  # 640
        self.parser.add_argument('--crop_width', type=int, default=240, help='crop width')
        self.parser.add_argument('--crop_height', type=int, default=320, help='crop height')
        self.parser.add_argument('--max_num_edges', type=int, default=30000, help='number of edges')
        self.parser.add_argument('--max_num_nodes', type=int, default=1500, help='number of nodes')
        self.parser.add_argument('--fdim', type=int, default=128)

        # loss weights for training
        self.parser.add_argument('--lambda_weights', type=float, default=0.0, help='weight of weights loss')  # 75
        self.parser.add_argument('--lambda_corres', type=float, default=1.0, help='weight of corres loss')  # 0, 1
        self.parser.add_argument('--lambda_graph', type=float, default=10.0, help='weight of graph loss')  # 1000, 5
        self.parser.add_argument('--lambda_warp', type=float, default=10.0, help='weight of warp loss')  # 1000, 5

    def parse(self):
        if not self.initialized:
            self.initialize()

        self.opt = self.parser.parse_args()
        self.opt.isTrain = self.isTrain
        self.opt.isTest = self.isTest
        args = vars(self.opt)

        return self.opt

class TrainOptions(BaseOptions):
    # Override
    def initialize(self):
        BaseOptions.initialize(self)
        # syn_datasets/syn_new_train_data.txt
        self.parser.add_argument('--datapath', type=str, default='./data/train_data.txt', help='path')
        self.parser.add_argument('--pretrain_model_path', type=str, default='./pretrain_model/raft-small.pth', help='path')
        self.parser.add_argument('--lr_C', type=float, default=0.00001, help='initial learning rate')  # 0.01
        self.parser.add_argument('--optimizer_C', type=str, default='sgd', help='[sgd | adam]')
        self.parser.add_argument('--lr_W', type=float, default=0.00001, help='initial learning rate')
        self.parser.add_argument('--lr_BSW', type=float, default=0.00001, help='initial learning rate')
        self.parser.add_argument('--optimizer_W', type=str, default='sgd', help='[sgd | adam]')
        self.parser.add_argument('--optimizer_BSW', type=str, default='sgd', help='[sgd | adam]')
        self.parser.add_argument('--lr_decay_epoch', type=int, default=8000, help='multiply by a gamma every set iter')
        self.parser.add_argument('--lr_decay', type=float, default=0.1, help='coefficient of lr decay')
        self.parser.add_argument('--weight_decay', type=float, default=1e-4, help='coefficient of weight decay')  # 0.0005
        self.parser.add_argument('--batch_size', type=int, default=4, help='batch size')
        self.parser.add_argument('--shuffle', type=bool, default=True, help='whether to shuffle data')

        self.parser.add_argument('--validation', type=str, nargs='+')
        # self.parser.add_argument('--image_size', type=int, nargs='+', default=[384, 512])
        self.parser.add_argument('--gpus', type=int, nargs='+', default=[0, 1])
        self.parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
        self.parser.add_argument('--iters', type=int, default=12)

        self.parser.add_argument('--clip', type=float, default=1.0)
        self.parser.add_argument('--gamma', type=float, default=0.8, help='exponential weighting')
        self.parser.add_argument('--add_noise', action='store_true')

        self.parser.add_argument('--train_bsw', type=bool, default=True, help='whether to train bsw network')
        self.parser.add_argument('--train_weight', type=bool, default=True, help='whether to train weight network')
        self.parser.add_argument('--train_corres', type=bool, default=True, help='whether to train corresPred network')

        self.isTrain = True
        self.isTest = False

class ValOptions(BaseOptions):
    def initialize(self):
        BaseOptions.initialize(self)
        self.parser.add_argument('--batch_size', type=int, default=4, help='batch size')
        self.parser.add_argument('--datapath', type=str, default='./data/val_data.txt', help='path')
        self.parser.add_argument('--shuffle', type=bool, default=True, help='whether to shuffle data')
        self.parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
        self.parser.add_argument('--alternate_corr', action='store_true', help='use efficient correlation implementation')
        self.parser.add_argument('--iters', type=int, default=12)
        self.isTrain = True
        self.isTest = False

class TestOptions(BaseOptions):
    def initialize(self):
        BaseOptions.initialize(self)
        self.parser.add_argument('--batch_size', type=int, default=1, help='batch size')
        self.parser.add_argument('--pretrain_model_path', type=str, default='./pretrain_model/raft-small.pth', help='path')

        # self.parser.add_argument('--datapath', type=str, default='./data/real_train_data_1128_1.txt', help='path')
        # self.parser.add_argument('--datapath', type=str, default='./data_test_flow/test_data.txt', help='path')
        self.parser.add_argument('--savepath', type=str, default='flow_result',
                                 help='save path')
        self.parser.add_argument('--datapath', type=str, default='/data_b/yudong/paper_code/TalkingHead-NeRF/data_guancha/guancha_flow.txt',
                                 help='path')
        self.parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision')
        self.parser.add_argument('--alternate_corr', action='store_true', help='use efficient correlation implementation')
        self.parser.add_argument('--iters', type=int, default=12)
        self.parser.add_argument('--shuffle', type=bool, default=True, help='whether to shuffle data')
        self.isTrain = False
        self.isTest = True
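A small, hedged example of how these option classes are consumed; the override shown is illustrative, not a default beyond the flags defined above.

```
# Sketch: parsing test-time options (values other than the defined defaults are assumptions).
from options_test_flow import TestOptions

opts = TestOptions().parse()    # reads sys.argv; sets opts.isTrain=False, opts.isTest=True
opts.savepath = "flow_result"   # same as the --savepath default
print(opts.width, opts.height, opts.crop_width, opts.crop_height)
```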
data_utils/UNFaceFlow/pretrain_model/raft-small.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c7d41b9cc88442bb8aa911dbb33086dac55a226394b142937ff22d5578717332
size 3984814
data_utils/UNFaceFlow/sgd_NNRT_model_epoch19008_50000.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a8156ef276732a4cbd9a9d85d9b5653abf500976372daf7892b122971a7b8f37
size 8808087
data_utils/UNFaceFlow/test_flow.py
ADDED
@@ -0,0 +1,62 @@
# ref:https://github.com/ShunyuYao/DFA-NeRF
import sys
import os
from tqdm import tqdm
dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(dir_path, 'core'))
from pathlib import Path
from data_test_flow import *
from models.network_test_flow import NeuralNRT
from options_test_flow import TestOptions
import torch
import numpy as np


def save_flow_numpy(filename, flow_input):
    np.save(filename, flow_input)


def predict(data):
    with torch.no_grad():
        model.eval()
        path_flow = data["path_flow"]
        src_crop_im = data["src_crop_color"].cuda()
        tar_crop_im = data["tar_crop_color"].cuda()
        src_im = data["src_color"].cuda()
        tar_im = data["tar_color"].cuda()
        src_mask = data["src_mask"].cuda()
        crop_param = data["Crop_param"].cuda()
        B = src_mask.shape[0]
        flow = model(src_crop_im, tar_crop_im, src_im, tar_im, crop_param)
        for i in range(B):
            flow_tmp = flow[i].cpu().numpy() * src_mask[i].cpu().numpy()
            save_flow_numpy(os.path.join(save_path, os.path.basename(
                path_flow[i])[:-6] + ".npy"), flow_tmp)


if __name__ == "__main__":
    width = 272
    height = 480

    test_opts = TestOptions().parse()
    test_opts.pretrain_model_path = os.path.join(
        dir_path, 'pretrain_model/raft-small.pth')
    data_loader = CreateDataLoader(test_opts)
    testloader = data_loader.load_data()
    model_path = os.path.join(dir_path, 'sgd_NNRT_model_epoch19008_50000.pth')
    model = NeuralNRT(test_opts, os.path.join(
        dir_path, 'pretrain_model/raft-small.pth'))
    state_dict = torch.load(model_path)

    model.CorresPred.load_state_dict(state_dict["net_C"])
    model.ImportanceW.load_state_dict(state_dict["net_W"])

    model = model.cuda()

    save_path = test_opts.savepath
    Path(save_path).mkdir(parents=True, exist_ok=True)
    total_length = len(testloader)

    for batch_idx, data in tqdm(enumerate(testloader), total=total_length):
        predict(data)
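The script writes one `.npy` flow field per frame pair into `--savepath`. A minimal sketch of reading one back, assuming the `(2, H, W)` layout produced by `NeuralNRT` and the mask multiplication in `predict`; the file name is an assumption.

```
import numpy as np

flow = np.load("flow_result/0001.npy")  # hypothetical file name
print(flow.shape)                       # expected (2, height, width)
u, v = flow[0], flow[1]                 # per-pixel x- and y-displacements
```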
data_utils/UNFaceFlow/utils.py
ADDED
@@ -0,0 +1,84 @@
import os
import shutil
import numpy as np
import struct
import pickle
from scipy.sparse import coo_matrix

def load_flow(filename):
    # Flow is stored row-wise in order [channels, height, width].
    assert os.path.isfile(filename), "File not found: {}".format(filename)

    flow = None
    with open(filename, 'rb') as fin:
        width = struct.unpack('I', fin.read(4))[0]
        height = struct.unpack('I', fin.read(4))[0]
        channels = struct.unpack('I', fin.read(4))[0]
        n_elems = height * width * channels

        flow = struct.unpack('f' * n_elems, fin.read(n_elems * 4))
        flow = np.asarray(flow, dtype=np.float32).reshape([channels, height, width])

    return flow

def load_graph_info(filename, max_edges, max_nodes):

    assert os.path.isfile(filename), "File not found: {}".format(filename)

    with open(filename, 'rb') as fin:
        edge_total_size = struct.unpack('I', fin.read(4))[0]
        edges = struct.unpack('I' * (int(edge_total_size / 4)), fin.read(edge_total_size))
        edges = np.asarray(edges, dtype=np.int16).reshape(-1, 2).transpose()
        nodes_total_size = struct.unpack('I', fin.read(4))[0]
        nodes_ids = struct.unpack('I' * (int(nodes_total_size / 4)), fin.read(nodes_total_size))
        nodes_ids = np.asarray(nodes_ids, dtype=np.int32).reshape(-1)
        nodes_ids = np.sort(nodes_ids)

        edges_extent = np.zeros((2, max_edges), dtype=np.int16)
        edges_mask = np.zeros((max_edges), dtype=np.bool)
        edges_mask[:edges.shape[1]] = 1
        edges_extent[:, :edges.shape[1]] = edges

        nodes_extent = np.zeros((max_nodes), dtype=np.int32)
        nodes_mask = np.zeros((max_nodes), dtype=np.bool)
        nodes_mask[:nodes_ids.shape[0]] = 1
        nodes_extent[:nodes_ids.shape[0]] = nodes_ids

        fx = struct.unpack('f', fin.read(4))[0]
        fy = struct.unpack('f', fin.read(4))[0]
        ox = struct.unpack('f', fin.read(4))[0]
        oy = struct.unpack('f', fin.read(4))[0]

    return edges_extent, edges_mask, nodes_extent, nodes_mask, fx, fy, ox, oy

def load_adja_id_info(filename, src_mask, H, W, num_adja, num_neigb):

    assert os.path.isfile(filename), "File not found: {}".format(filename)
    assert num_adja <= 8, "Num of adja is larger than 8"
    assert num_neigb <= 8, "Num of neighb is larger than 8"
    src_v_id = np.zeros((H*W, num_adja), dtype=np.int16)
    src_neigb_id = np.zeros((H*W, num_neigb), dtype=np.int32)
    with open(filename, 'rb') as fin:
        neigb_id, value_id = pickle.load(fin)
    assert((src_mask.sum()) == value_id.shape[0])

    for i in range(num_adja):
        src_v_id[src_mask.reshape(-1), i] = value_id[:, i]
    for i in range(num_neigb):
        src_neigb_id[src_mask.reshape(-1), i] = neigb_id[:, i]
    src_v_id = src_v_id.transpose().reshape(num_adja, H, W)
    src_neigb_id = src_neigb_id.transpose().reshape(num_neigb, H, W)

    return src_v_id, src_neigb_id

def save_flow(filename, flow_input):
    flow = np.copy(flow_input)

    # Flow is stored row-wise in order [channels, height, width].
    assert len(flow.shape) == 3

    with open(filename, 'wb') as fout:
        fout.write(struct.pack('I', flow.shape[2]))
        fout.write(struct.pack('I', flow.shape[1]))
        fout.write(struct.pack('I', flow.shape[0]))
        fout.write(struct.pack('={}f'.format(flow.size), *flow.flatten("C")))
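`save_flow` and `load_flow` share a simple binary layout: three uint32 dimensions (width, height, channels) followed by the float32 data in C order. A small round-trip sketch, with a temporary file name chosen purely for illustration:

```
import numpy as np
from utils import save_flow, load_flow  # assuming this module is on the path

flow = np.random.rand(2, 480, 272).astype(np.float32)  # (channels, height, width)
save_flow("/tmp/example.flow", flow)
flow_back = load_flow("/tmp/example.flow")
assert np.allclose(flow, flow_back)
```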
data_utils/blendshape_capture/face_landmarker.task
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
size 3758596
data_utils/blendshape_capture/main.py
ADDED
@@ -0,0 +1,86 @@
# -*-coding:utf-8-*-
import argparse
import os
import random
import numpy as np
import cv2
import glob
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from scipy.signal import savgol_filter
import onnxruntime as ort
from collections import OrderedDict
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


from tqdm import tqdm


def infer_bs(root_path):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    base_options = python.BaseOptions(model_asset_path="./data_utils/blendshape_capture/face_landmarker.task")
    options = vision.FaceLandmarkerOptions(base_options=base_options,
                                           output_face_blendshapes=True,
                                           output_facial_transformation_matrixes=True,
                                           num_faces=1)
    detector = vision.FaceLandmarker.create_from_options(options)

    for i in os.listdir(root_path):
        if i.endswith(".mp4"):
            mp4_path = os.path.join(root_path, i)
            npy_path = os.path.join(root_path, "bs.npy")
            if os.path.exists(npy_path):
                print("npy file exists:", i.split(".")[0])
                continue
            else:
                print("npy file not exists:", i.split(".")[0])
            image_path = os.path.join(root_path, "img/temp.png")
            os.makedirs(os.path.join(root_path, 'img/'), exist_ok=True)
            cap = cv2.VideoCapture(mp4_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            print("fps:", fps)
            print("frame_count:", frame_count)
            k = 0
            total = frame_count
            bs = np.zeros((int(total), 52), dtype=np.float32)
            print("total:", total)
            print("videoPath:{} fps:{}".format(mp4_path.split('/')[-1], fps))
            pbar = tqdm(total=int(total))
            while (cap.isOpened()):
                ret, frame = cap.read()
                if ret:
                    cv2.imwrite(image_path, frame)
                    image = mp.Image.create_from_file(image_path)
                    result = detector.detect(image)
                    face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in
                                               result.face_blendshapes[0]]
                    blendshape_coef = np.array(face_blendshapes_scores)[1:]
                    blendshape_coef = np.append(blendshape_coef, 0)
                    bs[k] = blendshape_coef
                    pbar.update(1)
                    k += 1
                else:
                    break
            cap.release()
            pbar.close()
            # np.save(npy_path, bs)
            # print(np.shape(bs))
            output = np.zeros((bs.shape[0], bs.shape[1]))
            for j in range(bs.shape[1]):
                output[:, j] = savgol_filter(bs[:, j], 5, 3)
            np.save(npy_path, output)
            print(np.shape(output))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", type=str, help="idname of target person")
    args = parser.parse_args()
    infer_bs(args.path)
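The script stores one 52-dimensional blendshape vector per video frame, Savitzky-Golay smoothed along time, as `bs.npy` next to the input video. A quick inspection sketch; the dataset path is an assumption:

```
import numpy as np

bs = np.load("data/<idname>/bs.npy")  # hypothetical dataset layout
print(bs.shape)                       # (num_frames, 52), scores roughly in [0, 1]
```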
data_utils/deepspeech_features/README.md
ADDED
@@ -0,0 +1,20 @@
# Routines for DeepSpeech features processing
Several routines for [DeepSpeech](https://github.com/mozilla/DeepSpeech) features processing, like speech features generation for the [VOCA](https://github.com/TimoBolkart/voca) model.

## Installation

```
pip3 install -r requirements.txt
```

## Usage

Generate wav files:
```
python3 extract_wav.py --in-video=<your_data_dir>
```

Generate files with DeepSpeech features:
```
python3 extract_ds_features.py --input=<your_data_dir>
```
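Each processed `<name>.wav` should end up with a companion `<name>_ds.npy`. A short inspection sketch; the file name and the exact window shape are assumptions based on the windowing code in this directory:

```
import numpy as np

feats = np.load("obama_ds.npy")  # hypothetical output of extract_ds_features.py
print(feats.shape)               # windowed DeepSpeech logits, typically (N, 16, 29)
```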
data_utils/deepspeech_features/deepspeech_features.py
ADDED
@@ -0,0 +1,275 @@
"""
DeepSpeech features processing routines.
NB: Based on VOCA code. See the corresponding license restrictions.
"""

__all__ = ['conv_audios_to_deepspeech']

import numpy as np
import warnings
import resampy
from scipy.io import wavfile
from python_speech_features import mfcc
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def conv_audios_to_deepspeech(audios,
                              out_files,
                              num_frames_info,
                              deepspeech_pb_path,
                              audio_window_size=1,
                              audio_window_stride=1):
    """
    Convert list of audio files into files with DeepSpeech features.

    Parameters
    ----------
    audios : list of str or list of None
        Paths to input audio files.
    out_files : list of str
        Paths to output files with DeepSpeech features.
    num_frames_info : list of int
        List of numbers of frames.
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.
    audio_window_size : int, default 1
        Audio window size.
    audio_window_stride : int, default 1
        Audio window stride.
    """
    # deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm"
    graph, logits_ph, input_node_ph, input_lengths_ph = prepare_deepspeech_net(
        deepspeech_pb_path)

    with tf.compat.v1.Session(graph=graph) as sess:
        for audio_file_path, out_file_path, num_frames in zip(audios, out_files, num_frames_info):
            print(audio_file_path)
            print(out_file_path)
            audio_sample_rate, audio = wavfile.read(audio_file_path)
            if audio.ndim != 1:
                warnings.warn(
                    "Audio has multiple channels, the first channel is used")
                audio = audio[:, 0]
            ds_features = pure_conv_audio_to_deepspeech(
                audio=audio,
                audio_sample_rate=audio_sample_rate,
                audio_window_size=audio_window_size,
                audio_window_stride=audio_window_stride,
                num_frames=num_frames,
                net_fn=lambda x: sess.run(
                    logits_ph,
                    feed_dict={
                        input_node_ph: x[np.newaxis, ...],
                        input_lengths_ph: [x.shape[0]]}))

            net_output = ds_features.reshape(-1, 29)
            win_size = 16
            zero_pad = np.zeros((int(win_size / 2), net_output.shape[1]))
            net_output = np.concatenate(
                (zero_pad, net_output, zero_pad), axis=0)
            windows = []
            for window_index in range(0, net_output.shape[0] - win_size, 2):
                windows.append(
                    net_output[window_index:window_index + win_size])
            print(np.array(windows).shape)
            np.save(out_file_path, np.array(windows))


def prepare_deepspeech_net(deepspeech_pb_path):
    """
    Load and prepare DeepSpeech network.

    Parameters
    ----------
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.

    Returns
    -------
    graph : obj
        TensorFlow graph.
    logits_ph : obj
        TensorFlow placeholder for `logits`.
    input_node_ph : obj
        TensorFlow placeholder for `input_node`.
    input_lengths_ph : obj
        TensorFlow placeholder for `input_lengths`.
    """
    # Load graph and place_holders:
    with tf.io.gfile.GFile(deepspeech_pb_path, "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    graph = tf.compat.v1.get_default_graph()
    tf.import_graph_def(graph_def, name="deepspeech")
    logits_ph = graph.get_tensor_by_name("deepspeech/logits:0")
    input_node_ph = graph.get_tensor_by_name("deepspeech/input_node:0")
    input_lengths_ph = graph.get_tensor_by_name("deepspeech/input_lengths:0")

    return graph, logits_ph, input_node_ph, input_lengths_ph


def pure_conv_audio_to_deepspeech(audio,
                                  audio_sample_rate,
                                  audio_window_size,
                                  audio_window_stride,
                                  num_frames,
                                  net_fn):
    """
    Core routine for converting audio into DeepSpeech features.

    Parameters
    ----------
    audio : np.array
        Audio data.
    audio_sample_rate : int
        Audio sample rate.
    audio_window_size : int
        Audio window size.
    audio_window_stride : int
        Audio window stride.
    num_frames : int or None
        Numbers of frames.
    net_fn : func
        Function for DeepSpeech model call.

    Returns
    -------
    np.array
        DeepSpeech features.
    """
    target_sample_rate = 16000
    if audio_sample_rate != target_sample_rate:
        resampled_audio = resampy.resample(
            x=audio.astype(np.float),
            sr_orig=audio_sample_rate,
            sr_new=target_sample_rate)
    else:
        resampled_audio = audio.astype(np.float32)
    input_vector = conv_audio_to_deepspeech_input_vector(
        audio=resampled_audio.astype(np.int16),
        sample_rate=target_sample_rate,
        num_cepstrum=26,
        num_context=9)

    network_output = net_fn(input_vector)
    # print(network_output.shape)

    deepspeech_fps = 50
    video_fps = 50  # Change this option if video fps is different
    audio_len_s = float(audio.shape[0]) / audio_sample_rate
    if num_frames is None:
        num_frames = int(round(audio_len_s * video_fps))
    else:
        video_fps = num_frames / audio_len_s
    network_output = interpolate_features(
        features=network_output[:, 0],
        input_rate=deepspeech_fps,
        output_rate=video_fps,
        output_len=num_frames)

    # Make windows:
    zero_pad = np.zeros((int(audio_window_size / 2), network_output.shape[1]))
    network_output = np.concatenate(
        (zero_pad, network_output, zero_pad), axis=0)
    windows = []
    for window_index in range(0, network_output.shape[0] - audio_window_size, audio_window_stride):
        windows.append(
            network_output[window_index:window_index + audio_window_size])

    return np.array(windows)


def conv_audio_to_deepspeech_input_vector(audio,
                                          sample_rate,
                                          num_cepstrum,
                                          num_context):
    """
    Convert audio raw data into DeepSpeech input vector.

    Parameters
    ----------
    audio : np.array
        Audio data.
    sample_rate : int
        Audio sample rate.
    num_cepstrum : int
        Number of cepstrum.
    num_context : int
        Number of context.

    Returns
    -------
    np.array
        DeepSpeech input vector.
    """
    # Get mfcc coefficients:
    features = mfcc(
        signal=audio,
        samplerate=sample_rate,
        numcep=num_cepstrum)

    # We only keep every second feature (BiRNN stride = 2):
    features = features[::2]

    # One stride per time step in the input:
    num_strides = len(features)

    # Add empty initial and final contexts:
    empty_context = np.zeros((num_context, num_cepstrum), dtype=features.dtype)
    features = np.concatenate((empty_context, features, empty_context))

    # Create a view into the array with overlapping strides of size
    # numcontext (past) + 1 (present) + numcontext (future):
    window_size = 2 * num_context + 1
    train_inputs = np.lib.stride_tricks.as_strided(
        features,
        shape=(num_strides, window_size, num_cepstrum),
        strides=(features.strides[0],
                 features.strides[0], features.strides[1]),
        writeable=False)

    # Flatten the second and third dimensions:
    train_inputs = np.reshape(train_inputs, [num_strides, -1])

    train_inputs = np.copy(train_inputs)
    train_inputs = (train_inputs - np.mean(train_inputs)) / \
        np.std(train_inputs)

    return train_inputs


def interpolate_features(features,
                         input_rate,
                         output_rate,
                         output_len):
    """
    Interpolate DeepSpeech features.

    Parameters
    ----------
    features : np.array
        DeepSpeech features.
    input_rate : int
        Input rate (FPS).
    output_rate : int
        Output rate (FPS).
    output_len : int
        Output data length.

    Returns
    -------
    np.array
        Interpolated data.
    """
    input_len = features.shape[0]
    num_features = features.shape[1]
    input_timestamps = np.arange(input_len) / float(input_rate)
    output_timestamps = np.arange(output_len) / float(output_rate)
    output_features = np.zeros((output_len, num_features))
    for feature_idx in range(num_features):
        output_features[:, feature_idx] = np.interp(
            x=output_timestamps,
            xp=input_timestamps,
            fp=features[:, feature_idx])
    return output_features
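`interpolate_features` simply resamples each of the 29 logit channels independently along time with `np.interp`. A tiny worked example with made-up rates and lengths:

```
import numpy as np
from deepspeech_features import interpolate_features  # assuming this module is importable

feats_50fps = np.random.rand(100, 29)                      # 2 s of DeepSpeech output at 50 FPS
feats_25fps = interpolate_features(feats_50fps, 50, 25, 50)
print(feats_25fps.shape)                                   # (50, 29)
```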
data_utils/deepspeech_features/deepspeech_store.py
ADDED
@@ -0,0 +1,172 @@
"""
Routines for loading DeepSpeech model.
"""

__all__ = ['get_deepspeech_model_file']

import os
import zipfile
import logging
import hashlib


deepspeech_features_repo_url = 'https://github.com/osmr/deepspeech_features'


def get_deepspeech_model_file(local_model_store_dir_path=os.path.join("~", ".tensorflow", "models")):
    """
    Return location of the pretrained model on the local file system. This function will download from the online
    model zoo when the model cannot be found or has a mismatch. The root directory will be created if it doesn't exist.

    Parameters
    ----------
    local_model_store_dir_path : str, default $TENSORFLOW_HOME/models
        Location for keeping the model parameters.

    Returns
    -------
    file_path
        Path to the requested pretrained model file.
    """
    sha1_hash = "b90017e816572ddce84f5843f1fa21e6a377975e"
    file_name = "deepspeech-0_1_0-b90017e8.pb"
    local_model_store_dir_path = os.path.expanduser(local_model_store_dir_path)
    file_path = os.path.join(local_model_store_dir_path, file_name)
    if os.path.exists(file_path):
        if _check_sha1(file_path, sha1_hash):
            return file_path
        else:
            logging.warning("Mismatch in the content of model file detected. Downloading again.")
    else:
        logging.info("Model file not found. Downloading to {}.".format(file_path))

    if not os.path.exists(local_model_store_dir_path):
        os.makedirs(local_model_store_dir_path)

    zip_file_path = file_path + ".zip"
    _download(
        url="{repo_url}/releases/download/{repo_release_tag}/{file_name}.zip".format(
            repo_url=deepspeech_features_repo_url,
            repo_release_tag="v0.0.1",
            file_name=file_name),
        path=zip_file_path,
        overwrite=True)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(local_model_store_dir_path)
    os.remove(zip_file_path)

    if _check_sha1(file_path, sha1_hash):
        return file_path
    else:
        raise ValueError("Downloaded file has different hash. Please try again.")


def _download(url, path=None, overwrite=False, sha1_hash=None, retries=5, verify_ssl=True):
    """
    Download a given URL.

    Parameters
    ----------
    url : str
        URL to download.
    path : str, optional
        Destination path to store downloaded file. By default stores to the
        current directory with same name as in url.
    overwrite : bool, optional
        Whether to overwrite destination file if already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
        but doesn't match.
    retries : integer, default 5
        The number of times to attempt the download in case of failure or non 200 return codes.
    verify_ssl : bool, default True
        Verify SSL certificates.

    Returns
    -------
    str
        The file path of the downloaded file.
    """
    import warnings
    try:
        import requests
    except ImportError:
        class requests_failed_to_import(object):
            pass
        requests = requests_failed_to_import

    if path is None:
        fname = url.split("/")[-1]
        # Empty filenames are invalid
        assert fname, "Can't construct file-name from this URL. Please set the `path` option manually."
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split("/")[-1])
        else:
            fname = path
    assert retries >= 0, "Number of retries should be at least 0"

    if not verify_ssl:
        warnings.warn(
            "Unverified HTTPS request is being made (verify_ssl=False). "
            "Adding certificate verification is strongly advised.")

    if overwrite or not os.path.exists(fname) or (sha1_hash and not _check_sha1(fname, sha1_hash)):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        while retries + 1 > 0:
            # Disable pylint too broad Exception
            # pylint: disable=W0703
            try:
                print("Downloading {} from {}...".format(fname, url))
                r = requests.get(url, stream=True, verify=verify_ssl)
                if r.status_code != 200:
                    raise RuntimeError("Failed downloading url {}".format(url))
                with open(fname, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                if sha1_hash and not _check_sha1(fname, sha1_hash):
                    raise UserWarning("File {} is downloaded but the content hash does not match."
                                      " The repo may be outdated or download may be incomplete. "
                                      "If the `repo_url` is overridden, consider switching to "
                                      "the default repo.".format(fname))
                break
            except Exception as e:
                retries -= 1
                if retries <= 0:
                    raise e
                else:
                    print("download failed, retrying, {} attempt{} left"
                          .format(retries, "s" if retries > 1 else ""))

    return fname


def _check_sha1(filename, sha1_hash):
    """
    Check whether the sha1 hash of the file content matches the expected hash.

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    sha1 = hashlib.sha1()
    with open(filename, "rb") as f:
        while True:
            data = f.read(1048576)
            if not data:
                break
            sha1.update(data)

    return sha1.hexdigest() == sha1_hash
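A short sketch of the download-and-cache behaviour; the cache directory is the default argument shown above.

```
from deepspeech_store import get_deepspeech_model_file

# Downloads deepspeech-0_1_0-b90017e8.pb on first call, then reuses the cached copy
# under ~/.tensorflow/models/ as long as its SHA-1 still matches.
pb_path = get_deepspeech_model_file()
print(pb_path)
```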
data_utils/deepspeech_features/extract_ds_features.py
ADDED
@@ -0,0 +1,132 @@
"""
Script for extracting DeepSpeech features from audio file.
"""

import os
import argparse
import numpy as np
import pandas as pd
from deepspeech_store import get_deepspeech_model_file
from deepspeech_features import conv_audios_to_deepspeech


def parse_args():
    """
    Create python script parameters.
    Returns
    -------
    ArgumentParser
        Resulted args.
    """
    parser = argparse.ArgumentParser(
        description="Extract DeepSpeech features from audio file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="path to input audio file or directory")
    parser.add_argument(
        "--output",
        type=str,
        help="path to output file with DeepSpeech features")
    parser.add_argument(
        "--deepspeech",
        type=str,
        help="path to DeepSpeech 0.1.0 frozen model")
    parser.add_argument(
        "--metainfo",
        type=str,
        help="path to file with meta-information")

    args = parser.parse_args()
    return args


def extract_features(in_audios,
                     out_files,
                     deepspeech_pb_path,
                     metainfo_file_path=None):
    """
    Extract DeepSpeech features from audio files.
    Parameters
    ----------
    in_audios : list of str
        Paths to input audio files.
    out_files : list of str
        Paths to output files with DeepSpeech features.
    deepspeech_pb_path : str
        Path to DeepSpeech 0.1.0 frozen model.
    metainfo_file_path : str, default None
        Path to file with meta-information.
    """
    # deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm"
    if metainfo_file_path is None:
        num_frames_info = [None] * len(in_audios)
    else:
        train_df = pd.read_csv(
            metainfo_file_path,
            sep="\t",
            index_col=False,
            dtype={"Id": np.int, "File": np.unicode, "Count": np.int})
        num_frames_info = train_df["Count"].values
        assert (len(num_frames_info) == len(in_audios))

    for i, in_audio in enumerate(in_audios):
        if not out_files[i]:
            file_stem, _ = os.path.splitext(in_audio)
            out_files[i] = file_stem + "_ds.npy"
            # print(out_files[i])
    conv_audios_to_deepspeech(
        audios=in_audios,
        out_files=out_files,
        num_frames_info=num_frames_info,
        deepspeech_pb_path=deepspeech_pb_path)


def main():
    """
    Main body of script.
    """
    args = parse_args()
    in_audio = os.path.expanduser(args.input)
    if not os.path.exists(in_audio):
        raise Exception("Input file/directory doesn't exist: {}".format(in_audio))
    deepspeech_pb_path = args.deepspeech
    # add
    deepspeech_pb_path = True
    args.deepspeech = '~/.tensorflow/models/deepspeech-0_1_0-b90017e8.pb'
    # deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm"
    if deepspeech_pb_path is None:
        deepspeech_pb_path = ""
    if deepspeech_pb_path:
        deepspeech_pb_path = os.path.expanduser(args.deepspeech)
        if not os.path.exists(deepspeech_pb_path):
            deepspeech_pb_path = get_deepspeech_model_file()
    if os.path.isfile(in_audio):
        extract_features(
            in_audios=[in_audio],
            out_files=[args.output],
            deepspeech_pb_path=deepspeech_pb_path,
            metainfo_file_path=args.metainfo)
    else:
        audio_file_paths = []
        for file_name in os.listdir(in_audio):
            if not os.path.isfile(os.path.join(in_audio, file_name)):
                continue
            _, file_ext = os.path.splitext(file_name)
            if file_ext.lower() == ".wav":
                audio_file_path = os.path.join(in_audio, file_name)
                audio_file_paths.append(audio_file_path)
        audio_file_paths = sorted(audio_file_paths)
        out_file_paths = [""] * len(audio_file_paths)
        extract_features(
            in_audios=audio_file_paths,
            out_files=out_file_paths,
            deepspeech_pb_path=deepspeech_pb_path,
            metainfo_file_path=args.metainfo)


if __name__ == "__main__":
    main()
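Note that `main()` hard-codes `deepspeech_pb_path = True` and the `~/.tensorflow/models/...` path before falling back to `get_deepspeech_model_file()`. When calling the helper directly, a hedged sketch (the audio path is an assumption):

```
# Sketch: bypassing the CLI; "data/obama/aud.wav" is a hypothetical path.
from deepspeech_store import get_deepspeech_model_file
from extract_ds_features import extract_features

extract_features(
    in_audios=["data/obama/aud.wav"],
    out_files=[""],  # "" -> writes data/obama/aud_ds.npy next to the input
    deepspeech_pb_path=get_deepspeech_model_file())
```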
data_utils/deepspeech_features/extract_wav.py
ADDED
@@ -0,0 +1,87 @@
"""
Script for extracting audio (16-bit, mono, 16000 Hz) from video file.
"""

import os
import argparse
import subprocess


def parse_args():
    """
    Create python script parameters.

    Returns
    -------
    ArgumentParser
        Resulted args.
    """
    parser = argparse.ArgumentParser(
        description="Extract audio from video file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--in-video",
        type=str,
        required=True,
        help="path to input video file or directory")
    parser.add_argument(
        "--out-audio",
        type=str,
        help="path to output audio file")

    args = parser.parse_args()
    return args


def extract_audio(in_video,
                  out_audio):
    """
    Extract audio from a video file.

    Parameters
    ----------
    in_video : str
        Path to input video file.
    out_audio : str
        Path to output audio file.
    """
    if not out_audio:
        file_stem, _ = os.path.splitext(in_video)
        out_audio = file_stem + ".wav"
    # command1 = "ffmpeg -i {in_video} -vn -acodec copy {aac_audio}"
    # command2 = "ffmpeg -i {aac_audio} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}"
    # command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 22000 {out_audio}"
    command = "ffmpeg -i {in_video} -vn -acodec pcm_s16le -ac 1 -ar 16000 {out_audio}"
    subprocess.call([command.format(in_video=in_video, out_audio=out_audio)], shell=True)


def main():
    """
    Main body of script.
    """
    args = parse_args()
    in_video = os.path.expanduser(args.in_video)
    if not os.path.exists(in_video):
        raise Exception("Input file/directory doesn't exist: {}".format(in_video))
    if os.path.isfile(in_video):
        extract_audio(
            in_video=in_video,
            out_audio=args.out_audio)
    else:
        video_file_paths = []
        for file_name in os.listdir(in_video):
            if not os.path.isfile(os.path.join(in_video, file_name)):
                continue
            _, file_ext = os.path.splitext(file_name)
            if file_ext.lower() in (".mp4", ".mkv", ".avi"):
                video_file_path = os.path.join(in_video, file_name)
                video_file_paths.append(video_file_path)
        video_file_paths = sorted(video_file_paths)
        for video_file_path in video_file_paths:
            extract_audio(
                in_video=video_file_path,
                out_audio="")


if __name__ == "__main__":
    main()
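`extract_audio` shells out to ffmpeg and writes a 16 kHz mono PCM wav next to the video when `out_audio` is empty. A minimal sketch; the video path is an assumption:

```
from extract_wav import extract_audio

extract_audio(in_video="data/obama/obama.mp4", out_audio="")  # -> data/obama/obama.wav
```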
data_utils/deepspeech_features/fea_win.py
ADDED
@@ -0,0 +1,11 @@
import numpy as np

net_output = np.load('french.ds.npy').reshape(-1, 29)
win_size = 16
zero_pad = np.zeros((int(win_size / 2), net_output.shape[1]))
net_output = np.concatenate((zero_pad, net_output, zero_pad), axis=0)
windows = []
for window_index in range(0, net_output.shape[0] - win_size, 2):
    windows.append(net_output[window_index:window_index + win_size])
print(np.array(windows).shape)
np.save('aud_french.npy', np.array(windows))
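The windowing above mirrors the one in `deepspeech_features.py`: with `win_size = 16` and stride 2, a `(T, 29)` logit sequence padded by 8 zero frames on each side yields roughly `T / 2` windows of shape `(16, 29)`. A worked check with a made-up length:

```
T = 500                                    # assumed number of DeepSpeech frames
padded = T + 16                            # 8 zero frames on each side
n_windows = len(range(0, padded - 16, 2))  # windows of 16 frames, stride 2
print(n_windows)                           # -> 250, i.e. T / 2
```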
data_utils/face_parsing/logger.py
ADDED
@@ -0,0 +1,23 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import os.path as osp
import time
import sys
import logging

import torch.distributed as dist


def setup_logger(logpth):
    logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
    logfile = osp.join(logpth, logfile)
    FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
    log_level = logging.INFO
    if dist.is_initialized() and not dist.get_rank() == 0:
        log_level = logging.ERROR
    logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
    logging.root.addHandler(logging.StreamHandler())
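A minimal sketch of using the logger, assuming the log directory already exists:

```
import logging
from logger import setup_logger

setup_logger("./res")  # writes res/BiSeNet-<timestamp>.log and echoes records to stderr
logging.info("face parsing started")
```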
data_utils/face_parsing/model.py
ADDED
@@ -0,0 +1,285 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from resnet import Resnet18
# from modules.bn import InPlaceABNSync as BatchNorm2d


class ConvBNReLU(nn.Module):
    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        self.conv = nn.Conv2d(in_chan,
                              out_chan,
                              kernel_size=ks,
                              stride=stride,
                              padding=padding,
                              bias=False)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = F.relu(self.bn(x))
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

class BiSeNetOutput(nn.Module):
    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = self.conv_out(x)
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class AttentionRefinementModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv_atten(atten)
        atten = self.bn_atten(atten)
        atten = self.sigmoid_atten(atten)
        out = torch.mul(feat, atten)
        return out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class ContextPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(ContextPath, self).__init__()
        self.resnet = Resnet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        H0, W0 = x.size()[2:]
        feat8, feat16, feat32 = self.resnet(x)
        H8, W8 = feat8.size()[2:]
        H16, W16 = feat16.size()[2:]
        H32, W32 = feat32.size()[2:]

        avg = F.avg_pool2d(feat32, feat32.size()[2:])
        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat8, feat16_up, feat32_up  # x8, x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


### This is not used, since I replace this with the resnet feature with the same size
class SpatialPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        feat = self.conv1(x)
        feat = self.conv2(feat)
        feat = self.conv3(feat)
        feat = self.conv_out(feat)
        return feat

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class FeatureFusionModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan,
                               out_chan // 4,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4,
                               out_chan,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        fcat = torch.cat([fsp, fcp], dim=1)
        feat = self.convblk(fcat)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv1(atten)
        atten = self.relu(atten)
        atten = self.conv2(atten)
        atten = self.sigmoid(atten)
        feat_atten = torch.mul(feat, atten)
        feat_out = feat_atten + feat
        return feat_out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class BiSeNet(nn.Module):
    def __init__(self, n_classes, *args, **kwargs):
        super(BiSeNet, self).__init__()
        self.cp = ContextPath()
        ## here self.sp is deleted
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
        self.init_weight()

    def forward(self, x):
        H, W = x.size()[2:]
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
        feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
        feat_fuse = self.ffm(feat_sp, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
+
feat_out32 = self.conv_out32(feat_cp16)
|
250 |
+
|
251 |
+
feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
|
252 |
+
feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
|
253 |
+
feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
|
254 |
+
|
255 |
+
# return feat_out, feat_out16, feat_out32
|
256 |
+
return feat_out
|
257 |
+
|
258 |
+
def init_weight(self):
|
259 |
+
for ly in self.children():
|
260 |
+
if isinstance(ly, nn.Conv2d):
|
261 |
+
nn.init.kaiming_normal_(ly.weight, a=1)
|
262 |
+
if not ly.bias is None: nn.init.constant_(ly.bias, 0)
|
263 |
+
|
264 |
+
def get_params(self):
|
265 |
+
wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
|
266 |
+
for name, child in self.named_children():
|
267 |
+
child_wd_params, child_nowd_params = child.get_params()
|
268 |
+
if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
|
269 |
+
lr_mul_wd_params += child_wd_params
|
270 |
+
lr_mul_nowd_params += child_nowd_params
|
271 |
+
else:
|
272 |
+
wd_params += child_wd_params
|
273 |
+
nowd_params += child_nowd_params
|
274 |
+
return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
|
275 |
+
|
276 |
+
|
277 |
+
if __name__ == "__main__":
|
278 |
+
net = BiSeNet(19)
|
279 |
+
net.cuda()
|
280 |
+
net.eval()
|
281 |
+
in_ten = torch.randn(16, 3, 640, 480).cuda()
|
282 |
+
out, out16, out32 = net(in_ten)
|
283 |
+
print(out.shape)
|
284 |
+
|
285 |
+
net.get_params()
|
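Since forward now returns only the fused, full-resolution score map, single-image inference is a one-call affair. A minimal inference sketch (not part of this commit; it assumes the script is run from data_utils/face_parsing, that a pretrained checkpoint such as 79999_iter.pth is available, and that face.jpg is a hypothetical input) mirrors what test.py below does:

import torch
import torchvision.transforms as transforms
from PIL import Image
from model import BiSeNet

net = BiSeNet(n_classes=19).cuda().eval()
net.load_state_dict(torch.load('79999_iter.pth'))  # hypothetical checkpoint path

to_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
img = Image.open('face.jpg').convert('RGB').resize((512, 512), Image.BILINEAR)
with torch.no_grad():
    out = net(to_tensor(img).unsqueeze(0).cuda())     # [1, 19, 512, 512] class scores
    parsing = out.squeeze(0).argmax(0).cpu().numpy()  # per-pixel class labels
print(parsing.shape)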
data_utils/face_parsing/resnet.py
ADDED
@@ -0,0 +1,109 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo

# from modules.bn import InPlaceABNSync as BatchNorm2d

resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if in_chan != out_chan or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan),
            )

    def forward(self, x):
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out = shortcut + residual
        out = self.relu(out)
        return out


def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    layers = [BasicBlock(in_chan, out_chan, stride=stride)]
    for i in range(bnum-1):
        layers.append(BasicBlock(out_chan, out_chan, stride=1))
    return nn.Sequential(*layers)


class Resnet18(nn.Module):
    def __init__(self):
        super(Resnet18, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.maxpool(x)

        x = self.layer1(x)
        feat8 = self.layer2(x)        # 1/8
        feat16 = self.layer3(feat8)   # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        state_dict = modelzoo.load_url(resnet18_url)
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            if 'fc' in k: continue
            self_state_dict.update({k: v})
        self.load_state_dict(self_state_dict)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


if __name__ == "__main__":
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    print(out[0].size())
    print(out[1].size())
    print(out[2].size())
    net.get_params()
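ContextPath only depends on the strides and channel widths of the three returned features. For the 512x512 crops used by test.py below, a quick shape check (a minimal sketch, not in this commit; note that constructing Resnet18 downloads the ImageNet weights from resnet18_url on first use) would look like:

import torch
from resnet import Resnet18

net = Resnet18().eval()
with torch.no_grad():
    feat8, feat16, feat32 = net(torch.randn(1, 3, 512, 512))
print(feat8.shape)   # torch.Size([1, 128, 64, 64])  -> 1/8 resolution
print(feat16.shape)  # torch.Size([1, 256, 32, 32])  -> 1/16 resolution
print(feat32.shape)  # torch.Size([1, 512, 16, 16])  -> 1/32 resolution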
data_utils/face_parsing/test.py
ADDED
@@ -0,0 +1,148 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import numpy as np
from model import BiSeNet

import torch

import os
import os.path as osp

from PIL import Image
import torchvision.transforms as transforms
import cv2
from pathlib import Path
import configargparse
import tqdm

# import ttach as tta

def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg',
                     img_size=(512, 512)):
    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(
        vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros(
        (vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + np.array([255, 255, 255])  # + 255
    vis_parsing_anno_color_face = np.zeros(
        (vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + np.array([255, 255, 255])  # + 255

    num_of_class = np.max(vis_parsing_anno)
    # print(num_of_class)
    for pi in range(1, 14):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = np.array([255, 0, 0])
    for pi in range(14, 16):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = np.array([0, 255, 0])
    for pi in range(16, 17):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = np.array([0, 0, 255])
    for pi in range(17, num_of_class+1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = np.array([255, 0, 0])

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    index = np.where(vis_parsing_anno == num_of_class-1)
    vis_im = cv2.resize(vis_parsing_anno_color, img_size,
                        interpolation=cv2.INTER_NEAREST)
    if save_im:
        cv2.imwrite(save_path, vis_im)

    for pi in range(1, 7):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color_face[index[0], index[1], :] = np.array([255, 0, 0])
    for pi in range(10, 14):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color_face[index[0], index[1], :] = np.array([255, 0, 0])
    pad = 5
    vis_parsing_anno_color_face = vis_parsing_anno_color_face.astype(np.uint8)
    face_part = (vis_parsing_anno_color_face[..., 0] == 255) & (vis_parsing_anno_color_face[..., 1] == 0) & (vis_parsing_anno_color_face[..., 2] == 0)
    face_coords = np.stack(np.nonzero(face_part), axis=-1)
    sorted_inds = np.lexsort((-face_coords[:, 0], face_coords[:, 1]))
    sorted_face_coords = face_coords[sorted_inds]
    u, uid, ucnt = np.unique(sorted_face_coords[:, 1], return_index=True, return_counts=True)
    bottom_face_coords = sorted_face_coords[uid] + np.array([pad, 0])
    rows, cols, _ = vis_parsing_anno_color_face.shape

    # keep the shifted coordinates inside the image bounds
    bottom_face_coords[:, 0] = np.clip(bottom_face_coords[:, 0], 0, rows - 1)

    y_min = np.min(bottom_face_coords[:, 1])
    y_max = np.max(bottom_face_coords[:, 1])

    # compute the start and end columns of parts 1 and 2
    y_range = y_max - y_min
    height_per_part = y_range // 4

    start_y_part1 = y_min + height_per_part
    end_y_part1 = start_y_part1 + height_per_part

    start_y_part2 = end_y_part1
    end_y_part2 = start_y_part2 + height_per_part

    for coord in bottom_face_coords:
        x, y = coord
        start_x = max(x - pad, 0)
        end_x = min(x + pad, rows)
        if start_y_part1 <= y <= end_y_part1 or start_y_part2 <= y <= end_y_part2:
            vis_parsing_anno_color_face[start_x:end_x, y] = [255, 0, 0]
        # else:
        #     start_x = max(x - 2*pad, 0)
        #     end_x = max(x - pad, 0)
        #     vis_parsing_anno_color_face[start_x:end_x+1, y] = [255, 255, 255]

    vis_im = cv2.GaussianBlur(vis_parsing_anno_color_face, (9, 9), cv2.BORDER_DEFAULT)

    vis_im = cv2.resize(vis_im, img_size,
                        interpolation=cv2.INTER_NEAREST)

    cv2.imwrite(save_path.replace('.png', '_face.png'), vis_im)


def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):

    Path(respth).mkdir(parents=True, exist_ok=True)

    print(f'[INFO] loading model...')
    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.load_state_dict(torch.load(cp))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    image_paths = os.listdir(dspth)

    with torch.no_grad():
        for image_path in tqdm.tqdm(image_paths):
            if image_path.endswith('.jpg') or image_path.endswith('.png'):
                img = Image.open(osp.join(dspth, image_path))
                ori_size = img.size
                image = img.resize((512, 512), Image.BILINEAR)
                image = image.convert("RGB")
                img = to_tensor(image)

                # test-time augmentation.
                inputs = torch.unsqueeze(img, 0)  # [1, 3, 512, 512]
                outputs = net(inputs.cuda())
                parsing = outputs.mean(0).cpu().numpy().argmax(0)
                image_path = int(image_path[:-4])
                image_path = str(image_path) + '.png'

                vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path), img_size=ori_size)


if __name__ == "__main__":
    parser = configargparse.ArgumentParser()
    parser.add_argument('--respath', type=str, default='./result/', help='result path for label')
    parser.add_argument('--imgpath', type=str, default='./imgs/', help='path for input images')
    parser.add_argument('--modelpath', type=str, default='data_utils/face_parsing/79999_iter.pth')
    args = parser.parse_args()
    evaluate(respth=args.respath, dspth=args.imgpath, cp=args.modelpath)
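Because each filename is converted to an integer before the mask is saved, the input directory is expected to hold numbered video frames (0.jpg, 1.jpg, ...). A sketch of driving the script programmatically, with hypothetical paths, would be:

# run from data_utils/face_parsing; all paths below are hypothetical
from test import evaluate

evaluate(respth='./result/',   # where <frame>.png and <frame>_face.png masks are written
         dspth='./imgs/',      # directory of integer-named frames: 0.jpg, 1.jpg, ...
         cp='data_utils/face_parsing/79999_iter.pth')  # pretrained BiSeNet checkpoint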
data_utils/face_tracking/3DMM/lands_info.txt
ADDED
@@ -0,0 +1,403 @@
The diff for this file is too large to render (403 lines of integer landmark index/count data consumed by face_tracker.py).
See raw diff
data_utils/face_tracking/3DMM/tris.txt
ADDED
The diff for this file is too large to render.
See raw diff
data_utils/face_tracking/3DMM/vert_tris.txt
ADDED
The diff for this file is too large to render.
See raw diff
data_utils/face_tracking/__init__.py
ADDED
File without changes
data_utils/face_tracking/bundle_adjustment.py
ADDED
@@ -0,0 +1,63 @@
import numpy as np
import os
import torch
from util import *
import argparse


def set_requires_grad(tensor_list):
    for tensor in tensor_list:
        tensor.requires_grad = True


parser = argparse.ArgumentParser()

parser.add_argument(
    "--path", type=str, default="", help="idname of target person")
parser.add_argument('--img_h', type=int, default=512, help='height of image')
parser.add_argument('--img_w', type=int, default=512, help='width of image')
args = parser.parse_args()
id_dir = args.path

params_dict = torch.load(os.path.join(id_dir, 'track_params.pt'))
euler_angle = params_dict['euler'].cuda()
trans = params_dict['trans'].cuda() / 1000.0
focal_len = params_dict['focal'].cuda()

track_xys = torch.as_tensor(
    np.load(os.path.join(id_dir, 'track_xys.npy'))).float().cuda()
num_frames = track_xys.shape[0]
point_num = track_xys.shape[1]

pts = torch.zeros((point_num, 3), dtype=torch.float32).cuda()
set_requires_grad([euler_angle, trans, pts])

cxy = torch.Tensor((args.img_w/2.0, args.img_h/2.0)).float().cuda()

optimizer_pts = torch.optim.Adam([pts], lr=1e-2)
iter_num = 500
for iter in range(iter_num):
    proj_pts = forward_transform(pts.unsqueeze(0).expand(
        num_frames, -1, -1), euler_angle, trans, focal_len, cxy)
    loss = cal_lan_loss(proj_pts[..., :2], track_xys)
    optimizer_pts.zero_grad()
    loss.backward()
    optimizer_pts.step()


optimizer_ba = torch.optim.Adam([pts, euler_angle, trans], lr=1e-4)


iter_num = 8000
for iter in range(iter_num):
    proj_pts = forward_transform(pts.unsqueeze(0).expand(
        num_frames, -1, -1), euler_angle, trans, focal_len, cxy)
    loss_lan = cal_lan_loss(proj_pts[..., :2], track_xys)
    loss = loss_lan
    optimizer_ba.zero_grad()
    loss.backward()
    optimizer_ba.step()

torch.save({'euler': euler_angle.detach().cpu(),
            'trans': trans.detach().cpu(),
            'focal': focal_len.detach().cpu()}, os.path.join(id_dir, 'bundle_adjustment.pt'))
print('bundle adjustment params saved')
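This script relies on two helpers from util.py that are not part of this change: forward_transform (rigid transform plus pinhole projection, see the sketch after geo_transform.py below) and cal_lan_loss (landmark reprojection error). As a rough sketch of what cal_lan_loss is assumed to compute (the name suffix and exact reduction are assumptions, not the shipped implementation):

import torch

def cal_lan_loss_sketch(proj_lan, gt_lan):
    # mean squared 2D reprojection error over frames and landmarks;
    # proj_lan and gt_lan are both (num_frames, point_num, 2)
    return torch.mean((proj_lan - gt_lan) ** 2)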
data_utils/face_tracking/convert_BFM.py
ADDED
@@ -0,0 +1,39 @@
import numpy as np
from scipy.io import loadmat

original_BFM = loadmat("3DMM/01_MorphableModel.mat")
sub_inds = np.load("3DMM/topology_info.npy", allow_pickle=True).item()["sub_inds"]

shapePC = original_BFM["shapePC"]
shapeEV = original_BFM["shapeEV"]
shapeMU = original_BFM["shapeMU"]
texPC = original_BFM["texPC"]
texEV = original_BFM["texEV"]
texMU = original_BFM["texMU"]

b_shape = shapePC.reshape(-1, 199).transpose(1, 0).reshape(199, -1, 3)
mu_shape = shapeMU.reshape(-1, 3)

b_tex = texPC.reshape(-1, 199).transpose(1, 0).reshape(199, -1, 3)
mu_tex = texMU.reshape(-1, 3)

b_shape = b_shape[:, sub_inds, :].reshape(199, -1)
mu_shape = mu_shape[sub_inds, :].reshape(-1)
b_tex = b_tex[:, sub_inds, :].reshape(199, -1)
mu_tex = mu_tex[sub_inds, :].reshape(-1)

exp_info = np.load("3DMM/exp_info.npy", allow_pickle=True).item()
np.save(
    "3DMM/3DMM_info.npy",
    {
        "mu_shape": mu_shape,
        "b_shape": b_shape,
        "sig_shape": shapeEV.reshape(-1),
        "mu_exp": exp_info["mu_exp"],
        "b_exp": exp_info["base_exp"],
        "sig_exp": exp_info["sig_exp"],
        "mu_tex": mu_tex,
        "b_tex": b_tex,
        "sig_tex": texEV.reshape(-1),
    },
)
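The resulting 3DMM_info.npy is the only asset facemodel.py needs. A quick inspection of what was written, a sketch assuming the 199-component Basel Face Model and the downsampled topology in topology_info.npy, could be:

import numpy as np

info = np.load("3DMM/3DMM_info.npy", allow_pickle=True).item()
for key in ("mu_shape", "b_shape", "sig_shape", "mu_exp", "b_exp", "sig_exp"):
    print(key, np.asarray(info[key]).shape)
# b_shape should come out as (199, 3 * len(sub_inds)) and mu_shape as (3 * len(sub_inds),)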
data_utils/face_tracking/data_loader.py
ADDED
@@ -0,0 +1,23 @@
import torch
import cv2
import numpy as np
import os


def load_dir(path, start, end):
    lmss = []
    for i in range(start, end):
        datapath = os.path.join(path, str(i) + '.lms')
        if os.path.isfile(datapath):
            lms = np.loadtxt(os.path.join(
                path, str(i) + '.lms'), dtype=np.float32)
            lmss.append(lms)
        #
        # datapath = os.path.join(path, '{:d}.lms'.format(i))
        # if os.path.isfile(datapath):
        #     lms = np.loadtxt(os.path.join(
        #         path, '{:d}.lms'.format(i)), dtype=np.float32)
        #     lmss.append(lms)
    lmss = np.stack(lmss)
    lmss = torch.as_tensor(lmss).cuda()
    return lmss
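load_dir expects one <frame_index>.lms landmark file per frame, each holding a 2D array of detected landmark coordinates, and stacks whatever exists in the range onto the GPU. A minimal usage sketch (hypothetical directory and frame range, CUDA device required):

from data_loader import load_dir

# stacks obama/ori_imgs/0.lms ... 99.lms into one CUDA tensor
lms = load_dir('obama/ori_imgs', 0, 100)
print(lms.shape)  # (num_found_frames, num_landmarks, 2)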
data_utils/face_tracking/face_tracker.py
ADDED
@@ -0,0 +1,146 @@
# ref: https://github.com/ShunyuYao/DFA-NeRF
from numpy.core.numeric import require
from numpy.lib.function_base import quantile
import torch
import numpy as np
from facemodel import Face_3DMM
from data_loader import load_dir
from util import *
import os
import sys
import cv2
import imageio
import argparse

dir_path = os.path.dirname(os.path.realpath(__file__))


def set_requires_grad(tensor_list):
    for tensor in tensor_list:
        tensor.requires_grad = True


parser = argparse.ArgumentParser()
parser.add_argument(
    "--path", type=str, default="obama/ori_imgs", help="idname of target person")
parser.add_argument('--img_h', type=int, default=512, help='image height')
parser.add_argument('--img_w', type=int, default=512, help='image width')
parser.add_argument('--frame_num', type=int,
                    default=11000, help='image number')
args = parser.parse_args()
start_id = 0
end_id = args.frame_num

lms = load_dir(args.path, start_id, end_id)
num_frames = lms.shape[0]
h, w = args.img_h, args.img_w
cxy = torch.tensor((w/2.0, h/2.0), dtype=torch.float).cuda()
id_dim, exp_dim, tex_dim, point_num = 100, 79, 100, 34650
model_3dmm = Face_3DMM(os.path.join(dir_path, '3DMM'),
                       id_dim, exp_dim, tex_dim, point_num)
lands_info = np.loadtxt(os.path.join(
    dir_path, '3DMM', 'lands_info.txt'), dtype=np.int32)
lands_info = torch.as_tensor(lands_info).cuda()
# mesh = openmesh.read_trimesh(os.path.join(dir_path, '3DMM', 'template.obj'))
focal = 1150

id_para = lms.new_zeros((1, id_dim), requires_grad=True)
exp_para = lms.new_zeros((num_frames, exp_dim), requires_grad=True)
tex_para = lms.new_zeros((1, tex_dim), requires_grad=True)
euler_angle = lms.new_zeros((num_frames, 3), requires_grad=True)
trans = lms.new_zeros((num_frames, 3), requires_grad=True)
light_para = lms.new_zeros((num_frames, 27), requires_grad=True)
trans.data[:, 2] -= 600
focal_length = lms.new_zeros(1, requires_grad=True)
focal_length.data += focal

set_requires_grad([id_para, exp_para, tex_para,
                   euler_angle, trans, light_para])

sel_ids = np.arange(0, num_frames, 10)
sel_num = sel_ids.shape[0]
arg_focal = 0.0
arg_landis = 1e5
for focal in range(500, 1500, 50):
    id_para = lms.new_zeros((1, id_dim), requires_grad=True)
    exp_para = lms.new_zeros((sel_num, exp_dim), requires_grad=True)
    euler_angle = lms.new_zeros((sel_num, 3), requires_grad=True)
    trans = lms.new_zeros((sel_num, 3), requires_grad=True)
    trans.data[:, 2] -= 600
    focal_length = lms.new_zeros(1, requires_grad=False)
    focal_length.data += focal
    set_requires_grad([id_para, exp_para, euler_angle, trans])

    optimizer_id = torch.optim.Adam([id_para], lr=.3)
    optimizer_exp = torch.optim.Adam([exp_para], lr=.3)
    optimizer_frame = torch.optim.Adam(
        [euler_angle, trans], lr=.3)
    iter_num = 2000

    for iter in range(iter_num):
        id_para_batch = id_para.expand(sel_num, -1)
        geometry = model_3dmm.forward_geo_sub(
            id_para_batch, exp_para, lands_info[-51:].long())
        proj_geo = forward_transform(
            geometry, euler_angle, trans, focal_length, cxy)
        loss_lan = cal_lan_loss(
            proj_geo[:, :, :2], lms[sel_ids, -51:, :].detach())
        loss_regid = torch.mean(id_para*id_para)*8
        loss_regexp = torch.mean(exp_para*exp_para)*0.5
        loss = loss_lan + loss_regid + loss_regexp
        optimizer_id.zero_grad()
        optimizer_exp.zero_grad()
        optimizer_frame.zero_grad()
        loss.backward()
        if iter > 1000:
            optimizer_id.step()
            optimizer_exp.step()
        optimizer_frame.step()
    print(focal, loss_lan.item(), torch.mean(trans[:, 2]).item())
    if loss_lan.item() < arg_landis:
        arg_landis = loss_lan.item()
        arg_focal = focal

sel_ids = np.arange(0, num_frames)
sel_num = sel_ids.shape[0]
id_para = lms.new_zeros((1, id_dim), requires_grad=True)
exp_para = lms.new_zeros((sel_num, exp_dim), requires_grad=True)
euler_angle = lms.new_zeros((sel_num, 3), requires_grad=True)
trans = lms.new_zeros((sel_num, 3), requires_grad=True)
trans.data[:, 2] -= 600
focal_length = lms.new_zeros(1, requires_grad=False)
focal_length.data += arg_focal
set_requires_grad([id_para, exp_para, euler_angle, trans])

optimizer_id = torch.optim.Adam([id_para], lr=.3)
optimizer_exp = torch.optim.Adam([exp_para], lr=.3)
optimizer_frame = torch.optim.Adam(
    [euler_angle, trans], lr=.3)
iter_num = 2000

for iter in range(iter_num):
    id_para_batch = id_para.expand(sel_num, -1)
    geometry = model_3dmm.forward_geo_sub(
        id_para_batch, exp_para, lands_info[-51:].long())
    proj_geo = forward_transform(
        geometry, euler_angle, trans, focal_length, cxy)
    loss_lan = cal_lan_loss(
        proj_geo[:, :, :2], lms[sel_ids, -51:, :].detach())
    loss_regid = torch.mean(id_para*id_para)*8
    loss_regexp = torch.mean(exp_para*exp_para)*0.5
    loss = loss_lan + loss_regid + loss_regexp
    optimizer_id.zero_grad()
    optimizer_exp.zero_grad()
    optimizer_frame.zero_grad()
    loss.backward()
    if iter > 1000:
        optimizer_id.step()
        optimizer_exp.step()
    optimizer_frame.step()
print(arg_focal, loss_lan.item(), torch.mean(trans[:, 2]).item())


torch.save({'id': id_para.detach().cpu(), 'exp': exp_para.detach().cpu(),
            'euler': euler_angle.detach().cpu(), 'trans': trans.detach().cpu(),
            'focal': focal_length.detach().cpu()}, os.path.join(os.path.dirname(args.path), 'track_params.pt'))
print('face tracking params saved')
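The tracker runs in two passes: a coarse grid search over focal lengths (500 to 1450 in steps of 50, fitted on every 10th frame) that keeps the focal with the lowest landmark loss, followed by a full fit of identity, expression and per-frame pose over all frames. A quick way to inspect the saved result (hypothetical path, matching the default --path above) is:

import torch

params = torch.load('obama/track_params.pt')  # written next to ori_imgs by this script
print(params['id'].shape)     # (1, 100)          identity coefficients, shared across frames
print(params['exp'].shape)    # (num_frames, 79)  per-frame expression coefficients
print(params['euler'].shape)  # (num_frames, 3)   per-frame head rotation
print(params['trans'].shape)  # (num_frames, 3)   per-frame translation
print(params['focal'])        # (1,) focal length selected by the coarse search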
data_utils/face_tracking/facemodel.py
ADDED
@@ -0,0 +1,60 @@
import torch
import torch.nn as nn
import numpy as np
import os


class Face_3DMM(nn.Module):
    def __init__(self, modelpath, id_dim, exp_dim, tex_dim, point_num):
        super(Face_3DMM, self).__init__()
        # id_dim = 100
        # exp_dim = 79
        # tex_dim = 100
        self.point_num = point_num
        DMM_info = np.load(
            os.path.join(modelpath, "3DMM_info.npy"), allow_pickle=True
        ).item()
        base_id = DMM_info["b_shape"][:id_dim, :]
        mu_id = DMM_info["mu_shape"]
        base_exp = DMM_info["b_exp"][:exp_dim, :]
        mu_exp = DMM_info["mu_exp"]
        mu = mu_id + mu_exp
        mu = mu.reshape(-1, 3)
        for i in range(3):
            mu[:, i] -= np.mean(mu[:, i])
        mu = mu.reshape(-1)
        self.base_id = torch.as_tensor(base_id).cuda() / 1000.0
        self.base_exp = torch.as_tensor(base_exp).cuda() / 1000.0
        self.mu = torch.as_tensor(mu).cuda() / 1000.0
        base_tex = DMM_info["b_tex"][:tex_dim, :]
        mu_tex = DMM_info["mu_tex"]
        self.base_tex = torch.as_tensor(base_tex).cuda()
        self.mu_tex = torch.as_tensor(mu_tex).cuda()
        sig_id = DMM_info["sig_shape"][:id_dim]
        sig_tex = DMM_info["sig_tex"][:tex_dim]
        sig_exp = DMM_info["sig_exp"][:exp_dim]
        self.sig_id = torch.as_tensor(sig_id).cuda()
        self.sig_tex = torch.as_tensor(sig_tex).cuda()
        self.sig_exp = torch.as_tensor(sig_exp).cuda()

    def forward_geo_sub(self, id_para, exp_para, sub_index):
        id_para = id_para*self.sig_id
        exp_para = exp_para*self.sig_exp
        sel_index = torch.cat((3*sub_index.unsqueeze(1), 3*sub_index.unsqueeze(1)+1,
                               3*sub_index.unsqueeze(1)+2), dim=1).reshape(-1)
        geometry = torch.mm(id_para, self.base_id[:, sel_index]) + \
            torch.mm(exp_para, self.base_exp[:, sel_index]) + self.mu[sel_index]
        return geometry.reshape(-1, sub_index.shape[0], 3)

    def forward_geo(self, id_para, exp_para):
        id_para = id_para*self.sig_id
        exp_para = exp_para*self.sig_exp
        geometry = torch.mm(id_para, self.base_id) + \
            torch.mm(exp_para, self.base_exp) + self.mu
        return geometry.reshape(-1, self.point_num, 3)

    def forward_tex(self, tex_para):
        tex_para = tex_para*self.sig_tex
        texture = torch.mm(tex_para, self.base_tex) + self.mu_tex
        return texture.reshape(-1, self.point_num, 3)
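The model is linear: geometry = mu + (id_para * sig_id) @ B_id + (exp_para * sig_exp) @ B_exp, evaluated either on all point_num vertices (forward_geo) or on a vertex subset such as the landmark indices from lands_info.txt (forward_geo_sub). A minimal query sketch, assuming 3DMM_info.npy has already been produced by convert_BFM.py and a CUDA device is available:

import torch
from facemodel import Face_3DMM

model = Face_3DMM('3DMM', id_dim=100, exp_dim=79, tex_dim=100, point_num=34650)
id_para = torch.zeros(1, 100).cuda()   # mean identity
exp_para = torch.zeros(1, 79).cuda()   # neutral expression
verts = model.forward_geo(id_para, exp_para)
print(verts.shape)  # (1, 34650, 3): the mean face in the model's rescaled coordinates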
data_utils/face_tracking/geo_transform.py
ADDED
@@ -0,0 +1,60 @@
"""This module contains functions for geometry transform and camera projection"""
import torch
import torch.nn as nn
import numpy as np


def euler2rot(euler_angle):
    batch_size = euler_angle.shape[0]
    theta = euler_angle[:, 0].reshape(-1, 1, 1)
    phi = euler_angle[:, 1].reshape(-1, 1, 1)
    psi = euler_angle[:, 2].reshape(-1, 1, 1)
    one = torch.ones((batch_size, 1, 1), dtype=torch.float32,
                     device=euler_angle.device)
    zero = torch.zeros((batch_size, 1, 1), dtype=torch.float32,
                       device=euler_angle.device)
    rot_x = torch.cat((
        torch.cat((one, zero, zero), 1),
        torch.cat((zero, theta.cos(), theta.sin()), 1),
        torch.cat((zero, -theta.sin(), theta.cos()), 1),
    ), 2)
    rot_y = torch.cat((
        torch.cat((phi.cos(), zero, -phi.sin()), 1),
        torch.cat((zero, one, zero), 1),
        torch.cat((phi.sin(), zero, phi.cos()), 1),
    ), 2)
    rot_z = torch.cat((
        torch.cat((psi.cos(), -psi.sin(), zero), 1),
        torch.cat((psi.sin(), psi.cos(), zero), 1),
        torch.cat((zero, zero, one), 1)
    ), 2)
    return torch.bmm(rot_x, torch.bmm(rot_y, rot_z))


def rot_trans_geo(geometry, rot, trans):
    rott_geo = torch.bmm(rot, geometry.permute(0, 2, 1)) + trans.view(-1, 3, 1)
    return rott_geo.permute(0, 2, 1)


def euler_trans_geo(geometry, euler, trans):
    rot = euler2rot(euler)
    return rot_trans_geo(geometry, rot, trans)


def proj_geo(rott_geo, camera_para):
    fx = camera_para[:, 0]
    fy = camera_para[:, 0]
    cx = camera_para[:, 1]
    cy = camera_para[:, 2]

    X = rott_geo[:, :, 0]
    Y = rott_geo[:, :, 1]
    Z = rott_geo[:, :, 2]

    fxX = fx[:, None]*X
    fyY = fy[:, None]*Y

    proj_x = -fxX/Z + cx[:, None]
    proj_y = fyY/Z + cy[:, None]

    return torch.cat((proj_x[:, :, None], proj_y[:, :, None], Z[:, :, None]), 2)
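face_tracker.py and bundle_adjustment.py call forward_transform from util.py, which is not part of this diff; it is assumed to chain the helpers above, roughly as in the sketch below (the real util.py may build the camera vector differently, so treat the function name and shapes here as assumptions):

import torch
from geo_transform import euler_trans_geo, proj_geo

def forward_transform_sketch(geometry, euler, trans, focal, cxy):
    # geometry: (N, P, 3) points; euler/trans: (N, 3); focal: (1,); cxy: (2,)
    rott_geo = euler_trans_geo(geometry, euler, trans)       # per-frame rigid transform
    camera = torch.cat((focal.expand(euler.shape[0], 1),     # [f, cx, cy] per frame
                        cxy.expand(euler.shape[0], 2)), dim=1)
    return proj_geo(rott_geo, camera)                        # (N, P, 3): pixel x, pixel y, depth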