import argparse
from math import ceil

import gradio as gr
import numpy as np
import torch
from torch import nn
import imageio
import cv2


class RelationModuleMultiScale(torch.nn.Module):
    """Multi-scale Temporal Relation Network (TRN) module: fuses frame
    features over relations at every scale from `num_frames` down to 2."""

    def __init__(self, img_feature_dim, num_bottleneck, num_frames):
        super(RelationModuleMultiScale, self).__init__()
        self.subsample_num = 3  # at most this many relations are sampled per scale
        self.img_feature_dim = img_feature_dim
        self.scales = [i for i in range(num_frames, 1, -1)]  # [num_frames, ..., 3, 2]

        self.relations_scales = []
        self.subsample_scales = []
        for scale in self.scales:
            relations_scale = self.return_relationset(num_frames, scale)
            self.relations_scales.append(relations_scale)
            self.subsample_scales.append(min(self.subsample_num, len(relations_scale)))

        self.num_frames = num_frames
        self.fc_fusion_scales = nn.ModuleList()  # one fusion MLP per scale
        for i in range(len(self.scales)):
            scale = self.scales[i]
            fc_fusion = nn.Sequential(
                nn.ReLU(),
                nn.Linear(scale * self.img_feature_dim, num_bottleneck),
                nn.ReLU(),
            )
            self.fc_fusion_scales += [fc_fusion]

    def forward(self, input):
        # the coarsest scale uses all frames, so it has exactly one relation
        act_scale_1 = input[:, self.relations_scales[0][0], :]
        act_scale_1 = act_scale_1.view(act_scale_1.size(0), self.scales[0] * self.img_feature_dim)
        act_scale_1 = self.fc_fusion_scales[0](act_scale_1)
        act_scale_1 = act_scale_1.unsqueeze(1)
        act_all = act_scale_1.clone()

        for scaleID in range(1, len(self.scales)):
            act_relation_all = torch.zeros_like(act_scale_1)
            # sample a fixed number of relations, spread evenly over all of them
            num_total_relations = len(self.relations_scales[scaleID])
            num_select_relations = self.subsample_scales[scaleID]
            idx_relations_evensample = [
                int(ceil(i * num_total_relations / num_select_relations))
                for i in range(num_select_relations)
            ]

            for idx in idx_relations_evensample:
                act_relation = input[:, self.relations_scales[scaleID][idx], :]
                act_relation = act_relation.view(act_relation.size(0), self.scales[scaleID] * self.img_feature_dim)
                act_relation = self.fc_fusion_scales[scaleID](act_relation)
                act_relation = act_relation.unsqueeze(1)
                act_relation_all += act_relation

            act_all = torch.cat((act_all, act_relation_all), 1)
        return act_all

    def return_relationset(self, num_frames, num_frames_relation):
        import itertools
        return list(itertools.combinations([i for i in range(num_frames)], num_frames_relation))
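
# A minimal shape sketch for the module above, assuming an 8-segment clip and
# hypothetical dims (512-d frame features, 256-d bottleneck); kept commented
# out so importing this script does not run extra computation:
#
#   trn = RelationModuleMultiScale(img_feature_dim=512, num_bottleneck=256, num_frames=8)
#   feats = torch.randn(4, 8, 512)   # (batch, num_frames, img_feature_dim)
#   out = trn(feats)                 # -> (4, len(trn.scales), 256), one slice per scale

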
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='Sprite', help='dataset name')
parser.add_argument('--data_root', default='dataset', help='root directory for data')
parser.add_argument('--num_class', type=int, default=15, help='number of action classes')
parser.add_argument('--input_type', default='image', choices=['feature', 'image'], help='type of input')
parser.add_argument('--src', default='domain_1', help='source domain')
parser.add_argument('--tar', default='domain_2', help='target domain')
parser.add_argument('--num_segments', type=int, default=8, help='number of frame segments')
parser.add_argument('--backbone', type=str, default='dcgan', choices=['dcgan', 'resnet101', 'I3Dpretrain', 'I3Dfinetune'], help='backbone network')
parser.add_argument('--channels', default=3, type=int, help='input channels for image inputs')
parser.add_argument('--add_fc', default=1, type=int, metavar='M', help='number of additional fc layers (excluding the last fc layer), e.g. 0, 1, 2')
parser.add_argument('--fc_dim', type=int, default=1024, help='dimension of the added fc layers')
parser.add_argument('--frame_aggregation', type=str, default='trn', choices=['rnn', 'trn'], help='aggregation of frame features (none if baseline_type is not video)')
parser.add_argument('--dropout_rate', default=0.5, type=float, help='dropout ratio for frame-level features (default: 0.5)')
parser.add_argument('--f_dim', type=int, default=512, help='dimension of f')
parser.add_argument('--z_dim', type=int, default=512, help='dimensionality of z_t')
parser.add_argument('--f_rnn_layers', type=int, default=1, help='number of layers (content LSTM)')
parser.add_argument('--use_bn', type=str, default='none', choices=['none', 'AdaBN', 'AutoDIAL'], help='normalization-based methods')
parser.add_argument('--prior_sample', type=str, default='random', choices=['random', 'post'], help='how to sample the prior')
parser.add_argument('--batch_size', default=128, type=int, help='batch size')
parser.add_argument('--use_attn', type=str, default='TransAttn', choices=['none', 'TransAttn', 'general'], help='attention mechanism')
parser.add_argument('--data_threads', type=int, default=5, help='number of data-loading threads')

# parse an empty argv so the defaults are used even when the hosting
# environment passes its own command-line flags
opt = parser.parse_args(args=[])
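
# Overriding a default for a variant run can be sketched as below (the values
# are illustrative, not settings the demo was validated with):
#
#   opt = parser.parse_args(['--num_segments', '16', '--batch_size', '32'])

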
def display_gif(file_name, save_name):
    """Assemble the 8 frames '<file_name><i>.png' into a GIF at save_name."""
    images = []
    for frame in range(8):
        image_filename = file_name + str(frame) + '.png'
        images.append(imageio.imread(image_filename))
    return imageio.mimsave(save_name, images)


def display_gif_pad(file_name, save_name):
    """Like display_gif, but drop the alpha channel and pad each frame with
    125 black pixels on the left and right before writing the GIF."""
    images = []
    for frame in range(8):
        image_filename = file_name + str(frame) + '.png'
        image = imageio.imread(image_filename)
        image = image[:, :, :3]  # RGBA -> RGB
        image_pad = cv2.copyMakeBorder(image, 0, 0, 125, 125, cv2.BORDER_CONSTANT, value=0)
        images.append(image_pad)
    return imageio.mimsave(save_name, images)
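
# The padding math, assuming the Sprite frames are 64x64: left/right borders
# of 125 px widen each frame to 64x314, e.g.
#
#   frame = np.zeros((64, 64, 3), dtype=np.uint8)
#   padded = cv2.copyMakeBorder(frame, 0, 0, 125, 125, cv2.BORDER_CONSTANT, value=0)
#   assert padded.shape == (64, 314, 3)

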
def display_image(file_name):
    """Save the first frame '<file_name>0.png' to 'image.png'."""
    image_filename = file_name + '0' + '.png'
    print(image_filename)
    image = imageio.imread(image_filename)
    imageio.imwrite('image.png', image)


def run(domain_source, action_source, hair_source, top_source, bottom_source,
        domain_target, action_target, hair_target, top_target, bottom_target):
    # --- source avatar (human body) ---
    body_source = '0'

    # map the UI color names to the sprite attribute indices used in filenames
    if hair_source == "green": hair_source = '0'
    elif hair_source == "yellow": hair_source = '2'
    elif hair_source == "rose": hair_source = '4'
    elif hair_source == "red": hair_source = '7'
    elif hair_source == "wine": hair_source = '8'

    if top_source == "brown": top_source = '0'
    elif top_source == "blue": top_source = '1'
    elif top_source == "white": top_source = '2'

    if bottom_source == "white": bottom_source = '0'
    elif bottom_source == "golden": bottom_source = '1'
    elif bottom_source == "red": bottom_source = '2'
    elif bottom_source == "silver": bottom_source = '3'

    file_name_source = './Sprite/frames/domain_1/' + action_source + '/'
    file_name_source = file_name_source + 'front' + '_' + str(body_source) + str(bottom_source) + str(top_source) + str(hair_source) + '_'
    display_gif_pad(file_name_source, 'avatar_source.gif')

    # --- target avatar (alien body) ---
    body_target = '1'

    if hair_target == "violet": hair_target = '1'
    elif hair_target == "silver": hair_target = '3'
    elif hair_target == "purple": hair_target = '5'
    elif hair_target == "grey": hair_target = '6'
    elif hair_target == "golden": hair_target = '9'

    if top_target == "grey": top_target = '3'
    elif top_target == "khaki": top_target = '4'
    elif top_target == "linen": top_target = '5'
    elif top_target == "ocre": top_target = '6'

    if bottom_target == "denim": bottom_target = '4'
    elif bottom_target == "olive": bottom_target = '5'
    elif bottom_target == "brown": bottom_target = '6'

    file_name_target = './Sprite/frames/domain_2/' + action_target + '/'
    file_name_target = file_name_target + 'front' + '_' + str(body_target) + str(bottom_target) + str(top_target) + str(hair_target) + '_'
    display_gif_pad(file_name_target, 'avatar_target.gif')

    # 'demo.gif' is the disentanglement result shown in the Gradio output
    return 'demo.gif'
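
# A direct call matching the Interface defaults below would look like this
# (the ./Sprite/frames/... paths must exist for the GIFs to be written):
#
#   run("Source Avatar - Human", "slash", "green", "brown", "white",
#       "Target Avatar - Alien", "walk", "golden", "ocre", "brown")
#   # -> writes avatar_source.gif and avatar_target.gif, returns 'demo.gif'

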
gr.Interface(
    run,
    inputs=[
        gr.Textbox(value="Source Avatar - Human", interactive=False),
        gr.Radio(choices=["slash", "spellcard", "walk"], value="slash"),
        gr.Radio(choices=["green", "yellow", "rose", "red", "wine"], value="green"),
        gr.Radio(choices=["brown", "blue", "white"], value="brown"),
        gr.Radio(choices=["white", "golden", "red", "silver"], value="white"),
        gr.Textbox(value="Target Avatar - Alien", interactive=False),
        gr.Radio(choices=["slash", "spellcard", "walk"], value="walk"),
        gr.Radio(choices=["violet", "silver", "purple", "grey", "golden"], value="golden"),
        gr.Radio(choices=["grey", "khaki", "linen", "ocre"], value="ocre"),
        gr.Radio(choices=["denim", "olive", "brown"], value="brown"),
    ],
    outputs=[
        gr.Image(type="filepath", label="Domain Disentanglement"),
    ],
    live=True,
    title="TransferVAE for Unsupervised Video Domain Adaptation",
).launch()
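
# launch() also accepts explicit hosting options when needed; for example
# (values illustrative, not part of the original demo):
#
#   .launch(server_name="0.0.0.0", server_port=7860, share=False)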