File size: 5,524 Bytes
be791d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
import os
import math
import torch
import logging
import subprocess
import numpy as np
import torch.distributed as dist
# from torch._six import inf
from torch import inf
from PIL import Image
from typing import Union, Iterable
from collections import OrderedDict
from torch.utils.tensorboard import SummaryWriter
from typing import Dict
import torch_dct
from diffusers.utils import is_bs4_available, is_ftfy_available
import html
import re
import urllib.parse as ul
if is_bs4_available():
from bs4 import BeautifulSoup
if is_ftfy_available():
import ftfy
import torch.fft as fft
_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]
#################################################################################
# Testing Utils #
#################################################################################
def find_model(model_name):
"""
Finds a pre-trained model
"""
assert os.path.isfile(model_name), f'Could not find DiT checkpoint at {model_name}'
checkpoint = torch.load(model_name, map_location=lambda storage, loc: storage)
if "ema" in checkpoint: # supports checkpoints from train.py
print('Using ema ckpt!')
checkpoint = checkpoint["ema"]
else:
checkpoint = checkpoint["model"]
print("Using model ckpt!")
return checkpoint
def save_video_grid(video, nrow=None):
b, t, h, w, c = video.shape
if nrow is None:
nrow = math.ceil(math.sqrt(b))
ncol = math.ceil(b / nrow)
padding = 1
video_grid = torch.zeros((t, (padding + h) * nrow + padding,
(padding + w) * ncol + padding, c), dtype=torch.uint8)
# print(video_grid.shape)
for i in range(b):
r = i // ncol
c = i % ncol
start_r = (padding + h) * r
start_c = (padding + w) * c
video_grid[:, start_r:start_r + h, start_c:start_c + w] = video[i]
return video_grid
def save_videos_grid_tav(videos: torch.Tensor, path: str, rescale=False, nrow=None, fps=8):
from einops import rearrange
import imageio
import torchvision
b, _, _, _, _ = videos.shape
if nrow is None:
nrow = math.ceil(math.sqrt(b))
videos = rearrange(videos, "b c t h w -> t b c h w")
outputs = []
for x in videos:
x = torchvision.utils.make_grid(x, nrow=nrow)
x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
if rescale:
x = (x + 1.0) / 2.0 # -1,1 -> 0,1
x = (x * 255).numpy().astype(np.uint8)
outputs.append(x)
# os.makedirs(os.path.dirname(path), exist_ok=True)
imageio.mimsave(path, outputs, fps=fps)
#################################################################################
# MMCV Utils #
#################################################################################
def collect_env():
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import collect_env as collect_base_env
from mmcv.utils import get_git_hash
"""Collect the information of the running environments."""
env_info = collect_base_env()
env_info['MMClassification'] = get_git_hash()[:7]
for name, val in env_info.items():
print(f'{name}: {val}')
print(torch.cuda.get_arch_list())
print(torch.version.cuda)
#################################################################################
# DCT Functions #
#################################################################################
def dct_low_pass_filter(dct_coefficients, percentage=0.3): # 2d [b c f h w]
"""
Applies a low pass filter to the given DCT coefficients.
:param dct_coefficients: 2D tensor of DCT coefficients
:param percentage: percentage of coefficients to keep (between 0 and 1)
:return: 2D tensor of DCT coefficients after applying the low pass filter
"""
# Determine the cutoff indices for both dimensions
cutoff_x = int(dct_coefficients.shape[-2] * percentage)
cutoff_y = int(dct_coefficients.shape[-1] * percentage)
# Create a mask with the same shape as the DCT coefficients
mask = torch.zeros_like(dct_coefficients)
# Set the top-left corner of the mask to 1 (the low-frequency area)
mask[:, :, :, :cutoff_x, :cutoff_y] = 1
return mask
def normalize(tensor):
"""将Tensor归一化到[0, 1]范围内。"""
min_val = tensor.min()
max_val = tensor.max()
normalized = (tensor - min_val) / (max_val - min_val)
return normalized
def denormalize(tensor, max_val_target, min_val_target):
"""将Tensor从[0, 1]范围反归一化到目标的[min_val_target, max_val_target]范围。"""
denormalized = tensor * (max_val_target - min_val_target) + min_val_target
return denormalized
def exchanged_mixed_dct_freq(noise, base_content, LPF_3d, normalized=False):
# noise dct
noise_freq = torch_dct.dct_3d(noise, 'ortho')
# frequency
HPF_3d = 1 - LPF_3d
noise_freq_high = noise_freq * HPF_3d
# base frame dct
base_content_freq = torch_dct.dct_3d(base_content, 'ortho')
# base content low frequency
base_content_freq_low = base_content_freq * LPF_3d
# mixed frequency
mixed_freq = base_content_freq_low + noise_freq_high
# idct
mixed_freq = torch_dct.idct_3d(mixed_freq, 'ortho')
return mixed_freq |