Cinema-Guaratuba-imagem-para-video

Running

App Files Files Community

Cinema-Guaratuba-imagem-para-video / utils.py

maxin-cn

Upload folder using huggingface_hub

be791d6 verified 4 months ago

raw

history blame contribute delete

5.52 kB

	import os
	import math
	import torch
	import logging
	import subprocess
	import numpy as np
	import torch.distributed as dist

	# from torch._six import inf
	from torch import inf
	from PIL import Image
	from typing import Union, Iterable
	from collections import OrderedDict
	from torch.utils.tensorboard import SummaryWriter
	from typing import Dict
	import torch_dct

	from diffusers.utils import is_bs4_available, is_ftfy_available

	import html
	import re
	import urllib.parse as ul

	if is_bs4_available():
	from bs4 import BeautifulSoup

	if is_ftfy_available():
	import ftfy

	import torch.fft as fft

	_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]


	#################################################################################
	# Testing Utils #
	#################################################################################

	def find_model(model_name):
	    """
	    Load a checkpoint file from disk and return one of its entries.

	    The file is deserialized with ``torch.load`` using a ``map_location``
	    callback that hands every storage back unchanged. When the loaded
	    object contains an ``"ema"`` entry that entry is returned; otherwise
	    the ``"model"`` entry is returned.

	    :param model_name: path to the checkpoint file
	    :return: ``checkpoint["ema"]`` if present, else ``checkpoint["model"]``
	    :raises AssertionError: if ``model_name`` is not an existing file
	    :raises KeyError: if neither ``"ema"`` nor ``"model"`` is present
	    """
	    assert os.path.isfile(model_name), f'Could not find DiT checkpoint at {model_name}'
	    loaded = torch.load(model_name, map_location=lambda storage, loc: storage)

	    # No EMA entry: fall back to the plain model entry.
	    if "ema" not in loaded:
	        weights = loaded["model"]
	        print("Using model ckpt!")
	        return weights

	    # Checkpoints written by train.py carry an "ema" entry; prefer it.
	    print('Using ema ckpt!')
	    return loaded["ema"]

	def save_video_grid(video, nrow=None):
	    """
	    Tile a batch of videos into one grid video.

	    :param video: tensor whose shape unpacks as (b, t, h, w, c)
	    :param nrow: number of grid rows; defaults to ``ceil(sqrt(b))``
	    :return: uint8 tensor of shape
	        ``(t, (h + 1) * nrow + 1, (w + 1) * ncol + 1, c)`` where
	        ``ncol = ceil(b / nrow)``; cells not covered by a video stay zero
	    """
	    batch, frames, height, width, channels = video.shape

	    rows = math.ceil(math.sqrt(batch)) if nrow is None else nrow
	    cols = math.ceil(batch / rows)
	    gap = 1
	    cell_h = gap + height
	    cell_w = gap + width

	    canvas = torch.zeros(
	        (frames, cell_h * rows + gap, cell_w * cols + gap, channels),
	        dtype=torch.uint8,
	    )

	    # Videos are laid out row-major: index -> (row, col) within the grid.
	    for index, clip in enumerate(video):
	        row, col = divmod(index, cols)
	        top = cell_h * row
	        left = cell_w * col
	        canvas[:, top:top + height, left:left + width] = clip

	    return canvas

	def save_videos_grid_tav(videos: torch.Tensor, path: str, rescale=False, nrow=None, fps=8):
	    """
	    Write a batch of videos to ``path`` as one tiled video.

	    Each time step is turned into an image grid with
	    ``torchvision.utils.make_grid`` and the resulting frames are written
	    with ``imageio.mimsave``. The parent directory of ``path`` is not
	    created here and must already exist.

	    :param videos: 5-D tensor laid out as "b c t h w"
	    :param path: output file path handed to ``imageio.mimsave``
	    :param rescale: if True, map values from [-1, 1] to [0, 1] first
	    :param nrow: value forwarded to ``make_grid(nrow=...)``; defaults to
	        ``ceil(sqrt(b))``
	    :param fps: frame rate forwarded to ``imageio.mimsave``
	    :return: None
	    """
	    from einops import rearrange
	    import imageio
	    import torchvision

	    b, _, _, _, _ = videos.shape
	    if nrow is None:
	        nrow = math.ceil(math.sqrt(b))

	    # .numpy() below needs a CPU tensor that does not track gradients.
	    videos = rearrange(videos.detach().cpu(), "b c t h w -> t b c h w")

	    frames = []
	    for step in videos:
	        grid = torchvision.utils.make_grid(step, nrow=nrow)
	        grid = grid.permute(1, 2, 0).squeeze(-1)  # c h w -> h w c
	        if rescale:
	            grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1
	        # Values outside [0, 1] would wrap around in the uint8 cast and show
	        # up as speckle artifacts, so saturate them instead.
	        grid = grid.clamp(0.0, 1.0)
	        frames.append((grid * 255).numpy().astype(np.uint8))

	    imageio.mimsave(path, frames, fps=fps)


	#################################################################################
	# MMCV Utils #
	#################################################################################


	def collect_env():
	    """Collect the information of the running environments.

	    Gathers the base environment info from ``mmcv.utils.collect_env``,
	    adds the first seven characters of ``get_git_hash()`` under the
	    ``'MMClassification'`` key, and prints every entry followed by
	    ``torch.cuda.get_arch_list()`` and ``torch.version.cuda``.

	    :return: None; the information is only printed
	    """
	    # Copyright (c) OpenMMLab. All rights reserved.
	    # mmcv is imported lazily so the module can be imported without it.
	    from mmcv.utils import collect_env as collect_base_env
	    from mmcv.utils import get_git_hash

	    env_info = collect_base_env()
	    env_info['MMClassification'] = get_git_hash()[:7]

	    for name, val in env_info.items():
	        print(f'{name}: {val}')

	    print(torch.cuda.get_arch_list())
	    print(torch.version.cuda)


	#################################################################################
	# DCT Functions #
	#################################################################################

	def dct_low_pass_filter(dct_coefficients, percentage=0.3):
	    """
	    Build a low-pass mask for a tensor of DCT coefficients.

	    The mask is 1 in the leading ``percentage`` fraction of the last two
	    dimensions (the low-frequency corner) and 0 everywhere else. Any
	    leading dimensions, e.g. the ``[b c f h w]`` layout, are covered in
	    full. The input itself is not modified or filtered; only the mask is
	    returned.

	    :param dct_coefficients: tensor of DCT coefficients with at least two
	        dimensions
	    :param percentage: fraction of coefficients to keep along each of the
	        last two dimensions (between 0 and 1)
	    :return: mask with the same shape, dtype and device as
	        ``dct_coefficients``
	    """
	    # Cutoff indices for the last two dimensions (truncated toward zero).
	    cutoff_x = int(dct_coefficients.shape[-2] * percentage)
	    cutoff_y = int(dct_coefficients.shape[-1] * percentage)

	    mask = torch.zeros_like(dct_coefficients)
	    # Ellipsis keeps the indexing aligned with shape[-2] / shape[-1] for any
	    # number of leading dimensions, not only for 5-D input.
	    mask[..., :cutoff_x, :cutoff_y] = 1

	    return mask

	def normalize(tensor):
	    """
	    Normalize a tensor to the [0, 1] range using its global min and max.

	    A constant tensor (max equal to min) maps to all zeros instead of the
	    NaN that a 0 / 0 division would produce.

	    :param tensor: non-empty input tensor
	    :return: tensor of the same shape with values in [0, 1]
	    """
	    min_val = tensor.min()
	    max_val = tensor.max()
	    value_range = max_val - min_val
	    # Swap a zero range for 1 so the division below yields 0 rather than NaN;
	    # torch.where avoids a data-dependent Python branch.
	    safe_range = torch.where(value_range == 0, torch.ones_like(value_range), value_range)
	    normalized = (tensor - min_val) / safe_range
	    return normalized

	def denormalize(tensor, max_val_target, min_val_target):
	    """Map a tensor from the [0, 1] range back to [min_val_target, max_val_target]."""
	    # Note the argument order: the target maximum comes before the minimum.
	    span = max_val_target - min_val_target
	    return tensor * span + min_val_target

	def exchanged_mixed_dct_freq(noise, base_content, LPF_3d, normalized=False):
	    """
	    Combine the low frequencies of ``base_content`` with the high
	    frequencies of ``noise`` in the 3-D DCT domain.

	    Both inputs are transformed with ``torch_dct.dct_3d(..., 'ortho')``.
	    The base-content spectrum is weighted by ``LPF_3d`` and the noise
	    spectrum by ``1 - LPF_3d``; their sum is transformed back with
	    ``torch_dct.idct_3d(..., 'ortho')``.

	    :param noise: tensor supplying the high-frequency part
	    :param base_content: tensor supplying the low-frequency part
	    :param LPF_3d: low-pass mask multiplied element-wise with the spectra
	        (presumably the same shape as the inputs -- confirm with callers)
	    :param normalized: accepted but not used by this function
	    :return: the mixed signal after the inverse DCT
	    """
	    # The high-pass mask is the complement of the low-pass mask.
	    high_pass = 1 - LPF_3d

	    noise_high = torch_dct.dct_3d(noise, 'ortho') * high_pass
	    content_low = torch_dct.dct_3d(base_content, 'ortho') * LPF_3d

	    # Merge both bands in the frequency domain, then invert the transform.
	    return torch_dct.idct_3d(content_low + noise_high, 'ortho')