Spaces:

amphion
/

maskgct

Running on Zero

App Files Files Community

maskgct / schedulers /scheduler.py

Hecheng0625

Upload 409 files

c968fc3 verified 14 days ago

raw

history blame contribute delete

6.12 kB

	# Copyright (c) 2023 Amphion.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	import torch
	from torch.optim import Optimizer
	from typing import List, Optional, Tuple, Union


	def calc_lr(step, dim_embed, warmup_steps):
	    """Return the Noam-style learning-rate factor for a training step.

	    The factor is
	    ``dim_embed ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)``:
	    it grows linearly with ``step`` while ``step < warmup_steps`` and decays
	    as the inverse square root of ``step`` afterwards.

	    Args:
	        step: Current step count. Values ``<= 0`` yield ``0.0`` (the value of
	            the linear ramp at its origin) instead of raising.
	        dim_embed: Embedding dimension; the factor is scaled by
	            ``dim_embed ** -0.5``.
	        warmup_steps: Length of the linear warm-up. Values ``<= 0`` disable
	            the warm-up, leaving only the inverse-square-root decay.

	    Returns:
	        The learning-rate factor (to be multiplied by a base learning rate).
	    """
	    # 0 ** -0.5 raises ZeroDivisionError and a negative base yields a complex
	    # number, so non-positive steps are handled before any exponentiation.
	    if step <= 0:
	        return 0.0

	    scale = dim_embed ** (-0.5)
	    decay = step ** (-0.5)

	    # With no warm-up the ramp term is unbounded, so min() picks the decay.
	    if warmup_steps <= 0:
	        return scale * decay

	    return scale * min(decay, step * warmup_steps ** (-1.5))


	# The Noam scheduler code here is adapted from
	# https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/modules/scheduler.py
	class NoamScheduler(torch.optim.lr_scheduler._LRScheduler):
	    """Noam-style learning-rate schedule driven by the scheduler step count.

	    Every parameter group receives the same learning rate,
	    ``base_lr * calc_lr(step, dim_embed, warmup_steps)``; the learning rates
	    configured on the optimizer itself are not used by ``get_lr``.

	    Args:
	        base_lr: Multiplier applied to the Noam factor.
	        optimizer: Optimizer whose parameter groups are updated.
	        dim_embed: Embedding dimension passed to ``calc_lr``.
	        warmup_steps: Number of warm-up steps passed to ``calc_lr``.
	        last_epoch: Forwarded unchanged to the PyTorch base scheduler.
	        verbose: Forwarded to the PyTorch base scheduler only when true.
	    """

	    def __init__(
	        self,
	        base_lr: float,
	        optimizer: torch.optim.Optimizer,
	        dim_embed: int,
	        warmup_steps: int,
	        last_epoch: int = -1,
	        verbose: bool = False,
	    ) -> None:
	        # Set everything get_lr() reads before calling the base constructor,
	        # since the PyTorch base class may invoke get_lr() during its own
	        # initialisation.
	        self.dim_embed = dim_embed
	        self.base_lr = base_lr
	        self.warmup_steps = warmup_steps
	        self.num_param_groups = len(optimizer.param_groups)

	        # Recent PyTorch releases deprecate the `verbose` argument, so it is
	        # only forwarded when the caller explicitly asked for it; the default
	        # path then works the same on old and new versions.
	        if verbose:
	            super().__init__(optimizer, last_epoch, verbose)
	        else:
	            super().__init__(optimizer, last_epoch)

	    def get_lr(self) -> List[float]:
	        """Return the current learning rate, repeated once per parameter group."""
	        lr = self.base_lr * calc_lr(
	            self._step_count, self.dim_embed, self.warmup_steps
	        )
	        return [lr] * self.num_param_groups

	    def set_step(self, step: int) -> None:
	        """Overwrite the internal step counter (e.g. when resuming training).

	        This only stores the value; optimizer learning rates are not
	        recomputed until the scheduler is stepped again.
	        """
	        self._step_count = step


	class LRScheduler(object):
	    """
	    Base-class for learning rate schedulers where the learning-rate depends on both the
	    batch and the epoch.

	    Subclasses implement :meth:`get_lr`; :meth:`step_batch` and
	    :meth:`step_epoch` advance (or set) the counters and then write the new
	    learning rates into the optimizer's parameter groups.
	    """

	    def __init__(self, optimizer: Optimizer, verbose: bool = False):
	        """Attach the scheduler to ``optimizer``.

	        Args:
	            optimizer: Optimizer whose parameter groups will be updated.
	            verbose: Stored on the instance; not otherwise used by this class.

	        Raises:
	            TypeError: If ``optimizer`` is not an ``Optimizer`` instance.
	        """
	        # Attach optimizer
	        if not isinstance(optimizer, Optimizer):
	            raise TypeError("{} is not an Optimizer".format(type(optimizer).__name__))
	        self.optimizer = optimizer
	        self.verbose = verbose

	        # Remember each group's starting lr under "base_lr" (kept if a value
	        # is already present in the group).
	        for group in optimizer.param_groups:
	            group.setdefault("base_lr", group["lr"])

	        self.base_lrs = [group["base_lr"] for group in optimizer.param_groups]

	        self.epoch = 0
	        self.batch = 0

	        # Initialise so get_last_lr() is usable before the first step call.
	        self._last_lr = [group["lr"] for group in optimizer.param_groups]

	    def state_dict(self):
	        """Returns the state of the scheduler as a :class:`dict`.

	        It contains the ``base_lrs``, ``epoch`` and ``batch`` entries only;
	        the optimizer is not included.
	        """
	        return {
	            "base_lrs": self.base_lrs,
	            "epoch": self.epoch,
	            "batch": self.batch,
	        }

	    def load_state_dict(self, state_dict):
	        """Loads the schedulers state.

	        The entries are copied onto the instance; the optimizer's learning
	        rates are not recomputed until the next ``step_batch``/``step_epoch``.

	        Args:
	            state_dict (dict): scheduler state. Should be an object returned
	                from a call to :meth:`state_dict`.
	        """
	        self.__dict__.update(state_dict)

	    def get_last_lr(self) -> List[float]:
	        """Return last computed learning rate by current scheduler. Will be a list of float.

	        Before any step call this is the optimizer's learning rates as they
	        were when the scheduler was constructed.
	        """
	        return self._last_lr

	    def get_lr(self):
	        # Compute list of learning rates from self.epoch and self.batch and
	        # self.base_lrs; this must be overloaded by the user.
	        # e.g. return [some_formula(self.batch, self.epoch, base_lr) for base_lr in self.base_lrs ]
	        raise NotImplementedError

	    def step_batch(self, batch: Optional[int] = None) -> None:
	        # Step the batch index, or just set it. If `batch` is specified, it
	        # must be the batch index from the start of training, i.e. summed over
	        # all epochs.
	        # You can call this in any order; if you don't provide 'batch', it should
	        # of course be called once per batch.
	        if batch is not None:
	            self.batch = batch
	        else:
	            self.batch = self.batch + 1
	        self._set_lrs()

	    def step_epoch(self, epoch: Optional[int] = None):
	        # Step the epoch index, or just set it. If you provide the 'epoch' arg,
	        # you should call this at the start of the epoch; if you don't provide the 'epoch'
	        # arg, you should call it at the end of the epoch.
	        if epoch is not None:
	            self.epoch = epoch
	        else:
	            self.epoch = self.epoch + 1
	        self._set_lrs()

	    def _set_lrs(self):
	        """Write the rates from :meth:`get_lr` into the optimizer's groups.

	        Raises:
	            ValueError: If ``get_lr`` does not return exactly one value per
	                parameter group.
	        """
	        values = self.get_lr()
	        groups = self.optimizer.param_groups
	        # Explicit check (not `assert`) so that a mismatch is still caught
	        # under `python -O` instead of zip() silently truncating.
	        if len(values) != len(groups):
	            raise ValueError(
	                "get_lr() returned {} values for {} parameter groups".format(
	                    len(values), len(groups)
	                )
	            )

	        for param_group, lr in zip(groups, values):
	            param_group["lr"] = lr
	        self._last_lr = [group["lr"] for group in groups]


	class Eden(LRScheduler):
	    """
	    Eden scheduler.
	    The basic formula (before warmup) is:
	      lr = base_lr * (((batch**2 + lr_batches**2) / lr_batches**2) ** -0.25 *
	                      (((epoch**2 + lr_epochs**2) / lr_epochs**2) ** -0.25)) * warmup
	    where `warmup` increases linearly from 0.5 to 1 over `warmup_batches` batches
	    and then stays constant at 1.


	    E.g. suggest base_lr = 0.04 (passed to optimizer) if used with ScaledAdam

	    Args:
	        optimizer: the optimizer to change the learning rates on
	        lr_batches: the number of batches after which we start significantly
	              decreasing the learning rate, suggest 5000. Must be non-zero,
	              since the formula divides by lr_batches**2.
	        lr_epochs: the number of epochs after which we start significantly
	              decreasing the learning rate, suggest 6 if you plan to do e.g.
	              20 to 40 epochs, but may need smaller number if dataset is huge
	              and you will do few epochs. Must be non-zero, since the formula
	              divides by lr_epochs**2.
	        warmup_batches: the number of batches over which the warmup factor
	              rises from 0.5 to 1.
	        verbose: passed through to the base class.
	    """

	    def __init__(
	        self,
	        optimizer: Optimizer,
	        lr_batches: Union[int, float],
	        lr_epochs: Union[int, float],
	        warmup_batches: Union[int, float] = 500.0,
	        verbose: bool = False,
	    ):
	        super(Eden, self).__init__(optimizer, verbose)
	        self.lr_batches = lr_batches
	        self.lr_epochs = lr_epochs
	        self.warmup_batches = warmup_batches

	    def get_lr(self):
	        """Return one learning rate per base learning rate for the current
	        ``self.batch`` / ``self.epoch`` counters."""
	        # Each term equals 1 at count 0 and decays as count ** -0.5 once the
	        # count is much larger than its lr_batches / lr_epochs constant.
	        batch_term = (
	            (self.batch**2 + self.lr_batches**2) / self.lr_batches**2
	        ) ** -0.25
	        epoch_term = (
	            (self.epoch**2 + self.lr_epochs**2) / self.lr_epochs**2
	        ) ** -0.25
	        factor = batch_term * epoch_term

	        # Linear ramp from 0.5 to 1.0 over the first `warmup_batches` batches.
	        warmup_factor = (
	            1.0
	            if self.batch >= self.warmup_batches
	            else 0.5 + 0.5 * (self.batch / self.warmup_batches)
	        )

	        return [x * factor * warmup_factor for x in self.base_lrs]