defaults:
  - base
  - model@model.model: dual_ar_2_codebook_small
  - _self_

project: text2semantic_finetune_dual_ar
max_length: 2048
ckpt_path: checkpoints/text2semantic-medium-v1-2k.pth
resume_weights_only: true  # load model weights only, not optimizer/trainer state

# Lightning Trainer
trainer:
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: 'norm'
  max_steps: 1000
  precision: bf16-true  # pure bfloat16 training (not mixed precision)
  limit_val_batches: 10  # cap each validation run at 10 batches
  val_check_interval: 100  # validate every 100 training steps

# Tokenizer Configuration
tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: fishaudio/fish-speech-1

# Dataset Configuration
train_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  proto_files:
    - data/protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false

val_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  proto_files:
    - data/protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false

data:
  _target_: fish_speech.datasets.text.TextDataModule
  train_dataset: ${train_dataset}
  val_dataset: ${val_dataset}
  num_workers: 4
  batch_size: 8
  tokenizer: ${tokenizer}
  max_length: ${max_length}

# Model Configuration
model:
  _target_: fish_speech.models.text2semantic.TextToSemantic
  model: {}  # filled in by the model@model.model entry in the defaults list

  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true  # instantiated as a partial; model parameters are bound later
    lr: 1e-5
    weight_decay: 0
    betas: [0.9, 0.95]
    eps: 1e-5

  lr_scheduler:
    _target_: torch.optim.lr_scheduler.LambdaLR
    _partial_: true
    lr_lambda:
      _target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
      _partial_: true
      num_warmup_steps: 100
      num_training_steps: ${trainer.max_steps}

# Callbacks
callbacks:
  model_checkpoint:
    every_n_train_steps: 100  # save a checkpoint every 100 training steps
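# Usage sketch, hedged: assuming this file is picked up by the repository's Hydra
# entry point (the script path and config name below are assumptions based on the
# fish-speech layout, not guaranteed by this file), a fine-tuning run could be
# launched, with individual values overridden from the command line:
#
#   python fish_speech/train.py --config-name text2semantic_finetune_dual_ar \
#       ckpt_path=checkpoints/text2semantic-medium-v1-2k.pth \
#       trainer.max_steps=2000 data.batch_size=4
#
# OmegaConf resolves interpolations such as ${max_length} and ${trainer.max_steps},
# so a single override (e.g. max_length=4096) propagates to both datasets and the
# data module without further edits.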
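# _partial_ sketch, hedged: under standard Hydra semantics (not specific to this
# repo), _partial_: true makes hydra.utils.instantiate return a functools.partial
# rather than a constructed object, so the LightningModule can bind the missing
# arguments itself, e.g.:
#
#   import hydra
#   opt_factory = hydra.utils.instantiate(cfg.model.optimizer)       # functools.partial(AdamW, lr=1e-5, ...)
#   optimizer = opt_factory(model.parameters())                      # AdamW bound to the model's parameters
#   sched_factory = hydra.utils.instantiate(cfg.model.lr_scheduler)
#   scheduler = sched_factory(optimizer)                             # LambdaLR(optimizer, lr_lambda=<cosine-warmup partial>)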