File size: 14,884 Bytes
a663d90 b1b2713 6936e30 6e84e41 8966c38 5d6cc56 0d052e8 774bf4f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
NEW RUN 2023-12-06-03-34-29 {'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '/content/RWKV-LM-LoRA/RWKV-v4neo/rwkv-24.pth', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-06-03-34-29', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'} {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}} NEW RUN 2023-12-06-03-34-29 {'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '/content/RWKV-LM-LoRA/RWKV-v4neo/rwkv-24.pth', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-06-03-34-29', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'} {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}} NEW RUN 2023-12-06-03-34-29 {'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '/content/RWKV-LM-LoRA/RWKV-v4neo/rwkv-24.pth', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-06-03-34-29', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'} {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}} NEW RUN 2023-12-06-03-34-29 {'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA/RWKV-v4neo/lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 5000, 'epoch_count': 1000, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '/content/RWKV-LM-LoRA/RWKV-v4neo/rwkv-24.pth', 'lora_r': 768, 'lora_alpha': 512.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-06-03-34-29', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'} {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}} 0 2.144867 8.5409 0.00004998 2023-12-06 04:22:28.220749 0 1 2.140603 8.5046 0.00004996 2023-12-06 05:06:58.944447 1 2 2.138636 8.4879 0.00004994 2023-12-06 05:51:28.449240 2 3 2.129853 8.4136 0.00004992 2023-12-06 06:35:54.271193 3 4 2.135748 8.4634 0.00004990 2023-12-06 07:20:19.087221 4 5 2.138677 8.4882 0.00004988 2023-12-06 08:04:42.354788 5 6 2.131989 8.4316 0.00004986 2023-12-06 08:49:05.056323 6 7 2.139655 8.4965 0.00004984 2023-12-06 09:33:27.777744 7 8 2.137606 8.4791 0.00004982 2023-12-06 10:17:51.087575 8 9 2.132647 8.4372 0.00004980 2023-12-06 11:02:13.482789 9 10 2.127643 8.3951 0.00004978 2023-12-06 11:46:37.697566 10 11 2.131369 8.4264 0.00004976 2023-12-06 12:31:02.055146 11 12 2.136081 8.4662 0.00004974 2023-12-06 13:15:26.174361 12 13 2.129255 8.4086 0.00004972 2023-12-06 13:59:50.118002 13 14 2.129875 8.4138 0.00004970 2023-12-06 14:44:13.598163 14 15 2.129968 8.4146 0.00004968 2023-12-06 15:28:39.138772 15 16 2.121030 8.3397 0.00004966 2023-12-06 16:13:06.116242 16 17 2.118419 8.3180 0.00004964 2023-12-06 16:57:33.430652 17 18 2.124105 8.3654 0.00004962 2023-12-06 17:41:58.919512 18 19 2.121597 8.3445 0.00004960 2023-12-06 18:26:25.833191 19 20 2.126514 8.3856 0.00004958 2023-12-06 19:10:52.312649 20 21 2.122005 8.3479 0.00004956 2023-12-06 19:55:18.800415 21 22 2.124549 8.3691 0.00004954 2023-12-06 20:39:44.929857 22 23 2.124086 8.3652 0.00004952 2023-12-06 21:24:11.073329 23 24 2.121024 8.3397 0.00004950 2023-12-06 22:08:36.157885 24 25 2.115265 8.2918 0.00004948 2023-12-06 22:53:01.720867 25 26 2.120948 8.3390 0.00004946 2023-12-06 23:37:27.113744 26 27 2.124899 8.3721 0.00004944 2023-12-07 00:21:53.865237 27 |