[2022-12-19 18:22:21,564] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. [2022-12-19 18:22:21,575] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-medium --dataset_name=mozilla-foundation/common_voice_11_0 --dataset_config_name=sl --language=slovenian --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Medium Slovenian CV11 --max_steps=5000 --output_dir=./ --per_device_train_batch_size=64 --per_device_eval_batch_size=32 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=sentence --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming=False --use_auth_token --push_to_hub [2022-12-19 18:22:23,159] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} [2022-12-19 18:22:23,159] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 [2022-12-19 18:22:23,159] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) [2022-12-19 18:22:23,159] [INFO] [launch.py:162:main] dist_world_size=1 [2022-12-19 18:22:23,159] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 [2022-12-19 18:22:27,335] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl 12/19/2022 18:22:27 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True 12/19/2022 18:22:27 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, bf16=False, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_pin_memory=True, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], deepspeed=ds_config.json, disable_tqdm=False, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_steps=1000, evaluation_strategy=steps, fp16=True, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, generation_max_length=225, generation_num_beams=None, gradient_accumulation_steps=1, gradient_checkpointing=True, greater_is_better=False, group_by_length=False, half_precision_backend=auto, hub_model_id=None, hub_private_repo=False, hub_strategy=every_save, hub_token=, ignore_data_skip=False, include_inputs_for_metrics=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=1e-05, length_column_name=input_length, load_best_model_at_end=True, local_rank=0, log_level=passive, log_level_replica=passive, log_on_each_node=True, logging_dir=./runs/Dec19_18-22-27_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, logging_strategy=steps, lr_scheduler_type=linear, max_grad_norm=1.0, max_steps=5000, metric_for_best_model=wer, mp_parameters=, no_cuda=False, num_train_epochs=3.0, optim=adamw_hf, optim_args=None, output_dir=./, overwrite_output_dir=True, past_index=-1, per_device_eval_batch_size=32, per_device_train_batch_size=64, predict_with_generate=True, prediction_loss_only=False, push_to_hub=True, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=, ray_scope=last, remove_unused_columns=True, report_to=['tensorboard'], resume_from_checkpoint=None, run_name=./, save_on_each_node=False, save_steps=1000, save_strategy=steps, save_total_limit=None, seed=42, sharded_ddp=[], skip_memory_metrics=True, sortish_sampler=False, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) 12/19/2022 18:22:27 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, bf16=False, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_pin_memory=True, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], deepspeed=ds_config.json, disable_tqdm=False, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_steps=1000, evaluation_strategy=steps, fp16=True, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, generation_max_length=225, generation_num_beams=None, gradient_accumulation_steps=1, gradient_checkpointing=True, greater_is_better=False, group_by_length=False, half_precision_backend=auto, hub_model_id=None, hub_private_repo=False, hub_strategy=every_save, hub_token=, ignore_data_skip=False, include_inputs_for_metrics=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=1e-05, length_column_name=input_length, load_best_model_at_end=True, local_rank=0, log_level=passive, log_level_replica=passive, log_on_each_node=True, logging_dir=./runs/Dec19_18-22-27_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, logging_strategy=steps, lr_scheduler_type=linear, max_grad_norm=1.0, max_steps=5000, metric_for_best_model=wer, mp_parameters=, no_cuda=False, num_train_epochs=3.0, optim=adamw_hf, optim_args=None, output_dir=./, overwrite_output_dir=True, past_index=-1, per_device_eval_batch_size=32, per_device_train_batch_size=64, predict_with_generate=True, prediction_loss_only=False, push_to_hub=True, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=, ray_scope=last, remove_unused_columns=True, report_to=['tensorboard'], resume_from_checkpoint=None, run_name=./, save_on_each_node=False, save_steps=1000, save_strategy=steps, save_total_limit=None, seed=42, sharded_ddp=[], skip_memory_metrics=True, sortish_sampler=False, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) 12/19/2022 18:22:29 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:29 - INFO - datasets.builder - Overwrite dataset info from restored data version. 12/19/2022 18:22:29 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:29 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) 12/19/2022 18:22:29 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:31 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:31 - INFO - datasets.builder - Overwrite dataset info from restored data version. 12/19/2022 18:22:31 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:31 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) 12/19/2022 18:22:31 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:33 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:33 - INFO - datasets.builder - Overwrite dataset info from restored data version. 12/19/2022 18:22:33 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:33 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) 12/19/2022 18:22:33 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f 12/19/2022 18:22:44 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-41ec946a05a262bb.arrow 12/19/2022 18:22:45 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-3645a625c071a58a.arrow 12/19/2022 18:25:44 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/sl/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-d896a0b0378699aa.arrow 12/19/2022 18:25:46 - WARNING - huggingface_hub.repository - /home/milan/whisper-medium-sl-cv11/./ is already a clone of https://huggingface.co/mikr/whisper-medium-sl-cv11. Make sure you pull the latest changes with `repo.git_pull()`. [2022-12-19 18:25:50,570] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.7, git-hash=unknown, git-branch=unknown [2022-12-19 18:25:51,215] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False [2022-12-19 18:25:52,405] [WARNING] [cpu_adam.py:83:__init__] FP16 params for CPUAdam may not work on AMD CPUs Installed CUDA version 11.6 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination ninja: no work to do. Time to load cpu_adam op: 2.9568140506744385 seconds Adam Optimizer #0 is created with AVX2 arithmetic capability. Config: alpha=0.000010, betas=(0.900000, 0.999000), weight_decay=0.000000, adam_w=1 [2022-12-19 18:25:57,177] [INFO] [logging.py:68:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adamw as basic optimizer [2022-12-19 18:25:57,350] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = DeepSpeedCPUAdam [2022-12-19 18:25:57,350] [INFO] [utils.py:52:is_zero_supported_optimizer] Checking ZeRO support for optimizer=DeepSpeedCPUAdam type= [2022-12-19 18:25:57,350] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 2 optimizer [2022-12-19 18:25:57,350] [INFO] [stage_1_and_2.py:140:__init__] Reduce bucket size 200000000 [2022-12-19 18:25:57,350] [INFO] [stage_1_and_2.py:141:__init__] Allgather bucket size 200000000 [2022-12-19 18:25:57,350] [INFO] [stage_1_and_2.py:142:__init__] CPU Offload: True [2022-12-19 18:25:57,350] [INFO] [stage_1_and_2.py:143:__init__] Round robin gradient partitioning: False ninja: no work to do. Time to load utils op: 0.4852731227874756 seconds Rank: 0 partition count [1] and sizes[(763857920, False)] [2022-12-19 18:25:59,864] [INFO] [utils.py:827:see_memory_usage] Before initializing optimizer states [2022-12-19 18:25:59,865] [INFO] [utils.py:828:see_memory_usage] MA 1.52 GB Max_MA 1.52 GB CA 2.86 GB Max_CA 3 GB [2022-12-19 18:25:59,865] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 10.81 GB, percent = 5.5% [2022-12-19 18:26:01,836] [INFO] [utils.py:827:see_memory_usage] After initializing optimizer states [2022-12-19 18:26:01,837] [INFO] [utils.py:828:see_memory_usage] MA 1.52 GB Max_MA 1.52 GB CA 2.86 GB Max_CA 3 GB [2022-12-19 18:26:01,837] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 20.58 GB, percent = 10.5% [2022-12-19 18:26:01,837] [INFO] [stage_1_and_2.py:525:__init__] optimizer state initialized [2022-12-19 18:26:01,907] [INFO] [utils.py:827:see_memory_usage] After initializing ZeRO optimizer [2022-12-19 18:26:01,908] [INFO] [utils.py:828:see_memory_usage] MA 1.52 GB Max_MA 1.52 GB CA 2.86 GB Max_CA 3 GB [2022-12-19 18:26:01,908] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 20.58 GB, percent = 10.5% [2022-12-19 18:26:01,926] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = adamw [2022-12-19 18:26:01,926] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = WarmupDecayLR [2022-12-19 18:26:01,926] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = [2022-12-19 18:26:01,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[1e-05], mom=[[0.9, 0.999]] [2022-12-19 18:26:01,927] [INFO] [config.py:1020:print] DeepSpeedEngine configuration: [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] activation_checkpointing_config { "partition_activations": false, "contiguous_memory_optimization": false, "cpu_checkpointing": false, "number_checkpoints": null, "synchronize_checkpoint_boundary": false, "profile": false } [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] amp_enabled .................. False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] amp_params ................... False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] autotuning_config ............ { "enabled": false, "start_step": null, "end_step": null, "metric_path": null, "arg_mappings": null, "metric": "throughput", "model_info": null, "results_dir": "autotuning_results", "exps_dir": "autotuning_exps", "overwrite": true, "fast": true, "start_profile_step": 3, "end_profile_step": 5, "tuner_type": "gridsearch", "tuner_early_stopping": 5, "tuner_num_trials": 50, "model_info_path": null, "mp_size": 1, "max_train_batch_size": null, "min_train_batch_size": 1, "max_train_micro_batch_size_per_gpu": 1.024000e+03, "min_train_micro_batch_size_per_gpu": 1, "num_tuning_micro_batch_sizes": 3 } [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] bfloat16_enabled ............. False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] checkpoint_parallel_write_pipeline False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] checkpoint_tag_validation_enabled True [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] checkpoint_tag_validation_fail False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] comms_config ................. [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] communication_data_type ...... None [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] curriculum_enabled ........... False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] curriculum_params ............ False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] dataloader_drop_last ......... False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] disable_allgather ............ False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] dump_state ................... False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 1000, 'delayed_shift': 2, 'min_scale': 1} [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_enabled ........... False [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_gas_boundary_resolution 1 [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_layer_name ........ bert.encoder.layer [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_layer_num ......... 0 [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_max_iter .......... 100 [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_stability ......... 1e-06 [2022-12-19 18:26:01,928] [INFO] [config.py:1024:print] eigenvalue_tol ............... 0.01 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] eigenvalue_verbose ........... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] elasticity_enabled ........... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] flops_profiler_config ........ { "enabled": false, "profile_step": 1, "module_depth": -1, "top_modules": 1, "detailed": true, "output_file": null } [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] fp16_auto_cast ............... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] fp16_enabled ................. True [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] fp16_master_weights_and_gradients False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] global_rank .................. 0 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] grad_accum_dtype ............. None [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] gradient_accumulation_steps .. 1 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] gradient_clipping ............ 1.0 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] gradient_predivide_factor .... 1.0 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] initial_dynamic_scale ........ 65536 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] load_universal_checkpoint .... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] loss_scale ................... 0 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] memory_breakdown ............. False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] monitor_config ............... [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] nebula_config ................ { "enabled": false, "persistent_storage_path": null, "persistent_time_interval": 100, "num_of_version_in_retention": 2, "enable_nebula_load": true, "load_path": null } [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] optimizer_legacy_fusion ...... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] optimizer_name ............... adamw [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] optimizer_params ............. {'lr': 1e-05, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0} [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] pld_enabled .................. False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] pld_params ................... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] prescale_gradients ........... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] scheduler_name ............... WarmupDecayLR [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] scheduler_params ............. {'last_batch_iteration': -1, 'total_num_steps': 5000, 'warmup_min_lr': 0, 'warmup_max_lr': 1e-05, 'warmup_num_steps': 500} [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] sparse_attention ............. None [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] sparse_gradients_enabled ..... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] steps_per_print .............. 10 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] train_batch_size ............. 64 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] train_micro_batch_size_per_gpu 64 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] use_node_local_storage ....... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] wall_clock_breakdown ......... False [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] world_size ................... 1 [2022-12-19 18:26:01,929] [INFO] [config.py:1024:print] zero_allow_untested_optimizer False [2022-12-19 18:26:01,930] [INFO] [config.py:1024:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=200000000 allgather_partitions=True allgather_bucket_size=200000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='cpu', nvme_path=None, buffer_count=4, pin_memory=True, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False [2022-12-19 18:26:01,930] [INFO] [config.py:1024:print] zero_enabled ................. True [2022-12-19 18:26:01,930] [INFO] [config.py:1024:print] zero_optimization_stage ...... 2 [2022-12-19 18:26:01,930] [INFO] [config.py:1009:print_user_config] json = { "fp16": { "enabled": true, "loss_scale": 0, "loss_scale_window": 1000, "initial_scale_power": 16, "hysteresis": 2, "min_loss_scale": 1 }, "optimizer": { "type": "AdamW", "params": { "lr": 1e-05, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.0 } }, "scheduler": { "type": "WarmupDecayLR", "params": { "last_batch_iteration": -1, "total_num_steps": 5.000000e+03, "warmup_min_lr": 0, "warmup_max_lr": 1e-05, "warmup_num_steps": 500 } }, "zero_optimization": { "stage": 2, "offload_optimizer": { "device": "cpu", "pin_memory": true }, "allgather_partitions": true, "allgather_bucket_size": 2.000000e+08, "overlap_comm": true, "reduce_scatter": true, "reduce_bucket_size": 2.000000e+08, "contiguous_gradients": true }, "gradient_accumulation_steps": 1, "gradient_clipping": 1.0, "train_batch_size": 64, "train_micro_batch_size_per_gpu": 64 } Time to load utils op: 0.00036025047302246094 seconds [2022-12-19 18:26:08,603] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 65536 [2022-12-19 18:26:14,791] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768.0 [2022-12-19 18:26:20,916] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0 [2022-12-19 18:26:20,917] [INFO] [timer.py:197:stop] 0/3, RunningAvgSamplesPerSec=12.770695296775497, CurrSamplesPerSec=12.770695296775497, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:27,184] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0 [2022-12-19 18:26:27,185] [INFO] [timer.py:197:stop] 0/4, RunningAvgSamplesPerSec=12.72161336521847, CurrSamplesPerSec=12.672907264842044, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:33,823] [INFO] [timer.py:197:stop] 0/5, RunningAvgSamplesPerSec=12.417447570891687, CurrSamplesPerSec=11.850759138646605, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:40,361] [INFO] [timer.py:197:stop] 0/6, RunningAvgSamplesPerSec=12.289279867666883, CurrSamplesPerSec=11.920174686536152, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:46,889] [INFO] [timer.py:197:stop] 0/7, RunningAvgSamplesPerSec=12.202032162762992, CurrSamplesPerSec=11.86508755136586, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:53,443] [INFO] [timer.py:197:stop] 0/8, RunningAvgSamplesPerSec=12.134885303524678, CurrSamplesPerSec=11.809939294195038, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:26:59,982] [INFO] [timer.py:197:stop] 0/9, RunningAvgSamplesPerSec=12.090958404070973, CurrSamplesPerSec=11.833933474922075, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:06,514] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=4, lr=[2.883141528559073e-06], mom=[[0.9, 0.999]] [2022-12-19 18:27:06,515] [INFO] [timer.py:197:stop] 0/10, RunningAvgSamplesPerSec=12.063121856911412, CurrSamplesPerSec=11.871797978947413, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:13,066] [INFO] [timer.py:197:stop] 0/11, RunningAvgSamplesPerSec=12.042646092618936, CurrSamplesPerSec=11.881308832152559, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:19,501] [INFO] [timer.py:197:stop] 0/12, RunningAvgSamplesPerSec=12.03670392322164, CurrSamplesPerSec=11.983487114497631, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:25,966] [INFO] [timer.py:197:stop] 0/13, RunningAvgSamplesPerSec=12.030092689268644, CurrSamplesPerSec=11.964377606499204, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:32,539] [INFO] [timer.py:197:stop] 0/14, RunningAvgSamplesPerSec=12.016084789228547, CurrSamplesPerSec=11.864123695174017, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:39,048] [INFO] [timer.py:197:stop] 0/15, RunningAvgSamplesPerSec=12.003122298261156, CurrSamplesPerSec=11.849725945066293, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:45,553] [INFO] [timer.py:197:stop] 0/16, RunningAvgSamplesPerSec=11.996271649287452, CurrSamplesPerSec=11.907919579281874, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:52,175] [INFO] [timer.py:197:stop] 0/17, RunningAvgSamplesPerSec=11.986143633379378, CurrSamplesPerSec=11.846126084826565, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:27:58,757] [INFO] [timer.py:197:stop] 0/18, RunningAvgSamplesPerSec=11.974989551776911, CurrSamplesPerSec=11.81013518046904, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:05,297] [INFO] [timer.py:197:stop] 0/19, RunningAvgSamplesPerSec=11.96641749179651, CurrSamplesPerSec=11.830914662772678, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:11,830] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=4, lr=[4.461405575910259e-06], mom=[[0.9, 0.999]] [2022-12-19 18:28:11,830] [INFO] [timer.py:197:stop] 0/20, RunningAvgSamplesPerSec=11.96237746913552, CurrSamplesPerSec=11.89411207559048, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:18,307] [INFO] [timer.py:197:stop] 0/21, RunningAvgSamplesPerSec=11.960873700663855, CurrSamplesPerSec=11.933870372518308, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:24,815] [INFO] [timer.py:197:stop] 0/22, RunningAvgSamplesPerSec=11.959183627554138, CurrSamplesPerSec=11.927162742367901, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:31,421] [INFO] [timer.py:197:stop] 0/23, RunningAvgSamplesPerSec=11.95565665870325, CurrSamplesPerSec=11.885551588767477, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:37,888] [INFO] [timer.py:197:stop] 0/24, RunningAvgSamplesPerSec=11.951338000586484, CurrSamplesPerSec=11.861361473509685, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:44,383] [INFO] [timer.py:197:stop] 0/25, RunningAvgSamplesPerSec=11.951529935048148, CurrSamplesPerSec=11.95575405345182, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.8218, 'learning_rate': 4.898977360288234e-06, 'epoch': 0.66} [2022-12-19 18:28:50,941] [INFO] [timer.py:197:stop] 0/26, RunningAvgSamplesPerSec=11.94718818601409, CurrSamplesPerSec=11.848191396639134, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:28:57,748] [INFO] [timer.py:197:stop] 0/27, RunningAvgSamplesPerSec=11.944532057257671, CurrSamplesPerSec=11.881137396942313, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:04,233] [INFO] [timer.py:197:stop] 0/28, RunningAvgSamplesPerSec=11.945743275920867, CurrSamplesPerSec=11.97610377966594, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:10,929] [INFO] [timer.py:197:stop] 0/29, RunningAvgSamplesPerSec=11.947151312098287, CurrSamplesPerSec=11.983877117733229, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:17,648] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=4, lr=[5.242641991936178e-06], mom=[[0.9, 0.999]] [2022-12-19 18:29:17,649] [INFO] [timer.py:197:stop] 0/30, RunningAvgSamplesPerSec=11.92522066157641, CurrSamplesPerSec=11.3620900436752, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:24,228] [INFO] [timer.py:197:stop] 0/31, RunningAvgSamplesPerSec=11.923891439812145, CurrSamplesPerSec=11.886793161338273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:30,731] [INFO] [timer.py:197:stop] 0/32, RunningAvgSamplesPerSec=11.925303916588168, CurrSamplesPerSec=11.966411812194945, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:37,552] [INFO] [timer.py:197:stop] 0/33, RunningAvgSamplesPerSec=11.923892449639919, CurrSamplesPerSec=11.881703257095392, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:44,037] [INFO] [timer.py:197:stop] 0/34, RunningAvgSamplesPerSec=11.923702348930892, CurrSamplesPerSec=11.917812231965113, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:50,877] [INFO] [timer.py:197:stop] 0/35, RunningAvgSamplesPerSec=11.922450480041357, CurrSamplesPerSec=11.882529004768674, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:29:57,416] [INFO] [timer.py:197:stop] 0/36, RunningAvgSamplesPerSec=11.92235585714169, CurrSamplesPerSec=11.919234143828925, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:04,014] [INFO] [timer.py:197:stop] 0/37, RunningAvgSamplesPerSec=11.923196315496618, CurrSamplesPerSec=11.951842573527628, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:08,617] [INFO] [timer.py:197:stop] 0/38, RunningAvgSamplesPerSec=12.017485639102567, CurrSamplesPerSec=16.61668531876392, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:15,093] [INFO] [timer.py:197:stop] 0/39, RunningAvgSamplesPerSec=12.015775214061128, CurrSamplesPerSec=11.954522523549615, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:21,534] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=4, lr=[5.766283057118146e-06], mom=[[0.9, 0.999]] [2022-12-19 18:30:21,534] [INFO] [timer.py:197:stop] 0/40, RunningAvgSamplesPerSec=12.013080562944676, CurrSamplesPerSec=11.914221126787318, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:28,065] [INFO] [timer.py:197:stop] 0/41, RunningAvgSamplesPerSec=12.007720882024694, CurrSamplesPerSec=11.8075377475698, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:34,563] [INFO] [timer.py:197:stop] 0/42, RunningAvgSamplesPerSec=12.00465544250619, CurrSamplesPerSec=11.886312080397534, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:41,100] [INFO] [timer.py:197:stop] 0/43, RunningAvgSamplesPerSec=12.000052226980072, CurrSamplesPerSec=11.818774664140612, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:47,622] [INFO] [timer.py:197:stop] 0/44, RunningAvgSamplesPerSec=11.99744423321191, CurrSamplesPerSec=11.89148389838182, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:30:54,128] [INFO] [timer.py:197:stop] 0/45, RunningAvgSamplesPerSec=11.994739168724625, CurrSamplesPerSec=11.882217626561658, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:00,688] [INFO] [timer.py:197:stop] 0/46, RunningAvgSamplesPerSec=11.991993376082611, CurrSamplesPerSec=11.875101930347217, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:07,167] [INFO] [timer.py:197:stop] 0/47, RunningAvgSamplesPerSec=11.989124471747616, CurrSamplesPerSec=11.864237482882771, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:13,970] [INFO] [timer.py:197:stop] 0/48, RunningAvgSamplesPerSec=11.988300349175946, CurrSamplesPerSec=11.95133173632273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:20,448] [INFO] [timer.py:197:stop] 0/49, RunningAvgSamplesPerSec=11.98839091553399, CurrSamplesPerSec=11.9925584477353, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:27,193] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=4, lr=[6.160712527409633e-06], mom=[[0.9, 0.999]] [2022-12-19 18:31:27,194] [INFO] [timer.py:197:stop] 0/50, RunningAvgSamplesPerSec=11.98648719937796, CurrSamplesPerSec=11.897689484904296, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.2781, 'learning_rate': 6.160712527409633e-06, 'epoch': 1.32} [2022-12-19 18:31:33,809] [INFO] [timer.py:197:stop] 0/51, RunningAvgSamplesPerSec=11.979564343681941, CurrSamplesPerSec=11.656417669142503, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:40,647] [INFO] [timer.py:197:stop] 0/52, RunningAvgSamplesPerSec=11.978034201609391, CurrSamplesPerSec=11.90353309973444, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:47,208] [INFO] [timer.py:197:stop] 0/53, RunningAvgSamplesPerSec=11.976828784462107, CurrSamplesPerSec=11.91686571359723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:31:54,197] [INFO] [timer.py:197:stop] 0/54, RunningAvgSamplesPerSec=11.9748057793317, CurrSamplesPerSec=11.872530981266877, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:00,665] [INFO] [timer.py:197:stop] 0/55, RunningAvgSamplesPerSec=11.973632916007181, CurrSamplesPerSec=11.912959014937295, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:07,622] [INFO] [timer.py:197:stop] 0/56, RunningAvgSamplesPerSec=11.971082287878863, CurrSamplesPerSec=11.83743666385719, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:14,104] [INFO] [timer.py:197:stop] 0/57, RunningAvgSamplesPerSec=11.97083497117326, CurrSamplesPerSec=11.957495027203272, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:20,725] [INFO] [timer.py:197:stop] 0/58, RunningAvgSamplesPerSec=11.967168120211076, CurrSamplesPerSec=11.768893495278926, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:27,156] [INFO] [timer.py:197:stop] 0/59, RunningAvgSamplesPerSec=11.967203877717253, CurrSamplesPerSec=11.969206639160747, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:33,640] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=4, lr=[6.4772414076394205e-06], mom=[[0.9, 0.999]] [2022-12-19 18:32:33,640] [INFO] [timer.py:197:stop] 0/60, RunningAvgSamplesPerSec=11.96614803045232, CurrSamplesPerSec=11.906271168097536, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:40,160] [INFO] [timer.py:197:stop] 0/61, RunningAvgSamplesPerSec=11.96498915328232, CurrSamplesPerSec=11.898156193380135, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:46,598] [INFO] [timer.py:197:stop] 0/62, RunningAvgSamplesPerSec=11.964632111030582, CurrSamplesPerSec=11.943604268286796, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:53,065] [INFO] [timer.py:197:stop] 0/63, RunningAvgSamplesPerSec=11.96506026405694, CurrSamplesPerSec=11.990805642653113, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:32:59,647] [INFO] [timer.py:197:stop] 0/64, RunningAvgSamplesPerSec=11.962476461050668, CurrSamplesPerSec=11.806947250765349, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:06,113] [INFO] [timer.py:197:stop] 0/65, RunningAvgSamplesPerSec=11.962677733759069, CurrSamplesPerSec=11.975169883088887, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:12,602] [INFO] [timer.py:197:stop] 0/66, RunningAvgSamplesPerSec=11.961295706682522, CurrSamplesPerSec=11.874867111068959, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:19,121] [INFO] [timer.py:197:stop] 0/67, RunningAvgSamplesPerSec=11.960030019809254, CurrSamplesPerSec=11.879579456476584, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:25,847] [INFO] [timer.py:197:stop] 0/68, RunningAvgSamplesPerSec=11.951028229726973, CurrSamplesPerSec=11.393622052761886, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:32,357] [INFO] [timer.py:197:stop] 0/69, RunningAvgSamplesPerSec=11.951568501396249, CurrSamplesPerSec=11.987334758286156, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:38,865] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=4, lr=[6.741623406776245e-06], mom=[[0.9, 0.999]] [2022-12-19 18:33:38,866] [INFO] [timer.py:197:stop] 0/70, RunningAvgSamplesPerSec=11.950573636747201, CurrSamplesPerSec=11.884292912668403, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:45,329] [INFO] [timer.py:197:stop] 0/71, RunningAvgSamplesPerSec=11.949826825011192, CurrSamplesPerSec=11.899261673865789, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:51,947] [INFO] [timer.py:197:stop] 0/72, RunningAvgSamplesPerSec=11.945692974330852, CurrSamplesPerSec=11.667203342186168, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:33:58,411] [INFO] [timer.py:197:stop] 0/73, RunningAvgSamplesPerSec=11.945205371204342, CurrSamplesPerSec=11.911171789125206, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:04,946] [INFO] [timer.py:197:stop] 0/74, RunningAvgSamplesPerSec=11.944205406112932, CurrSamplesPerSec=11.873633280494387, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:11,483] [INFO] [timer.py:197:stop] 0/75, RunningAvgSamplesPerSec=11.94337558373894, CurrSamplesPerSec=11.883929882564297, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.1633, 'learning_rate': 6.85912902234906e-06, 'epoch': 1.97} [2022-12-19 18:34:16,140] [INFO] [timer.py:197:stop] 0/76, RunningAvgSamplesPerSec=11.989055368710677, CurrSamplesPerSec=16.63305007801444, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:22,797] [INFO] [timer.py:197:stop] 0/77, RunningAvgSamplesPerSec=11.988778641946762, CurrSamplesPerSec=11.96833625049005, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:29,288] [INFO] [timer.py:197:stop] 0/78, RunningAvgSamplesPerSec=11.98765603744316, CurrSamplesPerSec=11.904055695817226, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:35,928] [INFO] [timer.py:197:stop] 0/79, RunningAvgSamplesPerSec=11.985019760105475, CurrSamplesPerSec=11.787999662391314, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:42,411] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=4, lr=[6.968634661590082e-06], mom=[[0.9, 0.999]] [2022-12-19 18:34:42,412] [INFO] [timer.py:197:stop] 0/80, RunningAvgSamplesPerSec=11.98450592126031, CurrSamplesPerSec=11.94507220719026, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:49,194] [INFO] [timer.py:197:stop] 0/81, RunningAvgSamplesPerSec=11.984615261788422, CurrSamplesPerSec=11.99314997436825, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:34:55,719] [INFO] [timer.py:197:stop] 0/82, RunningAvgSamplesPerSec=11.983718552141424, CurrSamplesPerSec=11.913300028430148, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:02,203] [INFO] [timer.py:197:stop] 0/83, RunningAvgSamplesPerSec=11.982591198535706, CurrSamplesPerSec=11.893085009217916, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:08,689] [INFO] [timer.py:197:stop] 0/84, RunningAvgSamplesPerSec=11.981385521650415, CurrSamplesPerSec=11.884524947874823, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:15,210] [INFO] [timer.py:197:stop] 0/85, RunningAvgSamplesPerSec=11.980351942273366, CurrSamplesPerSec=11.896201008606102, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:21,707] [INFO] [timer.py:197:stop] 0/86, RunningAvgSamplesPerSec=11.980288340600778, CurrSamplesPerSec=11.975011754839112, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:28,095] [INFO] [timer.py:197:stop] 0/87, RunningAvgSamplesPerSec=11.980492749372829, CurrSamplesPerSec=11.997688023723322, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:34,860] [INFO] [timer.py:197:stop] 0/88, RunningAvgSamplesPerSec=11.973205093163857, CurrSamplesPerSec=11.384566644801064, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:41,317] [INFO] [timer.py:197:stop] 0/89, RunningAvgSamplesPerSec=11.97254430523956, CurrSamplesPerSec=11.915988109556679, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:47,771] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=4, lr=[7.1675433522258775e-06], mom=[[0.9, 0.999]] [2022-12-19 18:35:47,772] [INFO] [timer.py:197:stop] 0/90, RunningAvgSamplesPerSec=11.972879471110938, CurrSamplesPerSec=12.002110912130712, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:35:54,183] [INFO] [timer.py:197:stop] 0/91, RunningAvgSamplesPerSec=11.972986631129855, CurrSamplesPerSec=11.982424230439614, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:00,996] [INFO] [timer.py:197:stop] 0/92, RunningAvgSamplesPerSec=11.964624476181502, CurrSamplesPerSec=11.264435677007024, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:07,463] [INFO] [timer.py:197:stop] 0/93, RunningAvgSamplesPerSec=11.964893034868153, CurrSamplesPerSec=11.98911278661595, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:13,935] [INFO] [timer.py:197:stop] 0/94, RunningAvgSamplesPerSec=11.965195569846097, CurrSamplesPerSec=11.992790443526756, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:20,419] [INFO] [timer.py:197:stop] 0/95, RunningAvgSamplesPerSec=11.964468078174255, CurrSamplesPerSec=11.897915187934306, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:27,204] [INFO] [timer.py:197:stop] 0/96, RunningAvgSamplesPerSec=11.9646461154338, CurrSamplesPerSec=11.981226772701694, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:33,737] [INFO] [timer.py:197:stop] 0/97, RunningAvgSamplesPerSec=11.963508419495465, CurrSamplesPerSec=11.857522502684597, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:40,244] [INFO] [timer.py:197:stop] 0/98, RunningAvgSamplesPerSec=11.962280878172274, CurrSamplesPerSec=11.846802068379677, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:46,717] [INFO] [timer.py:197:stop] 0/99, RunningAvgSamplesPerSec=11.961254613684387, CurrSamplesPerSec=11.863546400382022, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:36:53,169] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=4, lr=[7.344547104469332e-06], mom=[[0.9, 0.999]] [2022-12-19 18:36:53,170] [INFO] [timer.py:197:stop] 0/100, RunningAvgSamplesPerSec=11.960833391854557, CurrSamplesPerSec=11.920115402027617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0649, 'learning_rate': 7.344547104469332e-06, 'epoch': 2.63} [2022-12-19 18:36:59,620] [INFO] [timer.py:197:stop] 0/101, RunningAvgSamplesPerSec=11.96118042101715, CurrSamplesPerSec=11.995287243374307, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:06,134] [INFO] [timer.py:197:stop] 0/102, RunningAvgSamplesPerSec=11.95990428660375, CurrSamplesPerSec=11.834900780286048, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:12,761] [INFO] [timer.py:197:stop] 0/103, RunningAvgSamplesPerSec=11.955906047923378, CurrSamplesPerSec=11.569145389261527, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:19,274] [INFO] [timer.py:197:stop] 0/104, RunningAvgSamplesPerSec=11.955430896240086, CurrSamplesPerSec=11.907634336105545, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:25,829] [INFO] [timer.py:197:stop] 0/105, RunningAvgSamplesPerSec=11.954819456194633, CurrSamplesPerSec=11.892779400315366, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:32,289] [INFO] [timer.py:197:stop] 0/106, RunningAvgSamplesPerSec=11.955050082573079, CurrSamplesPerSec=11.978852353504577, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:39,064] [INFO] [timer.py:197:stop] 0/107, RunningAvgSamplesPerSec=11.949117331314653, CurrSamplesPerSec=11.362683535059936, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:45,550] [INFO] [timer.py:197:stop] 0/108, RunningAvgSamplesPerSec=11.949403097124424, CurrSamplesPerSec=11.979484762760327, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:52,041] [INFO] [timer.py:197:stop] 0/109, RunningAvgSamplesPerSec=11.948781088218709, CurrSamplesPerSec=11.883213357800784, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:37:58,563] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=4, lr=[7.503995457567235e-06], mom=[[0.9, 0.999]] [2022-12-19 18:37:58,564] [INFO] [timer.py:197:stop] 0/110, RunningAvgSamplesPerSec=11.948342450463205, CurrSamplesPerSec=11.901593560971051, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:05,256] [INFO] [timer.py:197:stop] 0/111, RunningAvgSamplesPerSec=11.947155357237115, CurrSamplesPerSec=11.820322941938132, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:11,847] [INFO] [timer.py:197:stop] 0/112, RunningAvgSamplesPerSec=11.94614608161501, CurrSamplesPerSec=11.837148001603007, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:18,436] [INFO] [timer.py:197:stop] 0/113, RunningAvgSamplesPerSec=11.945807279393875, CurrSamplesPerSec=11.908655992436962, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:23,107] [INFO] [timer.py:197:stop] 0/114, RunningAvgSamplesPerSec=11.975399622944872, CurrSamplesPerSec=16.517131060398814, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:29,929] [INFO] [timer.py:197:stop] 0/115, RunningAvgSamplesPerSec=11.97564932824106, CurrSamplesPerSec=12.00368237210802, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:36,367] [INFO] [timer.py:197:stop] 0/116, RunningAvgSamplesPerSec=11.975750223871604, CurrSamplesPerSec=11.9871623909319, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:42,845] [INFO] [timer.py:197:stop] 0/117, RunningAvgSamplesPerSec=11.975163876554683, CurrSamplesPerSec=11.908694558906802, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:49,381] [INFO] [timer.py:197:stop] 0/118, RunningAvgSamplesPerSec=11.974870190861088, CurrSamplesPerSec=11.941192147416698, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:38:55,854] [INFO] [timer.py:197:stop] 0/119, RunningAvgSamplesPerSec=11.974060078277441, CurrSamplesPerSec=11.880825039984053, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:02,349] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=4, lr=[7.649058662787184e-06], mom=[[0.9, 0.999]] [2022-12-19 18:39:02,349] [INFO] [timer.py:197:stop] 0/120, RunningAvgSamplesPerSec=11.974403931668107, CurrSamplesPerSec=12.014771562178144, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:08,849] [INFO] [timer.py:197:stop] 0/121, RunningAvgSamplesPerSec=11.973817366938638, CurrSamplesPerSec=11.905003876154641, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:15,351] [INFO] [timer.py:197:stop] 0/122, RunningAvgSamplesPerSec=11.972927883840688, CurrSamplesPerSec=11.868014690343205, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:21,901] [INFO] [timer.py:197:stop] 0/123, RunningAvgSamplesPerSec=11.971012656961737, CurrSamplesPerSec=11.745550079009458, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:28,332] [INFO] [timer.py:197:stop] 0/124, RunningAvgSamplesPerSec=11.970529099538052, CurrSamplesPerSec=11.912305592465955, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:34,765] [INFO] [timer.py:197:stop] 0/125, RunningAvgSamplesPerSec=11.970787058332547, CurrSamplesPerSec=12.002341667703618, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0433, 'learning_rate': 7.716963756434345e-06, 'epoch': 3.29} [2022-12-19 18:39:41,238] [INFO] [timer.py:197:stop] 0/126, RunningAvgSamplesPerSec=11.970920647681927, CurrSamplesPerSec=11.9873749066736, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:47,734] [INFO] [timer.py:197:stop] 0/127, RunningAvgSamplesPerSec=11.9705270240146, CurrSamplesPerSec=11.921917491064518, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:39:54,225] [INFO] [timer.py:197:stop] 0/128, RunningAvgSamplesPerSec=11.969771430750296, CurrSamplesPerSec=11.876067571207951, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:00,694] [INFO] [timer.py:197:stop] 0/129, RunningAvgSamplesPerSec=11.969909002492257, CurrSamplesPerSec=11.987268380208794, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:07,286] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=4, lr=[7.782118888847307e-06], mom=[[0.9, 0.999]] [2022-12-19 18:40:07,287] [INFO] [timer.py:197:stop] 0/130, RunningAvgSamplesPerSec=11.967800546277202, CurrSamplesPerSec=11.705931930100745, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:13,791] [INFO] [timer.py:197:stop] 0/131, RunningAvgSamplesPerSec=11.967365051497387, CurrSamplesPerSec=11.911882174918238, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:20,269] [INFO] [timer.py:197:stop] 0/132, RunningAvgSamplesPerSec=11.966801221834064, CurrSamplesPerSec=11.894509986324449, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:26,733] [INFO] [timer.py:197:stop] 0/133, RunningAvgSamplesPerSec=11.966475206732566, CurrSamplesPerSec=11.924243965525896, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:33,575] [INFO] [timer.py:197:stop] 0/134, RunningAvgSamplesPerSec=11.966063229480717, CurrSamplesPerSec=11.912338367613689, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:40,046] [INFO] [timer.py:197:stop] 0/135, RunningAvgSamplesPerSec=11.965953999258348, CurrSamplesPerSec=11.951553093749999, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:46,623] [INFO] [timer.py:197:stop] 0/136, RunningAvgSamplesPerSec=11.96463166729028, CurrSamplesPerSec=11.791328087793204, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:53,187] [INFO] [timer.py:197:stop] 0/137, RunningAvgSamplesPerSec=11.963321308455823, CurrSamplesPerSec=11.790291766346405, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:40:59,627] [INFO] [timer.py:197:stop] 0/138, RunningAvgSamplesPerSec=11.963099130714472, CurrSamplesPerSec=11.933180703039122, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:06,159] [INFO] [timer.py:197:stop] 0/139, RunningAvgSamplesPerSec=11.962131778309548, CurrSamplesPerSec=11.832013419715604, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:12,719] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=4, lr=[7.905011559752758e-06], mom=[[0.9, 0.999]] [2022-12-19 18:41:12,720] [INFO] [timer.py:197:stop] 0/140, RunningAvgSamplesPerSec=11.960726369629006, CurrSamplesPerSec=11.771257666422239, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:19,671] [INFO] [timer.py:197:stop] 0/141, RunningAvgSamplesPerSec=11.960250749029747, CurrSamplesPerSec=11.894975918292486, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:26,762] [INFO] [timer.py:197:stop] 0/142, RunningAvgSamplesPerSec=11.959594585538905, CurrSamplesPerSec=11.869083088525297, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:33,845] [INFO] [timer.py:197:stop] 0/143, RunningAvgSamplesPerSec=11.95858879396008, CurrSamplesPerSec=11.8194282741326, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:40,940] [INFO] [timer.py:197:stop] 0/144, RunningAvgSamplesPerSec=11.958371856842502, CurrSamplesPerSec=11.927862316617233, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:47,653] [INFO] [timer.py:197:stop] 0/145, RunningAvgSamplesPerSec=11.958427022037231, CurrSamplesPerSec=11.966265650601596, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:41:54,085] [INFO] [timer.py:197:stop] 0/146, RunningAvgSamplesPerSec=11.958374727281898, CurrSamplesPerSec=11.950901283456991, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:00,522] [INFO] [timer.py:197:stop] 0/147, RunningAvgSamplesPerSec=11.95828940184098, CurrSamplesPerSec=11.946015237346073, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:07,030] [INFO] [timer.py:197:stop] 0/148, RunningAvgSamplesPerSec=11.958220538194965, CurrSamplesPerSec=11.948243697733337, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:13,549] [INFO] [timer.py:197:stop] 0/149, RunningAvgSamplesPerSec=11.957292030051242, CurrSamplesPerSec=11.823259798906044, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:20,067] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=4, lr=[8.019180844200955e-06], mom=[[0.9, 0.999]] [2022-12-19 18:42:20,067] [INFO] [timer.py:197:stop] 0/150, RunningAvgSamplesPerSec=11.956663235993481, CurrSamplesPerSec=11.86494437892723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0237, 'learning_rate': 8.019180844200955e-06, 'epoch': 3.95} [2022-12-19 18:42:26,647] [INFO] [timer.py:197:stop] 0/151, RunningAvgSamplesPerSec=11.955515629909023, CurrSamplesPerSec=11.788064887272855, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:31,290] [INFO] [timer.py:197:stop] 0/152, RunningAvgSamplesPerSec=11.977348129310863, CurrSamplesPerSec=16.45456057843749, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:37,747] [INFO] [timer.py:197:stop] 0/153, RunningAvgSamplesPerSec=11.976952415552413, CurrSamplesPerSec=11.91789001293746, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:44,534] [INFO] [timer.py:197:stop] 0/154, RunningAvgSamplesPerSec=11.976760757249068, CurrSamplesPerSec=11.94789057676298, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:51,045] [INFO] [timer.py:197:stop] 0/155, RunningAvgSamplesPerSec=11.97646824430772, CurrSamplesPerSec=11.932171807157655, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:42:57,909] [INFO] [timer.py:197:stop] 0/156, RunningAvgSamplesPerSec=11.97604480143561, CurrSamplesPerSec=11.91160889892653, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:04,737] [INFO] [timer.py:197:stop] 0/157, RunningAvgSamplesPerSec=11.970746697659047, CurrSamplesPerSec=11.207217630449145, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:11,615] [INFO] [timer.py:197:stop] 0/158, RunningAvgSamplesPerSec=11.970268407906728, CurrSamplesPerSec=11.896592732283754, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:18,066] [INFO] [timer.py:197:stop] 0/159, RunningAvgSamplesPerSec=11.97016882755755, CurrSamplesPerSec=11.954654556116928, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:24,961] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=4, lr=[8.125783520495252e-06], mom=[[0.9, 0.999]] [2022-12-19 18:43:24,962] [INFO] [timer.py:197:stop] 0/160, RunningAvgSamplesPerSec=11.969499477827759, CurrSamplesPerSec=11.86533194893277, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:31,501] [INFO] [timer.py:197:stop] 0/161, RunningAvgSamplesPerSec=11.969072512738691, CurrSamplesPerSec=11.901992500163498, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:38,664] [INFO] [timer.py:197:stop] 0/162, RunningAvgSamplesPerSec=11.968689703878965, CurrSamplesPerSec=11.908132992372748, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:45,179] [INFO] [timer.py:197:stop] 0/163, RunningAvgSamplesPerSec=11.96822662187064, CurrSamplesPerSec=11.894592206960546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:52,001] [INFO] [timer.py:197:stop] 0/164, RunningAvgSamplesPerSec=11.968409771172443, CurrSamplesPerSec=11.997970090025632, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:43:58,690] [INFO] [timer.py:197:stop] 0/165, RunningAvgSamplesPerSec=11.96621957944813, CurrSamplesPerSec=11.62168732577348, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:05,609] [INFO] [timer.py:197:stop] 0/166, RunningAvgSamplesPerSec=11.966306152383915, CurrSamplesPerSec=11.980434303897045, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:12,120] [INFO] [timer.py:197:stop] 0/167, RunningAvgSamplesPerSec=11.96568829387472, CurrSamplesPerSec=11.865215517934883, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:18,982] [INFO] [timer.py:197:stop] 0/168, RunningAvgSamplesPerSec=11.96475516122947, CurrSamplesPerSec=11.812756109103413, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:25,547] [INFO] [timer.py:197:stop] 0/169, RunningAvgSamplesPerSec=11.964024371006682, CurrSamplesPerSec=11.843938162865731, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:32,589] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=4, lr=[8.225760510392298e-06], mom=[[0.9, 0.999]] [2022-12-19 18:44:32,590] [INFO] [timer.py:197:stop] 0/170, RunningAvgSamplesPerSec=11.963545874564042, CurrSamplesPerSec=11.8841703217617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:39,154] [INFO] [timer.py:197:stop] 0/171, RunningAvgSamplesPerSec=11.96285847388959, CurrSamplesPerSec=11.848485827569215, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:45,970] [INFO] [timer.py:197:stop] 0/172, RunningAvgSamplesPerSec=11.96268001448524, CurrSamplesPerSec=11.932596668382528, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:52,532] [INFO] [timer.py:197:stop] 0/173, RunningAvgSamplesPerSec=11.962371310623036, CurrSamplesPerSec=11.910122222355328, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:44:59,112] [INFO] [timer.py:197:stop] 0/174, RunningAvgSamplesPerSec=11.961408098069231, CurrSamplesPerSec=11.79894890701397, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:05,661] [INFO] [timer.py:197:stop] 0/175, RunningAvgSamplesPerSec=11.960593062480887, CurrSamplesPerSec=11.822040313293776, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0133, 'learning_rate': 8.27351214279797e-06, 'epoch': 4.61} [2022-12-19 18:45:12,212] [INFO] [timer.py:197:stop] 0/176, RunningAvgSamplesPerSec=11.959797940843089, CurrSamplesPerSec=11.823814951256551, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:18,710] [INFO] [timer.py:197:stop] 0/177, RunningAvgSamplesPerSec=11.959499301634326, CurrSamplesPerSec=11.907762165616957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:25,365] [INFO] [timer.py:197:stop] 0/178, RunningAvgSamplesPerSec=11.95747328248155, CurrSamplesPerSec=11.613186770970227, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:31,875] [INFO] [timer.py:197:stop] 0/179, RunningAvgSamplesPerSec=11.957232795454122, CurrSamplesPerSec=11.91505721823269, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:38,364] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=4, lr=[8.31988745412743e-06], mom=[[0.9, 0.999]] [2022-12-19 18:45:38,365] [INFO] [timer.py:197:stop] 0/180, RunningAvgSamplesPerSec=11.957260905931054, CurrSamplesPerSec=11.962238543302307, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:44,854] [INFO] [timer.py:197:stop] 0/181, RunningAvgSamplesPerSec=11.956980731274534, CurrSamplesPerSec=11.907317943457292, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:51,446] [INFO] [timer.py:197:stop] 0/182, RunningAvgSamplesPerSec=11.955415137054269, CurrSamplesPerSec=11.681627357786487, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:45:57,906] [INFO] [timer.py:197:stop] 0/183, RunningAvgSamplesPerSec=11.95545821814125, CurrSamplesPerSec=11.963217874858184, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:04,350] [INFO] [timer.py:197:stop] 0/184, RunningAvgSamplesPerSec=11.955471935263988, CurrSamplesPerSec=11.957955253040835, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:10,891] [INFO] [timer.py:197:stop] 0/185, RunningAvgSamplesPerSec=11.955210228044418, CurrSamplesPerSec=11.907769560796638, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:17,428] [INFO] [timer.py:197:stop] 0/186, RunningAvgSamplesPerSec=11.95481220795668, CurrSamplesPerSec=11.882418021813043, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:23,951] [INFO] [timer.py:197:stop] 0/187, RunningAvgSamplesPerSec=11.954920753920847, CurrSamplesPerSec=11.974926816050653, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:30,425] [INFO] [timer.py:197:stop] 0/188, RunningAvgSamplesPerSec=11.9545948193769, CurrSamplesPerSec=11.894601166970343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:36,987] [INFO] [timer.py:197:stop] 0/189, RunningAvgSamplesPerSec=11.954255796824288, CurrSamplesPerSec=11.89153025551718, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:41,618] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=4, lr=[8.408811289387583e-06], mom=[[0.9, 0.999]] [2022-12-19 18:46:41,618] [INFO] [timer.py:197:stop] 0/190, RunningAvgSamplesPerSec=11.972085726575438, CurrSamplesPerSec=16.602830247845535, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:48,709] [INFO] [timer.py:197:stop] 0/191, RunningAvgSamplesPerSec=11.971718365805724, CurrSamplesPerSec=11.90305277410919, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:46:55,095] [INFO] [timer.py:197:stop] 0/192, RunningAvgSamplesPerSec=11.971805924957366, CurrSamplesPerSec=11.988377632963648, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:02,024] [INFO] [timer.py:197:stop] 0/193, RunningAvgSamplesPerSec=11.971837716369658, CurrSamplesPerSec=11.97788114995674, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:08,479] [INFO] [timer.py:197:stop] 0/194, RunningAvgSamplesPerSec=11.97144893606086, CurrSamplesPerSec=11.897652044341887, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:15,165] [INFO] [timer.py:197:stop] 0/195, RunningAvgSamplesPerSec=11.971633060620068, CurrSamplesPerSec=12.007090225390371, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:21,585] [INFO] [timer.py:197:stop] 0/196, RunningAvgSamplesPerSec=11.971344937010267, CurrSamplesPerSec=11.91599551495715, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:28,301] [INFO] [timer.py:197:stop] 0/197, RunningAvgSamplesPerSec=11.971002632316775, CurrSamplesPerSec=11.90496374964986, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:34,888] [INFO] [timer.py:197:stop] 0/198, RunningAvgSamplesPerSec=11.970098213872838, CurrSamplesPerSec=11.796310255242567, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:41,810] [INFO] [timer.py:197:stop] 0/199, RunningAvgSamplesPerSec=11.969640373249437, CurrSamplesPerSec=11.880574745588348, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:47:48,256] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=4, lr=[8.49307723936858e-06], mom=[[0.9, 0.999]] [2022-12-19 18:47:48,257] [INFO] [timer.py:197:stop] 0/200, RunningAvgSamplesPerSec=11.969722973591413, CurrSamplesPerSec=11.986017505043487, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0104, 'learning_rate': 8.49307723936858e-06, 'epoch': 5.26} [2022-12-19 18:47:55,106] [INFO] [timer.py:197:stop] 0/201, RunningAvgSamplesPerSec=11.969823417978477, CurrSamplesPerSec=11.989744673162933, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:01,561] [INFO] [timer.py:197:stop] 0/202, RunningAvgSamplesPerSec=11.969927263071423, CurrSamplesPerSec=11.990628355028486, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:08,306] [INFO] [timer.py:197:stop] 0/203, RunningAvgSamplesPerSec=11.96891234037618, CurrSamplesPerSec=11.769329513044736, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:14,865] [INFO] [timer.py:197:stop] 0/204, RunningAvgSamplesPerSec=11.969023012232435, CurrSamplesPerSec=11.991309682333466, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:21,407] [INFO] [timer.py:197:stop] 0/205, RunningAvgSamplesPerSec=11.969053823741456, CurrSamplesPerSec=11.97528100273575, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:27,912] [INFO] [timer.py:197:stop] 0/206, RunningAvgSamplesPerSec=11.968591674220969, CurrSamplesPerSec=11.875508552134493, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:34,506] [INFO] [timer.py:197:stop] 0/207, RunningAvgSamplesPerSec=11.967412798901279, CurrSamplesPerSec=11.731682560399587, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:41,043] [INFO] [timer.py:197:stop] 0/208, RunningAvgSamplesPerSec=11.966704638128206, CurrSamplesPerSec=11.82328010843916, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:47,586] [INFO] [timer.py:197:stop] 0/209, RunningAvgSamplesPerSec=11.966013544546255, CurrSamplesPerSec=11.825330167934954, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:48:54,055] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=4, lr=[8.573149077803088e-06], mom=[[0.9, 0.999]] [2022-12-19 18:48:54,056] [INFO] [timer.py:197:stop] 0/210, RunningAvgSamplesPerSec=11.965662492259098, CurrSamplesPerSec=11.893435424960474, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:00,650] [INFO] [timer.py:197:stop] 0/211, RunningAvgSamplesPerSec=11.963980559182376, CurrSamplesPerSec=11.6241240946051, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:07,097] [INFO] [timer.py:197:stop] 0/212, RunningAvgSamplesPerSec=11.963972501348298, CurrSamplesPerSec=11.962288652184109, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:13,536] [INFO] [timer.py:197:stop] 0/213, RunningAvgSamplesPerSec=11.963948947736387, CurrSamplesPerSec=11.959004743052576, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:20,077] [INFO] [timer.py:197:stop] 0/214, RunningAvgSamplesPerSec=11.963615780081513, CurrSamplesPerSec=11.893730000935333, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:26,934] [INFO] [timer.py:197:stop] 0/215, RunningAvgSamplesPerSec=11.95974524944009, CurrSamplesPerSec=11.19210838767328, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:33,451] [INFO] [timer.py:197:stop] 0/216, RunningAvgSamplesPerSec=11.959438531125091, CurrSamplesPerSec=11.894464133001891, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:39,989] [INFO] [timer.py:197:stop] 0/217, RunningAvgSamplesPerSec=11.959055902810736, CurrSamplesPerSec=11.877732856221867, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:46,523] [INFO] [timer.py:197:stop] 0/218, RunningAvgSamplesPerSec=11.958804526134285, CurrSamplesPerSec=11.905002820190525, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:53,081] [INFO] [timer.py:197:stop] 0/219, RunningAvgSamplesPerSec=11.958266565260303, CurrSamplesPerSec=11.843190398788378, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:49:59,604] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=4, lr=[8.64942458567722e-06], mom=[[0.9, 0.999]] [2022-12-19 18:49:59,605] [INFO] [timer.py:197:stop] 0/220, RunningAvgSamplesPerSec=11.957966784064723, CurrSamplesPerSec=11.893267855157005, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:06,093] [INFO] [timer.py:197:stop] 0/221, RunningAvgSamplesPerSec=11.957712018435108, CurrSamplesPerSec=11.902431047514135, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:12,612] [INFO] [timer.py:197:stop] 0/222, RunningAvgSamplesPerSec=11.9578483384126, CurrSamplesPerSec=11.987777476038888, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:19,318] [INFO] [timer.py:197:stop] 0/223, RunningAvgSamplesPerSec=11.955153298789504, CurrSamplesPerSec=11.390381422993498, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:25,838] [INFO] [timer.py:197:stop] 0/224, RunningAvgSamplesPerSec=11.954664165619358, CurrSamplesPerSec=11.84753878486171, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:32,264] [INFO] [timer.py:197:stop] 0/225, RunningAvgSamplesPerSec=11.954815573235472, CurrSamplesPerSec=11.9885232643992, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.009, 'learning_rate': 8.686247975778677e-06, 'epoch': 5.92} [2022-12-19 18:50:38,807] [INFO] [timer.py:197:stop] 0/226, RunningAvgSamplesPerSec=11.954573554152372, CurrSamplesPerSec=11.90084694109717, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:45,353] [INFO] [timer.py:197:stop] 0/227, RunningAvgSamplesPerSec=11.954174532329091, CurrSamplesPerSec=11.865459920773597, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:50,197] [INFO] [timer.py:197:stop] 0/228, RunningAvgSamplesPerSec=11.968873837017497, CurrSamplesPerSec=16.546872163509338, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:50:56,705] [INFO] [timer.py:197:stop] 0/229, RunningAvgSamplesPerSec=11.968564190531376, CurrSamplesPerSec=11.898992668768397, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:03,366] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=4, lr=[8.722247506883805e-06], mom=[[0.9, 0.999]] [2022-12-19 18:51:03,367] [INFO] [timer.py:197:stop] 0/230, RunningAvgSamplesPerSec=11.968529183355098, CurrSamplesPerSec=11.96058785029835, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:09,913] [INFO] [timer.py:197:stop] 0/231, RunningAvgSamplesPerSec=11.967879020850004, CurrSamplesPerSec=11.82146346241042, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:16,442] [INFO] [timer.py:197:stop] 0/232, RunningAvgSamplesPerSec=11.967061938247793, CurrSamplesPerSec=11.782842970197558, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:22,911] [INFO] [timer.py:197:stop] 0/233, RunningAvgSamplesPerSec=11.966750754380776, CurrSamplesPerSec=11.895605827793345, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:29,433] [INFO] [timer.py:197:stop] 0/234, RunningAvgSamplesPerSec=11.96624630459988, CurrSamplesPerSec=11.850847033807803, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:35,937] [INFO] [timer.py:197:stop] 0/235, RunningAvgSamplesPerSec=11.965826672166944, CurrSamplesPerSec=11.869260998652239, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:42,458] [INFO] [timer.py:197:stop] 0/236, RunningAvgSamplesPerSec=11.96532584363076, CurrSamplesPerSec=11.849764653880941, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:48,927] [INFO] [timer.py:197:stop] 0/237, RunningAvgSamplesPerSec=11.965001571041652, CurrSamplesPerSec=11.889601998535788, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:51:55,464] [INFO] [timer.py:197:stop] 0/238, RunningAvgSamplesPerSec=11.96464550642478, CurrSamplesPerSec=11.881553898436401, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:01,965] [INFO] [timer.py:197:stop] 0/239, RunningAvgSamplesPerSec=11.964587134742043, CurrSamplesPerSec=11.950827327433414, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:08,499] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=4, lr=[8.79191691333329e-06], mom=[[0.9, 0.999]] [2022-12-19 18:52:08,500] [INFO] [timer.py:197:stop] 0/240, RunningAvgSamplesPerSec=11.964211432829098, CurrSamplesPerSec=11.87583061214829, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:15,263] [INFO] [timer.py:197:stop] 0/241, RunningAvgSamplesPerSec=11.964415439189414, CurrSamplesPerSec=12.013167628413601, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:21,725] [INFO] [timer.py:197:stop] 0/242, RunningAvgSamplesPerSec=11.96410428726764, CurrSamplesPerSec=11.890200265385754, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:28,480] [INFO] [timer.py:197:stop] 0/243, RunningAvgSamplesPerSec=11.963701648160816, CurrSamplesPerSec=11.867845737322513, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:35,258] [INFO] [timer.py:197:stop] 0/244, RunningAvgSamplesPerSec=11.960666342571534, CurrSamplesPerSec=11.271482733143172, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:42,087] [INFO] [timer.py:197:stop] 0/245, RunningAvgSamplesPerSec=11.960401224587407, CurrSamplesPerSec=11.896586405451426, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:48,622] [INFO] [timer.py:197:stop] 0/246, RunningAvgSamplesPerSec=11.960025506642335, CurrSamplesPerSec=11.86942054501301, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:52:55,390] [INFO] [timer.py:197:stop] 0/247, RunningAvgSamplesPerSec=11.959757371131706, CurrSamplesPerSec=11.894689713675385, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:02,150] [INFO] [timer.py:197:stop] 0/248, RunningAvgSamplesPerSec=11.95709827152116, CurrSamplesPerSec=11.33941077575699, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:09,084] [INFO] [timer.py:197:stop] 0/249, RunningAvgSamplesPerSec=11.95694381985539, CurrSamplesPerSec=11.919069550884041, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:15,576] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=4, lr=[8.858694625217149e-06], mom=[[0.9, 0.999]] [2022-12-19 18:53:15,577] [INFO] [timer.py:197:stop] 0/250, RunningAvgSamplesPerSec=11.957018858592273, CurrSamplesPerSec=11.975582318309264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0091, 'learning_rate': 8.858694625217149e-06, 'epoch': 6.58} [2022-12-19 18:53:22,422] [INFO] [timer.py:197:stop] 0/251, RunningAvgSamplesPerSec=11.956752555170752, CurrSamplesPerSec=11.891073548137316, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:29,100] [INFO] [timer.py:197:stop] 0/252, RunningAvgSamplesPerSec=11.955038834334175, CurrSamplesPerSec=11.54308543330756, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:35,931] [INFO] [timer.py:197:stop] 0/253, RunningAvgSamplesPerSec=11.954886252238936, CurrSamplesPerSec=11.916862539389484, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:42,452] [INFO] [timer.py:197:stop] 0/254, RunningAvgSamplesPerSec=11.954717288242081, CurrSamplesPerSec=11.912457839840409, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:49,316] [INFO] [timer.py:197:stop] 0/255, RunningAvgSamplesPerSec=11.954474788088856, CurrSamplesPerSec=11.89367677593137, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:53:55,800] [INFO] [timer.py:197:stop] 0/256, RunningAvgSamplesPerSec=11.954679467655913, CurrSamplesPerSec=12.006689580133575, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:02,501] [INFO] [timer.py:197:stop] 0/257, RunningAvgSamplesPerSec=11.95421380754742, CurrSamplesPerSec=11.837099457615839, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:09,043] [INFO] [timer.py:197:stop] 0/258, RunningAvgSamplesPerSec=11.954035325155182, CurrSamplesPerSec=11.90869561552593, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:15,709] [INFO] [timer.py:197:stop] 0/259, RunningAvgSamplesPerSec=11.953507335780868, CurrSamplesPerSec=11.819859196692144, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:22,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=4, lr=[8.922811151820517e-06], mom=[[0.9, 0.999]] [2022-12-19 18:54:22,235] [INFO] [timer.py:197:stop] 0/260, RunningAvgSamplesPerSec=11.953524918056072, CurrSamplesPerSec=11.958045278209452, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:28,909] [INFO] [timer.py:197:stop] 0/261, RunningAvgSamplesPerSec=11.95294627807689, CurrSamplesPerSec=11.805505793389244, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:35,345] [INFO] [timer.py:197:stop] 0/262, RunningAvgSamplesPerSec=11.953099633869945, CurrSamplesPerSec=11.99295172071658, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:41,740] [INFO] [timer.py:197:stop] 0/263, RunningAvgSamplesPerSec=11.953154915407932, CurrSamplesPerSec=11.967545485935645, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:48,296] [INFO] [timer.py:197:stop] 0/264, RunningAvgSamplesPerSec=11.952958959230942, CurrSamplesPerSec=11.902033134359753, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:54,814] [INFO] [timer.py:197:stop] 0/265, RunningAvgSamplesPerSec=11.95284308146731, CurrSamplesPerSec=11.922560318565687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:54:59,477] [INFO] [timer.py:197:stop] 0/266, RunningAvgSamplesPerSec=11.965370601881446, CurrSamplesPerSec=16.518648561648973, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:06,056] [INFO] [timer.py:197:stop] 0/267, RunningAvgSamplesPerSec=11.964809451324921, CurrSamplesPerSec=11.818484309644248, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:12,602] [INFO] [timer.py:197:stop] 0/268, RunningAvgSamplesPerSec=11.964545614341409, CurrSamplesPerSec=11.89503653429736, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:19,345] [INFO] [timer.py:197:stop] 0/269, RunningAvgSamplesPerSec=11.962250067229826, CurrSamplesPerSec=11.381395826072504, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:25,788] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=4, lr=[8.984470493319244e-06], mom=[[0.9, 0.999]] [2022-12-19 18:55:25,788] [INFO] [timer.py:197:stop] 0/270, RunningAvgSamplesPerSec=11.96239745029132, CurrSamplesPerSec=12.001879092210856, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:32,290] [INFO] [timer.py:197:stop] 0/271, RunningAvgSamplesPerSec=11.962217071876005, CurrSamplesPerSec=11.91407094948462, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:38,796] [INFO] [timer.py:197:stop] 0/272, RunningAvgSamplesPerSec=11.962011088215489, CurrSamplesPerSec=11.90685791002186, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:45,334] [INFO] [timer.py:197:stop] 0/273, RunningAvgSamplesPerSec=11.961737058625605, CurrSamplesPerSec=11.888205574663681, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:52,031] [INFO] [timer.py:197:stop] 0/274, RunningAvgSamplesPerSec=11.961766852881928, CurrSamplesPerSec=11.969846570313806, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:55:58,536] [INFO] [timer.py:197:stop] 0/275, RunningAvgSamplesPerSec=11.961562278487147, CurrSamplesPerSec=11.906176639929061, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0073, 'learning_rate': 9.014436199608479e-06, 'epoch': 7.24} [2022-12-19 18:56:05,346] [INFO] [timer.py:197:stop] 0/276, RunningAvgSamplesPerSec=11.961402092808727, CurrSamplesPerSec=11.917831280272631, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:11,904] [INFO] [timer.py:197:stop] 0/277, RunningAvgSamplesPerSec=11.960747605566851, CurrSamplesPerSec=11.784076626416892, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:18,880] [INFO] [timer.py:197:stop] 0/278, RunningAvgSamplesPerSec=11.960585834456229, CurrSamplesPerSec=11.916264231429453, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:25,467] [INFO] [timer.py:197:stop] 0/279, RunningAvgSamplesPerSec=11.96036717579041, CurrSamplesPerSec=11.900321461301223, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:32,269] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=4, lr=[9.043854055968706e-06], mom=[[0.9, 0.999]] [2022-12-19 18:56:32,270] [INFO] [timer.py:197:stop] 0/280, RunningAvgSamplesPerSec=11.960424377177432, CurrSamplesPerSec=11.976290255866592, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:38,829] [INFO] [timer.py:197:stop] 0/281, RunningAvgSamplesPerSec=11.960524268425525, CurrSamplesPerSec=11.988358893881443, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:45,526] [INFO] [timer.py:197:stop] 0/282, RunningAvgSamplesPerSec=11.960187110686173, CurrSamplesPerSec=11.86685677654128, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:52,089] [INFO] [timer.py:197:stop] 0/283, RunningAvgSamplesPerSec=11.959997466173446, CurrSamplesPerSec=11.907132552572872, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:56:58,834] [INFO] [timer.py:197:stop] 0/284, RunningAvgSamplesPerSec=11.959887502729112, CurrSamplesPerSec=11.929067684740032, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:05,438] [INFO] [timer.py:197:stop] 0/285, RunningAvgSamplesPerSec=11.960026464892223, CurrSamplesPerSec=11.99934307356995, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:12,118] [INFO] [timer.py:197:stop] 0/286, RunningAvgSamplesPerSec=11.95953376724836, CurrSamplesPerSec=11.821712834178557, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:18,800] [INFO] [timer.py:197:stop] 0/287, RunningAvgSamplesPerSec=11.958237330825453, CurrSamplesPerSec=11.601084653221788, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:25,361] [INFO] [timer.py:197:stop] 0/288, RunningAvgSamplesPerSec=11.958169600997557, CurrSamplesPerSec=11.938897817912004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:31,940] [INFO] [timer.py:197:stop] 0/289, RunningAvgSamplesPerSec=11.957908031342138, CurrSamplesPerSec=11.883565822473857, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:38,679] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=4, lr=[9.10112387015335e-06], mom=[[0.9, 0.999]] [2022-12-19 18:57:38,680] [INFO] [timer.py:197:stop] 0/290, RunningAvgSamplesPerSec=11.956220313805838, CurrSamplesPerSec=11.490767677092833, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:45,226] [INFO] [timer.py:197:stop] 0/291, RunningAvgSamplesPerSec=11.956082900079373, CurrSamplesPerSec=11.916638762005238, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:51,812] [INFO] [timer.py:197:stop] 0/292, RunningAvgSamplesPerSec=11.955730994704545, CurrSamplesPerSec=11.854891097574198, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:57:58,341] [INFO] [timer.py:197:stop] 0/293, RunningAvgSamplesPerSec=11.955736719084909, CurrSamplesPerSec=11.957397020720714, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:04,972] [INFO] [timer.py:197:stop] 0/294, RunningAvgSamplesPerSec=11.955152271599806, CurrSamplesPerSec=11.787471678016537, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:11,565] [INFO] [timer.py:197:stop] 0/295, RunningAvgSamplesPerSec=11.955288294524502, CurrSamplesPerSec=11.995139839539275, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:18,184] [INFO] [timer.py:197:stop] 0/296, RunningAvgSamplesPerSec=11.954982205488552, CurrSamplesPerSec=11.865968164063977, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:24,753] [INFO] [timer.py:197:stop] 0/297, RunningAvgSamplesPerSec=11.95499616872737, CurrSamplesPerSec=11.95910277589178, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:31,415] [INFO] [timer.py:197:stop] 0/298, RunningAvgSamplesPerSec=11.954584893681709, CurrSamplesPerSec=11.834481803911697, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:38,026] [INFO] [timer.py:197:stop] 0/299, RunningAvgSamplesPerSec=11.95441259901824, CurrSamplesPerSec=11.903630753092107, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:44,654] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=4, lr=[9.156425255148058e-06], mom=[[0.9, 0.999]] [2022-12-19 18:58:44,655] [INFO] [timer.py:197:stop] 0/300, RunningAvgSamplesPerSec=11.954225909330887, CurrSamplesPerSec=11.899035919747792, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0072, 'learning_rate': 9.156425255148058e-06, 'epoch': 7.89} [2022-12-19 18:58:51,209] [INFO] [timer.py:197:stop] 0/301, RunningAvgSamplesPerSec=11.954295198335558, CurrSamplesPerSec=11.97497916811743, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:58:57,988] [INFO] [timer.py:197:stop] 0/302, RunningAvgSamplesPerSec=11.952520257598009, CurrSamplesPerSec=11.444447509347373, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:04,551] [INFO] [timer.py:197:stop] 0/303, RunningAvgSamplesPerSec=11.95235028071164, CurrSamplesPerSec=11.901574564533727, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:09,222] [INFO] [timer.py:197:stop] 0/304, RunningAvgSamplesPerSec=11.963231281508516, CurrSamplesPerSec=16.478730573596483, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:15,948] [INFO] [timer.py:197:stop] 0/305, RunningAvgSamplesPerSec=11.963293698186234, CurrSamplesPerSec=11.982173380930739, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:22,600] [INFO] [timer.py:197:stop] 0/306, RunningAvgSamplesPerSec=11.963279070866392, CurrSamplesPerSec=11.95884863972922, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:29,276] [INFO] [timer.py:197:stop] 0/307, RunningAvgSamplesPerSec=11.962831393051811, CurrSamplesPerSec=11.828273162298121, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:35,891] [INFO] [timer.py:197:stop] 0/308, RunningAvgSamplesPerSec=11.96221346985623, CurrSamplesPerSec=11.776679594881331, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:42,468] [INFO] [timer.py:197:stop] 0/309, RunningAvgSamplesPerSec=11.961672712620798, CurrSamplesPerSec=11.798466094329111, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:48,978] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=4, lr=[9.209889040960644e-06], mom=[[0.9, 0.999]] [2022-12-19 18:59:48,979] [INFO] [timer.py:197:stop] 0/310, RunningAvgSamplesPerSec=11.96172355487196, CurrSamplesPerSec=11.97735258636264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 18:59:55,642] [INFO] [timer.py:197:stop] 0/311, RunningAvgSamplesPerSec=11.961362875300894, CurrSamplesPerSec=11.851299086661022, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:02,124] [INFO] [timer.py:197:stop] 0/312, RunningAvgSamplesPerSec=11.961481228079201, CurrSamplesPerSec=11.998164755683883, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:08,693] [INFO] [timer.py:197:stop] 0/313, RunningAvgSamplesPerSec=11.961509967070256, CurrSamplesPerSec=11.970425716284513, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:15,309] [INFO] [timer.py:197:stop] 0/314, RunningAvgSamplesPerSec=11.961354905041691, CurrSamplesPerSec=11.913324878238512, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:21,866] [INFO] [timer.py:197:stop] 0/315, RunningAvgSamplesPerSec=11.960772949711172, CurrSamplesPerSec=11.781926559310214, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:28,469] [INFO] [timer.py:197:stop] 0/316, RunningAvgSamplesPerSec=11.960372169375326, CurrSamplesPerSec=11.836234084319031, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:35,049] [INFO] [timer.py:197:stop] 0/317, RunningAvgSamplesPerSec=11.960166919545655, CurrSamplesPerSec=11.896064992151805, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:41,588] [INFO] [timer.py:197:stop] 0/318, RunningAvgSamplesPerSec=11.960189302297533, CurrSamplesPerSec=11.967244041130852, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:48,435] [INFO] [timer.py:197:stop] 0/319, RunningAvgSamplesPerSec=11.957889184809606, CurrSamplesPerSec=11.272824180860752, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:00:54,950] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=4, lr=[9.261633432763397e-06], mom=[[0.9, 0.999]] [2022-12-19 19:00:54,951] [INFO] [timer.py:197:stop] 0/320, RunningAvgSamplesPerSec=11.958074000379625, CurrSamplesPerSec=12.016949898399902, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:01,414] [INFO] [timer.py:197:stop] 0/321, RunningAvgSamplesPerSec=11.958221540162944, CurrSamplesPerSec=12.005324579488992, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:07,941] [INFO] [timer.py:197:stop] 0/322, RunningAvgSamplesPerSec=11.958316563801247, CurrSamplesPerSec=11.988706379534284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:14,491] [INFO] [timer.py:197:stop] 0/323, RunningAvgSamplesPerSec=11.957953460815437, CurrSamplesPerSec=11.842882123827714, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:20,992] [INFO] [timer.py:197:stop] 0/324, RunningAvgSamplesPerSec=11.957815514721151, CurrSamplesPerSec=11.913698695311629, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:27,478] [INFO] [timer.py:197:stop] 0/325, RunningAvgSamplesPerSec=11.95756150142011, CurrSamplesPerSec=11.876326608226695, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0062, 'learning_rate': 9.28689473531776e-06, 'epoch': 8.55} [2022-12-19 19:01:33,939] [INFO] [timer.py:197:stop] 0/326, RunningAvgSamplesPerSec=11.95768468917118, CurrSamplesPerSec=11.99760758899516, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:40,451] [INFO] [timer.py:197:stop] 0/327, RunningAvgSamplesPerSec=11.95717498461903, CurrSamplesPerSec=11.794287345232508, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:47,109] [INFO] [timer.py:197:stop] 0/328, RunningAvgSamplesPerSec=11.957030227941305, CurrSamplesPerSec=11.910169253362525, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:01:53,573] [INFO] [timer.py:197:stop] 0/329, RunningAvgSamplesPerSec=11.957083348227176, CurrSamplesPerSec=11.974425755140082, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:00,267] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=4, lr=[9.311765584761373e-06], mom=[[0.9, 0.999]] [2022-12-19 19:02:00,268] [INFO] [timer.py:197:stop] 0/330, RunningAvgSamplesPerSec=11.95695695647057, CurrSamplesPerSec=11.915769654383563, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:06,908] [INFO] [timer.py:197:stop] 0/331, RunningAvgSamplesPerSec=11.95592407981561, CurrSamplesPerSec=11.626503471291613, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:13,765] [INFO] [timer.py:197:stop] 0/332, RunningAvgSamplesPerSec=11.955770330016128, CurrSamplesPerSec=11.90540040392296, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:20,322] [INFO] [timer.py:197:stop] 0/333, RunningAvgSamplesPerSec=11.955500645148247, CurrSamplesPerSec=11.867164202396488, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:27,160] [INFO] [timer.py:197:stop] 0/334, RunningAvgSamplesPerSec=11.9552736740695, CurrSamplesPerSec=11.880616811136067, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:34,043] [INFO] [timer.py:197:stop] 0/335, RunningAvgSamplesPerSec=11.952698494700247, CurrSamplesPerSec=11.154971118923546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:40,802] [INFO] [timer.py:197:stop] 0/336, RunningAvgSamplesPerSec=11.95256033183708, CurrSamplesPerSec=11.906729043653556, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:47,362] [INFO] [timer.py:197:stop] 0/337, RunningAvgSamplesPerSec=11.952411104397754, CurrSamplesPerSec=11.90277673637311, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:02:54,241] [INFO] [timer.py:197:stop] 0/338, RunningAvgSamplesPerSec=11.952245235431137, CurrSamplesPerSec=11.89693702755643, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:00,666] [INFO] [timer.py:197:stop] 0/339, RunningAvgSamplesPerSec=11.952297693119737, CurrSamplesPerSec=11.969949584808198, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:07,655] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=4, lr=[9.360382936198493e-06], mom=[[0.9, 0.999]] [2022-12-19 19:03:07,656] [INFO] [timer.py:197:stop] 0/340, RunningAvgSamplesPerSec=11.952046517578895, CurrSamplesPerSec=11.86799737506084, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:14,176] [INFO] [timer.py:197:stop] 0/341, RunningAvgSamplesPerSec=11.952098305201716, CurrSamplesPerSec=11.969628270846362, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:18,888] [INFO] [timer.py:197:stop] 0/342, RunningAvgSamplesPerSec=11.96189269685124, CurrSamplesPerSec=16.563141536427064, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:25,404] [INFO] [timer.py:197:stop] 0/343, RunningAvgSamplesPerSec=11.961780248007488, CurrSamplesPerSec=11.923669809142542, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:31,889] [INFO] [timer.py:197:stop] 0/344, RunningAvgSamplesPerSec=11.96179881331923, CurrSamplesPerSec=11.968132946789327, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:38,378] [INFO] [timer.py:197:stop] 0/345, RunningAvgSamplesPerSec=11.961887642140821, CurrSamplesPerSec=11.992344676649527, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:44,925] [INFO] [timer.py:197:stop] 0/346, RunningAvgSamplesPerSec=11.96147734624168, CurrSamplesPerSec=11.822387075662531, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:51,462] [INFO] [timer.py:197:stop] 0/347, RunningAvgSamplesPerSec=11.961137472988574, CurrSamplesPerSec=11.845356090828643, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:03:58,075] [INFO] [timer.py:197:stop] 0/348, RunningAvgSamplesPerSec=11.960436412503, CurrSamplesPerSec=11.723378268033263, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:04,729] [INFO] [timer.py:197:stop] 0/349, RunningAvgSamplesPerSec=11.96029100371295, CurrSamplesPerSec=11.910190919457103, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:11,186] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=4, lr=[9.407574351377137e-06], mom=[[0.9, 0.999]] [2022-12-19 19:04:11,187] [INFO] [timer.py:197:stop] 0/350, RunningAvgSamplesPerSec=11.960093519523195, CurrSamplesPerSec=11.891958022502779, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0047, 'learning_rate': 9.407574351377137e-06, 'epoch': 9.21} [2022-12-19 19:04:17,928] [INFO] [timer.py:197:stop] 0/351, RunningAvgSamplesPerSec=11.959944877859641, CurrSamplesPerSec=11.908440975738559, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:24,730] [INFO] [timer.py:197:stop] 0/352, RunningAvgSamplesPerSec=11.95804721874999, CurrSamplesPerSec=11.330613472945423, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:31,607] [INFO] [timer.py:197:stop] 0/353, RunningAvgSamplesPerSec=11.957861895038432, CurrSamplesPerSec=11.893349531672605, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:38,061] [INFO] [timer.py:197:stop] 0/354, RunningAvgSamplesPerSec=11.957734430166152, CurrSamplesPerSec=11.913161505866146, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:44,866] [INFO] [timer.py:197:stop] 0/355, RunningAvgSamplesPerSec=11.957589078844109, CurrSamplesPerSec=11.906644014503605, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:51,414] [INFO] [timer.py:197:stop] 0/356, RunningAvgSamplesPerSec=11.95726131803857, CurrSamplesPerSec=11.842673655450627, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:04:58,367] [INFO] [timer.py:197:stop] 0/357, RunningAvgSamplesPerSec=11.956966978504596, CurrSamplesPerSec=11.853673453499752, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:04,799] [INFO] [timer.py:197:stop] 0/358, RunningAvgSamplesPerSec=11.95705627127964, CurrSamplesPerSec=11.988839703721833, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:11,528] [INFO] [timer.py:197:stop] 0/359, RunningAvgSamplesPerSec=11.956769756407205, CurrSamplesPerSec=11.855635627450615, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:17,977] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=4, lr=[9.45342109721062e-06], mom=[[0.9, 0.999]] [2022-12-19 19:05:17,978] [INFO] [timer.py:197:stop] 0/360, RunningAvgSamplesPerSec=11.95675756161352, CurrSamplesPerSec=11.952405609283566, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:24,692] [INFO] [timer.py:197:stop] 0/361, RunningAvgSamplesPerSec=11.956812377241004, CurrSamplesPerSec=11.976468722769011, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:31,187] [INFO] [timer.py:197:stop] 0/362, RunningAvgSamplesPerSec=11.956668418638309, CurrSamplesPerSec=11.905210320737845, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:37,637] [INFO] [timer.py:197:stop] 0/363, RunningAvgSamplesPerSec=11.956682423437918, CurrSamplesPerSec=11.961726284030215, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:44,186] [INFO] [timer.py:197:stop] 0/364, RunningAvgSamplesPerSec=11.95656022455115, CurrSamplesPerSec=11.912609033792682, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:50,740] [INFO] [timer.py:197:stop] 0/365, RunningAvgSamplesPerSec=11.9561902869176, CurrSamplesPerSec=11.823760266956167, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:05:57,270] [INFO] [timer.py:197:stop] 0/366, RunningAvgSamplesPerSec=11.956040415843384, CurrSamplesPerSec=11.901884319565061, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:03,705] [INFO] [timer.py:197:stop] 0/367, RunningAvgSamplesPerSec=11.956143339080421, CurrSamplesPerSec=11.993725482907088, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:10,145] [INFO] [timer.py:197:stop] 0/368, RunningAvgSamplesPerSec=11.956283255814228, CurrSamplesPerSec=12.007572538927086, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:16,704] [INFO] [timer.py:197:stop] 0/369, RunningAvgSamplesPerSec=11.956092776604171, CurrSamplesPerSec=11.886782633986288, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:23,395] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=4, lr=[9.497997685324628e-06], mom=[[0.9, 0.999]] [2022-12-19 19:06:23,396] [INFO] [timer.py:197:stop] 0/370, RunningAvgSamplesPerSec=11.955945369165262, CurrSamplesPerSec=11.902091183692848, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:29,867] [INFO] [timer.py:197:stop] 0/371, RunningAvgSamplesPerSec=11.955848869740944, CurrSamplesPerSec=11.920442532826955, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:36,663] [INFO] [timer.py:197:stop] 0/372, RunningAvgSamplesPerSec=11.95576061031763, CurrSamplesPerSec=11.92328159635384, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:43,170] [INFO] [timer.py:197:stop] 0/373, RunningAvgSamplesPerSec=11.955788598150932, CurrSamplesPerSec=11.966153097939433, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:49,783] [INFO] [timer.py:197:stop] 0/374, RunningAvgSamplesPerSec=11.955901664679635, CurrSamplesPerSec=11.997997439364816, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:06:56,240] [INFO] [timer.py:197:stop] 0/375, RunningAvgSamplesPerSec=11.955797558161814, CurrSamplesPerSec=11.917195311371742, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0042, 'learning_rate': 9.519831289296397e-06, 'epoch': 9.87} [2022-12-19 19:07:02,841] [INFO] [timer.py:197:stop] 0/376, RunningAvgSamplesPerSec=11.955506054871446, CurrSamplesPerSec=11.847757882882286, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:09,293] [INFO] [timer.py:197:stop] 0/377, RunningAvgSamplesPerSec=11.95562445595161, CurrSamplesPerSec=12.000071526000058, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:15,821] [INFO] [timer.py:197:stop] 0/378, RunningAvgSamplesPerSec=11.955092856675767, CurrSamplesPerSec=11.759021316061945, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:22,317] [INFO] [timer.py:197:stop] 0/379, RunningAvgSamplesPerSec=11.954902461156566, CurrSamplesPerSec=11.883741010455603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:26,981] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=4, lr=[9.541372600623587e-06], mom=[[0.9, 0.999]] [2022-12-19 19:07:26,982] [INFO] [timer.py:197:stop] 0/380, RunningAvgSamplesPerSec=11.963689065685433, CurrSamplesPerSec=16.549282728973843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:33,625] [INFO] [timer.py:197:stop] 0/381, RunningAvgSamplesPerSec=11.963515822089455, CurrSamplesPerSec=11.898387188069284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:40,071] [INFO] [timer.py:197:stop] 0/382, RunningAvgSamplesPerSec=11.963524869652602, CurrSamplesPerSec=11.966954881801362, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:46,910] [INFO] [timer.py:197:stop] 0/383, RunningAvgSamplesPerSec=11.96353714862309, CurrSamplesPerSec=11.968204982743584, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:53,397] [INFO] [timer.py:197:stop] 0/384, RunningAvgSamplesPerSec=11.96361537290439, CurrSamplesPerSec=11.993493450938637, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:07:59,897] [INFO] [timer.py:197:stop] 0/385, RunningAvgSamplesPerSec=11.96348999361119, CurrSamplesPerSec=11.91578658040611, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:06,462] [INFO] [timer.py:197:stop] 0/386, RunningAvgSamplesPerSec=11.963306585276023, CurrSamplesPerSec=11.893472312029314, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:13,071] [INFO] [timer.py:197:stop] 0/387, RunningAvgSamplesPerSec=11.962700255973227, CurrSamplesPerSec=11.73432623045743, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:19,557] [INFO] [timer.py:197:stop] 0/388, RunningAvgSamplesPerSec=11.96266029050814, CurrSamplesPerSec=11.947293403103313, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:26,069] [INFO] [timer.py:197:stop] 0/389, RunningAvgSamplesPerSec=11.962412303548426, CurrSamplesPerSec=11.867451182543181, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:32,472] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=4, lr=[9.583608934209288e-06], mom=[[0.9, 0.999]] [2022-12-19 19:08:32,473] [INFO] [timer.py:197:stop] 0/390, RunningAvgSamplesPerSec=11.96248295309582, CurrSamplesPerSec=11.989887124584424, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:38,911] [INFO] [timer.py:197:stop] 0/391, RunningAvgSamplesPerSec=11.962487447405872, CurrSamplesPerSec=11.964231494594209, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:45,448] [INFO] [timer.py:197:stop] 0/392, RunningAvgSamplesPerSec=11.962310595796598, CurrSamplesPerSec=11.893909704715535, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:51,951] [INFO] [timer.py:197:stop] 0/393, RunningAvgSamplesPerSec=11.962018215900912, CurrSamplesPerSec=11.849069503005135, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:08:58,516] [INFO] [timer.py:197:stop] 0/394, RunningAvgSamplesPerSec=11.961865640501298, CurrSamplesPerSec=11.902505461282438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:05,046] [INFO] [timer.py:197:stop] 0/395, RunningAvgSamplesPerSec=11.961759973224343, CurrSamplesPerSec=11.920481705032536, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:11,734] [INFO] [timer.py:197:stop] 0/396, RunningAvgSamplesPerSec=11.961591799465188, CurrSamplesPerSec=11.895863609159935, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:18,253] [INFO] [timer.py:197:stop] 0/397, RunningAvgSamplesPerSec=11.961578446223642, CurrSamplesPerSec=11.956319587980142, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:25,110] [INFO] [timer.py:197:stop] 0/398, RunningAvgSamplesPerSec=11.961438450551832, CurrSamplesPerSec=11.906395271603161, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:31,550] [INFO] [timer.py:197:stop] 0/399, RunningAvgSamplesPerSec=11.961280801547348, CurrSamplesPerSec=11.899176751335, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:38,129] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=4, lr=[9.624764935335318e-06], mom=[[0.9, 0.999]] [2022-12-19 19:09:38,130] [INFO] [timer.py:197:stop] 0/400, RunningAvgSamplesPerSec=11.961067135437117, CurrSamplesPerSec=11.87684051226843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0048, 'learning_rate': 9.624764935335318e-06, 'epoch': 10.53} [2022-12-19 19:09:44,555] [INFO] [timer.py:197:stop] 0/401, RunningAvgSamplesPerSec=11.961098071202871, CurrSamplesPerSec=11.97342322503396, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:51,076] [INFO] [timer.py:197:stop] 0/402, RunningAvgSamplesPerSec=11.96077417537623, CurrSamplesPerSec=11.8329245988732, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:09:57,490] [INFO] [timer.py:197:stop] 0/403, RunningAvgSamplesPerSec=11.960802453686547, CurrSamplesPerSec=11.972124511845799, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:03,987] [INFO] [timer.py:197:stop] 0/404, RunningAvgSamplesPerSec=11.96073005384135, CurrSamplesPerSec=11.931768190498323, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:10,534] [INFO] [timer.py:197:stop] 0/405, RunningAvgSamplesPerSec=11.960600365426814, CurrSamplesPerSec=11.908692445669109, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:17,109] [INFO] [timer.py:197:stop] 0/406, RunningAvgSamplesPerSec=11.96039876029092, CurrSamplesPerSec=11.87970142694694, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:23,826] [INFO] [timer.py:197:stop] 0/407, RunningAvgSamplesPerSec=11.960286379575, CurrSamplesPerSec=11.915056689358508, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:30,565] [INFO] [timer.py:197:stop] 0/408, RunningAvgSamplesPerSec=11.958714014744611, CurrSamplesPerSec=11.354177622628272, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:37,154] [INFO] [timer.py:197:stop] 0/409, RunningAvgSamplesPerSec=11.958522461852873, CurrSamplesPerSec=11.881255718260933, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:43,692] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=4, lr=[9.664894494516345e-06], mom=[[0.9, 0.999]] [2022-12-19 19:10:43,693] [INFO] [timer.py:197:stop] 0/410, RunningAvgSamplesPerSec=11.95830970299223, CurrSamplesPerSec=11.872340896041663, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:50,188] [INFO] [timer.py:197:stop] 0/411, RunningAvgSamplesPerSec=11.958350637485843, CurrSamplesPerSec=11.975075326167289, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:10:56,765] [INFO] [timer.py:197:stop] 0/412, RunningAvgSamplesPerSec=11.95823281659097, CurrSamplesPerSec=11.910237951006847, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:03,490] [INFO] [timer.py:197:stop] 0/413, RunningAvgSamplesPerSec=11.956746920884248, CurrSamplesPerSec=11.377134006335005, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:10,019] [INFO] [timer.py:197:stop] 0/414, RunningAvgSamplesPerSec=11.956655669909175, CurrSamplesPerSec=11.919269074106126, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:16,449] [INFO] [timer.py:197:stop] 0/415, RunningAvgSamplesPerSec=11.956655685292697, CurrSamplesPerSec=11.956662023307778, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:22,954] [INFO] [timer.py:197:stop] 0/416, RunningAvgSamplesPerSec=11.956780517069827, CurrSamplesPerSec=12.00855984492215, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:29,464] [INFO] [timer.py:197:stop] 0/417, RunningAvgSamplesPerSec=11.956549843725488, CurrSamplesPerSec=11.86180961146304, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:34,064] [INFO] [timer.py:197:stop] 0/418, RunningAvgSamplesPerSec=11.964625106550153, CurrSamplesPerSec=16.62410075896584, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:40,480] [INFO] [timer.py:197:stop] 0/419, RunningAvgSamplesPerSec=11.964686513632762, CurrSamplesPerSec=11.990286649261607, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:47,155] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=4, lr=[9.704047567846437e-06], mom=[[0.9, 0.999]] [2022-12-19 19:11:47,156] [INFO] [timer.py:197:stop] 0/420, RunningAvgSamplesPerSec=11.964784959543438, CurrSamplesPerSec=12.005978581193482, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:11:53,887] [INFO] [timer.py:197:stop] 0/421, RunningAvgSamplesPerSec=11.963580703242256, CurrSamplesPerSec=11.480573193271733, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:00,749] [INFO] [timer.py:197:stop] 0/422, RunningAvgSamplesPerSec=11.963679846406063, CurrSamplesPerSec=12.005365922314523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:07,207] [INFO] [timer.py:197:stop] 0/423, RunningAvgSamplesPerSec=11.963662142630655, CurrSamplesPerSec=11.956231186399881, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:14,197] [INFO] [timer.py:197:stop] 0/424, RunningAvgSamplesPerSec=11.963453143781376, CurrSamplesPerSec=11.876108554000023, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:20,780] [INFO] [timer.py:197:stop] 0/425, RunningAvgSamplesPerSec=11.962856104514723, CurrSamplesPerSec=11.716114491124491, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0047, 'learning_rate': 9.723272550712454e-06, 'epoch': 11.18} [2022-12-19 19:12:27,491] [INFO] [timer.py:197:stop] 0/426, RunningAvgSamplesPerSec=11.9627655738164, CurrSamplesPerSec=11.92459357121858, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:33,946] [INFO] [timer.py:197:stop] 0/427, RunningAvgSamplesPerSec=11.962573738945366, CurrSamplesPerSec=11.881786352622898, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:40,448] [INFO] [timer.py:197:stop] 0/428, RunningAvgSamplesPerSec=11.962651896566687, CurrSamplesPerSec=11.99596159511273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:46,950] [INFO] [timer.py:197:stop] 0/429, RunningAvgSamplesPerSec=11.962543178028197, CurrSamplesPerSec=11.916408116097363, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:53,427] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=4, lr=[9.742270550908135e-06], mom=[[0.9, 0.999]] [2022-12-19 19:12:53,427] [INFO] [timer.py:197:stop] 0/430, RunningAvgSamplesPerSec=11.962451764284422, CurrSamplesPerSec=11.923545345263417, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:12:59,896] [INFO] [timer.py:197:stop] 0/431, RunningAvgSamplesPerSec=11.962560863599244, CurrSamplesPerSec=12.00943878053189, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:06,411] [INFO] [timer.py:197:stop] 0/432, RunningAvgSamplesPerSec=11.962281997422737, CurrSamplesPerSec=11.843835738165968, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:12,890] [INFO] [timer.py:197:stop] 0/433, RunningAvgSamplesPerSec=11.96228584457164, CurrSamplesPerSec=11.963940347935317, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:19,644] [INFO] [timer.py:197:stop] 0/434, RunningAvgSamplesPerSec=11.960699638292724, CurrSamplesPerSec=11.314089698343578, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:26,112] [INFO] [timer.py:197:stop] 0/435, RunningAvgSamplesPerSec=11.960734196921816, CurrSamplesPerSec=11.97568222593352, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:32,572] [INFO] [timer.py:197:stop] 0/436, RunningAvgSamplesPerSec=11.960840034207777, CurrSamplesPerSec=12.006844249664644, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:39,117] [INFO] [timer.py:197:stop] 0/437, RunningAvgSamplesPerSec=11.960979997113505, CurrSamplesPerSec=12.022034679168051, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:45,582] [INFO] [timer.py:197:stop] 0/438, RunningAvgSamplesPerSec=11.961056155926537, CurrSamplesPerSec=11.99427746570264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:52,036] [INFO] [timer.py:197:stop] 0/439, RunningAvgSamplesPerSec=11.961136989532214, CurrSamplesPerSec=11.996484832797169, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:13:58,526] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=4, lr=[9.779606609292176e-06], mom=[[0.9, 0.999]] [2022-12-19 19:13:58,527] [INFO] [timer.py:197:stop] 0/440, RunningAvgSamplesPerSec=11.961036552526675, CurrSamplesPerSec=11.917306415853819, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:05,155] [INFO] [timer.py:197:stop] 0/441, RunningAvgSamplesPerSec=11.960953853945096, CurrSamplesPerSec=11.924841485859334, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:11,831] [INFO] [timer.py:197:stop] 0/442, RunningAvgSamplesPerSec=11.961060435577396, CurrSamplesPerSec=12.008033941923953, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:18,344] [INFO] [timer.py:197:stop] 0/443, RunningAvgSamplesPerSec=11.961049702856819, CurrSamplesPerSec=11.956329173772223, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:24,855] [INFO] [timer.py:197:stop] 0/444, RunningAvgSamplesPerSec=11.960906092686594, CurrSamplesPerSec=11.897908332342341, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:31,346] [INFO] [timer.py:197:stop] 0/445, RunningAvgSamplesPerSec=11.960658339820073, CurrSamplesPerSec=11.852147302025633, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:38,050] [INFO] [timer.py:197:stop] 0/446, RunningAvgSamplesPerSec=11.960672121279263, CurrSamplesPerSec=11.966780432654314, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:44,600] [INFO] [timer.py:197:stop] 0/447, RunningAvgSamplesPerSec=11.96078237187159, CurrSamplesPerSec=12.009935253344844, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:51,124] [INFO] [timer.py:197:stop] 0/448, RunningAvgSamplesPerSec=11.960669747874908, CurrSamplesPerSec=11.910761664482132, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:14:57,689] [INFO] [timer.py:197:stop] 0/449, RunningAvgSamplesPerSec=11.960243530588247, CurrSamplesPerSec=11.773131201979238, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:04,175] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=4, lr=[9.816095971633122e-06], mom=[[0.9, 0.999]] [2022-12-19 19:15:04,175] [INFO] [timer.py:197:stop] 0/450, RunningAvgSamplesPerSec=11.960103229206762, CurrSamplesPerSec=11.897716378974831, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0048, 'learning_rate': 9.816095971633122e-06, 'epoch': 11.84} [2022-12-19 19:15:10,657] [INFO] [timer.py:197:stop] 0/451, RunningAvgSamplesPerSec=11.960000696720844, CurrSamplesPerSec=11.914242278824265, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:17,119] [INFO] [timer.py:197:stop] 0/452, RunningAvgSamplesPerSec=11.959940219437032, CurrSamplesPerSec=11.932847568183282, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:23,587] [INFO] [timer.py:197:stop] 0/453, RunningAvgSamplesPerSec=11.960066198484084, CurrSamplesPerSec=12.017027364896247, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:30,315] [INFO] [timer.py:197:stop] 0/454, RunningAvgSamplesPerSec=11.959986501887036, CurrSamplesPerSec=11.924151270485098, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:36,723] [INFO] [timer.py:197:stop] 0/455, RunningAvgSamplesPerSec=11.960105717157514, CurrSamplesPerSec=12.014235436095843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:41,374] [INFO] [timer.py:197:stop] 0/456, RunningAvgSamplesPerSec=11.967390180794478, CurrSamplesPerSec=16.527393090849067, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:47,861] [INFO] [timer.py:197:stop] 0/457, RunningAvgSamplesPerSec=11.967289505176039, CurrSamplesPerSec=11.921757059754666, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:15:54,337] [INFO] [timer.py:197:stop] 0/458, RunningAvgSamplesPerSec=11.967092049162074, CurrSamplesPerSec=11.877920486233057, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:00,838] [INFO] [timer.py:197:stop] 0/459, RunningAvgSamplesPerSec=11.966991307584832, CurrSamplesPerSec=11.921229202389917, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:07,298] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=4, lr=[9.851776190149156e-06], mom=[[0.9, 0.999]] [2022-12-19 19:16:07,299] [INFO] [timer.py:197:stop] 0/460, RunningAvgSamplesPerSec=11.966799772932442, CurrSamplesPerSec=11.87990541764102, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:13,782] [INFO] [timer.py:197:stop] 0/461, RunningAvgSamplesPerSec=11.966896572288283, CurrSamplesPerSec=12.011395895133065, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:20,264] [INFO] [timer.py:197:stop] 0/462, RunningAvgSamplesPerSec=11.96680418513396, CurrSamplesPerSec=11.92454854509294, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:26,755] [INFO] [timer.py:197:stop] 0/463, RunningAvgSamplesPerSec=11.966607455898583, CurrSamplesPerSec=11.876792693144612, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:33,193] [INFO] [timer.py:197:stop] 0/464, RunningAvgSamplesPerSec=11.966614079413304, CurrSamplesPerSec=11.96966830071487, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:39,837] [INFO] [timer.py:197:stop] 0/465, RunningAvgSamplesPerSec=11.96663256488401, CurrSamplesPerSec=11.975178964905716, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:46,434] [INFO] [timer.py:197:stop] 0/466, RunningAvgSamplesPerSec=11.96647316075237, CurrSamplesPerSec=11.893122421041914, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:52,958] [INFO] [timer.py:197:stop] 0/467, RunningAvgSamplesPerSec=11.966300651739248, CurrSamplesPerSec=11.886789476762956, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:16:59,404] [INFO] [timer.py:197:stop] 0/468, RunningAvgSamplesPerSec=11.966318566858343, CurrSamplesPerSec=11.974654913188974, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:05,941] [INFO] [timer.py:197:stop] 0/469, RunningAvgSamplesPerSec=11.96599109239272, CurrSamplesPerSec=11.815313713905496, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:12,443] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=4, lr=[9.886682372916766e-06], mom=[[0.9, 0.999]] [2022-12-19 19:17:12,444] [INFO] [timer.py:197:stop] 0/470, RunningAvgSamplesPerSec=11.965779796564986, CurrSamplesPerSec=11.867913422893002, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:19,128] [INFO] [timer.py:197:stop] 0/471, RunningAvgSamplesPerSec=11.965573350892216, CurrSamplesPerSec=11.869732302583541, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:25,633] [INFO] [timer.py:197:stop] 0/472, RunningAvgSamplesPerSec=11.965648345094552, CurrSamplesPerSec=12.00092453926605, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:32,044] [INFO] [timer.py:197:stop] 0/473, RunningAvgSamplesPerSec=11.965736066432036, CurrSamplesPerSec=12.007107948921425, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:38,523] [INFO] [timer.py:197:stop] 0/474, RunningAvgSamplesPerSec=11.965855329328619, CurrSamplesPerSec=12.022293661720143, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:45,006] [INFO] [timer.py:197:stop] 0/475, RunningAvgSamplesPerSec=11.965804922445507, CurrSamplesPerSec=11.94206018617277, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0037, 'learning_rate': 9.90385555539545e-06, 'epoch': 12.5} [2022-12-19 19:17:51,531] [INFO] [timer.py:197:stop] 0/476, RunningAvgSamplesPerSec=11.965675277428401, CurrSamplesPerSec=11.904666505150017, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:17:58,087] [INFO] [timer.py:197:stop] 0/477, RunningAvgSamplesPerSec=11.965494902945602, CurrSamplesPerSec=11.88060524308075, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:04,553] [INFO] [timer.py:197:stop] 0/478, RunningAvgSamplesPerSec=11.965566826450265, CurrSamplesPerSec=11.999828519887469, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:11,010] [INFO] [timer.py:197:stop] 0/479, RunningAvgSamplesPerSec=11.965573289633536, CurrSamplesPerSec=11.968650557730545, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:17,702] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=4, lr=[9.92084739148192e-06], mom=[[0.9, 0.999]] [2022-12-19 19:18:17,703] [INFO] [timer.py:197:stop] 0/480, RunningAvgSamplesPerSec=11.965460057094816, CurrSamplesPerSec=11.911691356125353, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:24,201] [INFO] [timer.py:197:stop] 0/481, RunningAvgSamplesPerSec=11.96552406855967, CurrSamplesPerSec=11.996200155813812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:30,721] [INFO] [timer.py:197:stop] 0/482, RunningAvgSamplesPerSec=11.965187447997225, CurrSamplesPerSec=11.806094588280331, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:37,309] [INFO] [timer.py:197:stop] 0/483, RunningAvgSamplesPerSec=11.965072049912216, CurrSamplesPerSec=11.909936744138914, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:43,797] [INFO] [timer.py:197:stop] 0/484, RunningAvgSamplesPerSec=11.964938508978381, CurrSamplesPerSec=11.901049020481066, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:50,333] [INFO] [timer.py:197:stop] 0/485, RunningAvgSamplesPerSec=11.964813801717309, CurrSamplesPerSec=11.905005988083438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:18:56,880] [INFO] [timer.py:197:stop] 0/486, RunningAvgSamplesPerSec=11.964642634590682, CurrSamplesPerSec=11.882537420622386, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:03,454] [INFO] [timer.py:197:stop] 0/487, RunningAvgSamplesPerSec=11.964751683093262, CurrSamplesPerSec=12.017765498552977, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:10,172] [INFO] [timer.py:197:stop] 0/488, RunningAvgSamplesPerSec=11.964681054332619, CurrSamplesPerSec=11.930524098608846, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:16,891] [INFO] [timer.py:197:stop] 0/489, RunningAvgSamplesPerSec=11.964601924421471, CurrSamplesPerSec=11.926268254854502, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:23,383] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=4, lr=[9.954302066885107e-06], mom=[[0.9, 0.999]] [2022-12-19 19:19:23,384] [INFO] [timer.py:197:stop] 0/490, RunningAvgSamplesPerSec=11.964479521534537, CurrSamplesPerSec=11.905165440871563, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:29,863] [INFO] [timer.py:197:stop] 0/491, RunningAvgSamplesPerSec=11.964440521888939, CurrSamplesPerSec=11.945438982511188, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:36,326] [INFO] [timer.py:197:stop] 0/492, RunningAvgSamplesPerSec=11.964520793316316, CurrSamplesPerSec=12.003902989077892, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:42,994] [INFO] [timer.py:197:stop] 0/493, RunningAvgSamplesPerSec=11.964693088494137, CurrSamplesPerSec=12.04971890471031, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:47,636] [INFO] [timer.py:197:stop] 0/494, RunningAvgSamplesPerSec=11.97149869912848, CurrSamplesPerSec=16.610577646885343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:19:54,118] [INFO] [timer.py:197:stop] 0/495, RunningAvgSamplesPerSec=11.971497733240914, CurrSamplesPerSec=11.971022535461254, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:00,784] [INFO] [timer.py:197:stop] 0/496, RunningAvgSamplesPerSec=11.971336688771379, CurrSamplesPerSec=11.892465904054477, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:07,363] [INFO] [timer.py:197:stop] 0/497, RunningAvgSamplesPerSec=11.971197147138936, CurrSamplesPerSec=11.902659041575854, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:14,024] [INFO] [timer.py:197:stop] 0/498, RunningAvgSamplesPerSec=11.97120584935865, CurrSamplesPerSec=11.975515001812596, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:20,568] [INFO] [timer.py:197:stop] 0/499, RunningAvgSamplesPerSec=11.971001559434184, CurrSamplesPerSec=11.870525941213016, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:27,050] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=4, lr=[9.987075336738768e-06], mom=[[0.9, 0.999]] [2022-12-19 19:20:27,050] [INFO] [timer.py:197:stop] 0/500, RunningAvgSamplesPerSec=11.970882028414971, CurrSamplesPerSec=11.911769057222811, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0038, 'learning_rate': 9.987075336738768e-06, 'epoch': 13.16} [2022-12-19 19:20:33,554] [INFO] [timer.py:197:stop] 0/501, RunningAvgSamplesPerSec=11.970903542802073, CurrSamplesPerSec=11.98162732482164, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:40,103] [INFO] [timer.py:197:stop] 0/502, RunningAvgSamplesPerSec=11.970767152561097, CurrSamplesPerSec=11.903093943358158, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:46,815] [INFO] [timer.py:197:stop] 0/503, RunningAvgSamplesPerSec=11.97060946488682, CurrSamplesPerSec=11.892282555918243, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:53,323] [INFO] [timer.py:197:stop] 0/504, RunningAvgSamplesPerSec=11.970491524090875, CurrSamplesPerSec=11.911693999003171, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:20:59,789] [INFO] [timer.py:197:stop] 0/505, RunningAvgSamplesPerSec=11.970489548814147, CurrSamplesPerSec=11.969498042193239, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:06,331] [INFO] [timer.py:197:stop] 0/506, RunningAvgSamplesPerSec=11.970397710866356, CurrSamplesPerSec=11.924381156831144, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:12,818] [INFO] [timer.py:197:stop] 0/507, RunningAvgSamplesPerSec=11.970445463779823, CurrSamplesPerSec=11.994561515483362, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:19,429] [INFO] [timer.py:197:stop] 0/508, RunningAvgSamplesPerSec=11.97046189142773, CurrSamplesPerSec=11.978763618410035, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:25,999] [INFO] [timer.py:197:stop] 0/509, RunningAvgSamplesPerSec=11.970345202381258, CurrSamplesPerSec=11.91159092763728, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:32,449] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=4, lr=[9.98888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 19:21:32,450] [INFO] [timer.py:197:stop] 0/510, RunningAvgSamplesPerSec=11.97041264973506, CurrSamplesPerSec=12.004706618723947, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:38,913] [INFO] [timer.py:197:stop] 0/511, RunningAvgSamplesPerSec=11.970248192883536, CurrSamplesPerSec=11.887284283033926, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:45,554] [INFO] [timer.py:197:stop] 0/512, RunningAvgSamplesPerSec=11.970127696373872, CurrSamplesPerSec=11.909108239628411, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:52,021] [INFO] [timer.py:197:stop] 0/513, RunningAvgSamplesPerSec=11.96994284545872, CurrSamplesPerSec=11.876407001323265, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:21:58,446] [INFO] [timer.py:197:stop] 0/514, RunningAvgSamplesPerSec=11.97000660132427, CurrSamplesPerSec=12.002674937343045, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:05,170] [INFO] [timer.py:197:stop] 0/515, RunningAvgSamplesPerSec=11.969899246291638, CurrSamplesPerSec=11.91518520716523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:11,627] [INFO] [timer.py:197:stop] 0/516, RunningAvgSamplesPerSec=11.969935225269287, CurrSamplesPerSec=11.988421000778564, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:18,089] [INFO] [timer.py:197:stop] 0/517, RunningAvgSamplesPerSec=11.969880020421801, CurrSamplesPerSec=11.941571965248388, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:24,567] [INFO] [timer.py:197:stop] 0/518, RunningAvgSamplesPerSec=11.969677927576155, CurrSamplesPerSec=11.866499007461336, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:31,240] [INFO] [timer.py:197:stop] 0/519, RunningAvgSamplesPerSec=11.969563267659181, CurrSamplesPerSec=11.910690318362558, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:37,713] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=4, lr=[9.966666666666667e-06], mom=[[0.9, 0.999]] [2022-12-19 19:22:37,713] [INFO] [timer.py:197:stop] 0/520, RunningAvgSamplesPerSec=11.969394647037719, CurrSamplesPerSec=11.882849341609326, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:44,244] [INFO] [timer.py:197:stop] 0/521, RunningAvgSamplesPerSec=11.969365331913723, CurrSamplesPerSec=11.954199375497849, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:50,651] [INFO] [timer.py:197:stop] 0/522, RunningAvgSamplesPerSec=11.969211039043975, CurrSamplesPerSec=11.889666246139303, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:22:57,087] [INFO] [timer.py:197:stop] 0/523, RunningAvgSamplesPerSec=11.96923942085822, CurrSamplesPerSec=11.984016219678896, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:03,646] [INFO] [timer.py:197:stop] 0/524, RunningAvgSamplesPerSec=11.969128333123505, CurrSamplesPerSec=11.911530671357285, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:10,116] [INFO] [timer.py:197:stop] 0/525, RunningAvgSamplesPerSec=11.969007049771843, CurrSamplesPerSec=11.906030889940311, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0034, 'learning_rate': 9.955555555555556e-06, 'epoch': 13.82} [2022-12-19 19:23:16,605] [INFO] [timer.py:197:stop] 0/526, RunningAvgSamplesPerSec=11.968961223189154, CurrSamplesPerSec=11.94504190936655, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:23,111] [INFO] [timer.py:197:stop] 0/527, RunningAvgSamplesPerSec=11.96878588899259, CurrSamplesPerSec=11.877611977281537, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:29,721] [INFO] [timer.py:197:stop] 0/528, RunningAvgSamplesPerSec=11.968603648091593, CurrSamplesPerSec=11.873687376673056, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:36,296] [INFO] [timer.py:197:stop] 0/529, RunningAvgSamplesPerSec=11.96850037213131, CurrSamplesPerSec=11.914423131806448, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:42,964] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=4, lr=[9.944444444444445e-06], mom=[[0.9, 0.999]] [2022-12-19 19:23:42,965] [INFO] [timer.py:197:stop] 0/530, RunningAvgSamplesPerSec=11.968289035892484, CurrSamplesPerSec=11.857943635912738, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:49,644] [INFO] [timer.py:197:stop] 0/531, RunningAvgSamplesPerSec=11.9681654720526, CurrSamplesPerSec=11.9032781528446, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:23:54,355] [INFO] [timer.py:197:stop] 0/532, RunningAvgSamplesPerSec=11.974241365725362, CurrSamplesPerSec=16.370734748869264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:00,845] [INFO] [timer.py:197:stop] 0/533, RunningAvgSamplesPerSec=11.974096085288423, CurrSamplesPerSec=11.89759034702779, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:07,308] [INFO] [timer.py:197:stop] 0/534, RunningAvgSamplesPerSec=11.974109244933327, CurrSamplesPerSec=11.981101104325049, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:13,817] [INFO] [timer.py:197:stop] 0/535, RunningAvgSamplesPerSec=11.973972548985593, CurrSamplesPerSec=11.901690127131745, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:20,452] [INFO] [timer.py:197:stop] 0/536, RunningAvgSamplesPerSec=11.973906687366869, CurrSamplesPerSec=11.93890525181514, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:26,964] [INFO] [timer.py:197:stop] 0/537, RunningAvgSamplesPerSec=11.973718586057204, CurrSamplesPerSec=11.874109658931614, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:33,494] [INFO] [timer.py:197:stop] 0/538, RunningAvgSamplesPerSec=11.973388733887482, CurrSamplesPerSec=11.799485700046317, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:39,996] [INFO] [timer.py:197:stop] 0/539, RunningAvgSamplesPerSec=11.97315662249098, CurrSamplesPerSec=11.85002672967011, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:46,538] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=4, lr=[9.922222222222222e-06], mom=[[0.9, 0.999]] [2022-12-19 19:24:46,539] [INFO] [timer.py:197:stop] 0/540, RunningAvgSamplesPerSec=11.972922414016997, CurrSamplesPerSec=11.848462293475453, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:52,984] [INFO] [timer.py:197:stop] 0/541, RunningAvgSamplesPerSec=11.972941329547208, CurrSamplesPerSec=11.983126557957817, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:24:59,544] [INFO] [timer.py:197:stop] 0/542, RunningAvgSamplesPerSec=11.972901582717197, CurrSamplesPerSec=11.951516377647954, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:06,046] [INFO] [timer.py:197:stop] 0/543, RunningAvgSamplesPerSec=11.972886270601725, CurrSamplesPerSec=11.964623445166541, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:12,488] [INFO] [timer.py:197:stop] 0/544, RunningAvgSamplesPerSec=11.972836592575407, CurrSamplesPerSec=11.94602108523544, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:19,134] [INFO] [timer.py:197:stop] 0/545, RunningAvgSamplesPerSec=11.97287271643062, CurrSamplesPerSec=11.992483975238882, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:25,818] [INFO] [timer.py:197:stop] 0/546, RunningAvgSamplesPerSec=11.97293932472891, CurrSamplesPerSec=12.009217422768652, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:32,301] [INFO] [timer.py:197:stop] 0/547, RunningAvgSamplesPerSec=11.97274218755734, CurrSamplesPerSec=11.866453369848655, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:38,720] [INFO] [timer.py:197:stop] 0/548, RunningAvgSamplesPerSec=11.972600852552905, CurrSamplesPerSec=11.89606657372061, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:45,190] [INFO] [timer.py:197:stop] 0/549, RunningAvgSamplesPerSec=11.97249143431459, CurrSamplesPerSec=11.913046249088778, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:25:51,700] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=4, lr=[9.9e-06], mom=[[0.9, 0.999]] [2022-12-19 19:25:51,701] [INFO] [timer.py:197:stop] 0/550, RunningAvgSamplesPerSec=11.972313631431716, CurrSamplesPerSec=11.875840594711676, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0037, 'learning_rate': 9.9e-06, 'epoch': 14.47} [2022-12-19 19:25:58,203] [INFO] [timer.py:197:stop] 0/551, RunningAvgSamplesPerSec=11.97229690899298, CurrSamplesPerSec=11.963140034258014, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:04,842] [INFO] [timer.py:197:stop] 0/552, RunningAvgSamplesPerSec=11.972317530025434, CurrSamplesPerSec=11.983649211525123, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:11,220] [INFO] [timer.py:197:stop] 0/553, RunningAvgSamplesPerSec=11.97233080516178, CurrSamplesPerSec=11.979636593687708, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:17,697] [INFO] [timer.py:197:stop] 0/554, RunningAvgSamplesPerSec=11.972341337218756, CurrSamplesPerSec=11.978147319962417, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:24,210] [INFO] [timer.py:197:stop] 0/555, RunningAvgSamplesPerSec=11.972180623461126, CurrSamplesPerSec=11.88412033907062, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:30,936] [INFO] [timer.py:197:stop] 0/556, RunningAvgSamplesPerSec=11.97197065456156, CurrSamplesPerSec=11.856975178951966, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:37,428] [INFO] [timer.py:197:stop] 0/557, RunningAvgSamplesPerSec=11.971821629939019, CurrSamplesPerSec=11.889828448096438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:43,966] [INFO] [timer.py:197:stop] 0/558, RunningAvgSamplesPerSec=11.971596975681212, CurrSamplesPerSec=11.84820133279073, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:50,660] [INFO] [timer.py:197:stop] 0/559, RunningAvgSamplesPerSec=11.971477153010794, CurrSamplesPerSec=11.905225104767887, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:26:57,411] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=4, lr=[9.877777777777778e-06], mom=[[0.9, 0.999]] [2022-12-19 19:26:57,412] [INFO] [timer.py:197:stop] 0/560, RunningAvgSamplesPerSec=11.971211700768691, CurrSamplesPerSec=11.825161907109676, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:03,933] [INFO] [timer.py:197:stop] 0/561, RunningAvgSamplesPerSec=11.970985773174483, CurrSamplesPerSec=11.84623429974481, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:10,421] [INFO] [timer.py:197:stop] 0/562, RunningAvgSamplesPerSec=11.971007464587084, CurrSamplesPerSec=11.98314528068618, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:16,962] [INFO] [timer.py:197:stop] 0/563, RunningAvgSamplesPerSec=11.970878612588608, CurrSamplesPerSec=11.899154597830668, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:23,465] [INFO] [timer.py:197:stop] 0/564, RunningAvgSamplesPerSec=11.970581890232928, CurrSamplesPerSec=11.806407701546343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:30,209] [INFO] [timer.py:197:stop] 0/565, RunningAvgSamplesPerSec=11.970576624558733, CurrSamplesPerSec=11.967618048368738, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:36,762] [INFO] [timer.py:197:stop] 0/566, RunningAvgSamplesPerSec=11.970390571654809, CurrSamplesPerSec=11.866553038652004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:43,318] [INFO] [timer.py:197:stop] 0/567, RunningAvgSamplesPerSec=11.97020571971344, CurrSamplesPerSec=11.866851005901077, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:49,769] [INFO] [timer.py:197:stop] 0/568, RunningAvgSamplesPerSec=11.970223645870083, CurrSamplesPerSec=11.980360516590608, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:27:56,326] [INFO] [timer.py:197:stop] 0/569, RunningAvgSamplesPerSec=11.970059366843243, CurrSamplesPerSec=11.87779539890028, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:01,013] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=4, lr=[9.855555555555555e-06], mom=[[0.9, 0.999]] [2022-12-19 19:28:01,013] [INFO] [timer.py:197:stop] 0/570, RunningAvgSamplesPerSec=11.975816137271956, CurrSamplesPerSec=16.465852896423467, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:07,546] [INFO] [timer.py:197:stop] 0/571, RunningAvgSamplesPerSec=11.975599087928371, CurrSamplesPerSec=11.853573477502735, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:14,093] [INFO] [timer.py:197:stop] 0/572, RunningAvgSamplesPerSec=11.975379716876418, CurrSamplesPerSec=11.851847456317538, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:20,584] [INFO] [timer.py:197:stop] 0/573, RunningAvgSamplesPerSec=11.975246990556267, CurrSamplesPerSec=11.900068762140933, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:27,009] [INFO] [timer.py:197:stop] 0/574, RunningAvgSamplesPerSec=11.975135743276645, CurrSamplesPerSec=11.911949306529012, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:33,507] [INFO] [timer.py:197:stop] 0/575, RunningAvgSamplesPerSec=11.975145618361788, CurrSamplesPerSec=11.980796837344585, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0039, 'learning_rate': 9.844444444444446e-06, 'epoch': 15.13} [2022-12-19 19:28:40,027] [INFO] [timer.py:197:stop] 0/576, RunningAvgSamplesPerSec=11.975071602480421, CurrSamplesPerSec=11.932810436469357, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:46,584] [INFO] [timer.py:197:stop] 0/577, RunningAvgSamplesPerSec=11.975063936699872, CurrSamplesPerSec=11.970665397694008, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:53,082] [INFO] [timer.py:197:stop] 0/578, RunningAvgSamplesPerSec=11.974917879295877, CurrSamplesPerSec=11.891520773346453, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:28:59,545] [INFO] [timer.py:197:stop] 0/579, RunningAvgSamplesPerSec=11.974940407212763, CurrSamplesPerSec=11.98793058797454, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:06,083] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=4, lr=[9.833333333333333e-06], mom=[[0.9, 0.999]] [2022-12-19 19:29:06,084] [INFO] [timer.py:197:stop] 0/580, RunningAvgSamplesPerSec=11.97473211906743, CurrSamplesPerSec=11.85574611066734, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:12,822] [INFO] [timer.py:197:stop] 0/581, RunningAvgSamplesPerSec=11.974661744030799, CurrSamplesPerSec=11.93412291758945, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:19,202] [INFO] [timer.py:197:stop] 0/582, RunningAvgSamplesPerSec=11.97463069654697, CurrSamplesPerSec=11.9566811960055, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:25,682] [INFO] [timer.py:197:stop] 0/583, RunningAvgSamplesPerSec=11.974457518761687, CurrSamplesPerSec=11.874851351718402, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:32,191] [INFO] [timer.py:197:stop] 0/584, RunningAvgSamplesPerSec=11.97426286311294, CurrSamplesPerSec=11.862227904105406, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:38,688] [INFO] [timer.py:197:stop] 0/585, RunningAvgSamplesPerSec=11.97427603984016, CurrSamplesPerSec=11.981949818151952, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:45,356] [INFO] [timer.py:197:stop] 0/586, RunningAvgSamplesPerSec=11.974055086764459, CurrSamplesPerSec=11.846612806121012, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:51,815] [INFO] [timer.py:197:stop] 0/587, RunningAvgSamplesPerSec=11.974034203852659, CurrSamplesPerSec=11.961851013256428, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:29:58,275] [INFO] [timer.py:197:stop] 0/588, RunningAvgSamplesPerSec=11.973988810012955, CurrSamplesPerSec=11.947492277104441, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:04,807] [INFO] [timer.py:197:stop] 0/589, RunningAvgSamplesPerSec=11.97378158739042, CurrSamplesPerSec=11.853570336922818, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:11,318] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=4, lr=[9.811111111111112e-06], mom=[[0.9, 0.999]] [2022-12-19 19:30:11,319] [INFO] [timer.py:197:stop] 0/590, RunningAvgSamplesPerSec=11.973594325518631, CurrSamplesPerSec=11.864673252311206, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:17,903] [INFO] [timer.py:197:stop] 0/591, RunningAvgSamplesPerSec=11.973427390586687, CurrSamplesPerSec=11.876069147463953, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:24,417] [INFO] [timer.py:197:stop] 0/592, RunningAvgSamplesPerSec=11.973246508972519, CurrSamplesPerSec=11.86764845665348, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:30,952] [INFO] [timer.py:197:stop] 0/593, RunningAvgSamplesPerSec=11.973023018843532, CurrSamplesPerSec=11.842602600237324, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:37,391] [INFO] [timer.py:197:stop] 0/594, RunningAvgSamplesPerSec=11.972969564747007, CurrSamplesPerSec=11.941461470296202, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:43,892] [INFO] [timer.py:197:stop] 0/595, RunningAvgSamplesPerSec=11.97284001124814, CurrSamplesPerSec=11.896633329617973, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:50,450] [INFO] [timer.py:197:stop] 0/596, RunningAvgSamplesPerSec=11.972622151928826, CurrSamplesPerSec=11.844813025596551, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:30:56,957] [INFO] [timer.py:197:stop] 0/597, RunningAvgSamplesPerSec=11.972632986281393, CurrSamplesPerSec=11.97907205870291, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:03,457] [INFO] [timer.py:197:stop] 0/598, RunningAvgSamplesPerSec=11.972506449635182, CurrSamplesPerSec=11.897688430237341, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:09,944] [INFO] [timer.py:197:stop] 0/599, RunningAvgSamplesPerSec=11.972352314420391, CurrSamplesPerSec=11.88118840628846, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:16,655] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=4, lr=[9.78888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 19:31:16,656] [INFO] [timer.py:197:stop] 0/600, RunningAvgSamplesPerSec=11.972360536868486, CurrSamplesPerSec=11.977271355244234, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0059, 'learning_rate': 9.78888888888889e-06, 'epoch': 15.79} [2022-12-19 19:31:23,371] [INFO] [timer.py:197:stop] 0/601, RunningAvgSamplesPerSec=11.972248379395364, CurrSamplesPerSec=11.905552474841421, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:29,944] [INFO] [timer.py:197:stop] 0/602, RunningAvgSamplesPerSec=11.972031492714857, CurrSamplesPerSec=11.843513320957232, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:36,492] [INFO] [timer.py:197:stop] 0/603, RunningAvgSamplesPerSec=11.971876049655261, CurrSamplesPerSec=11.879332368803489, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:42,963] [INFO] [timer.py:197:stop] 0/604, RunningAvgSamplesPerSec=11.97177175707332, CurrSamplesPerSec=11.90941891531148, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:49,398] [INFO] [timer.py:197:stop] 0/605, RunningAvgSamplesPerSec=11.97177433075692, CurrSamplesPerSec=11.97332388915701, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:31:55,908] [INFO] [timer.py:197:stop] 0/606, RunningAvgSamplesPerSec=11.971584255924967, CurrSamplesPerSec=11.858057828933267, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:02,343] [INFO] [timer.py:197:stop] 0/607, RunningAvgSamplesPerSec=11.971582241077458, CurrSamplesPerSec=11.970365397085155, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:06,998] [INFO] [timer.py:197:stop] 0/608, RunningAvgSamplesPerSec=11.977063638514322, CurrSamplesPerSec=16.566010752043834, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:13,653] [INFO] [timer.py:197:stop] 0/609, RunningAvgSamplesPerSec=11.976834178469378, CurrSamplesPerSec=11.839379888970635, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:20,066] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=4, lr=[9.766666666666667e-06], mom=[[0.9, 0.999]] [2022-12-19 19:32:20,067] [INFO] [timer.py:197:stop] 0/610, RunningAvgSamplesPerSec=11.976799359169414, CurrSamplesPerSec=11.955701336857453, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:26,511] [INFO] [timer.py:197:stop] 0/611, RunningAvgSamplesPerSec=11.976824491472861, CurrSamplesPerSec=11.992124484326345, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:33,168] [INFO] [timer.py:197:stop] 0/612, RunningAvgSamplesPerSec=11.97682546316243, CurrSamplesPerSec=11.977417251395893, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:39,748] [INFO] [timer.py:197:stop] 0/613, RunningAvgSamplesPerSec=11.976640271105873, CurrSamplesPerSec=11.864730413371571, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:46,182] [INFO] [timer.py:197:stop] 0/614, RunningAvgSamplesPerSec=11.97665746388829, CurrSamplesPerSec=11.987171490947777, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:52,669] [INFO] [timer.py:197:stop] 0/615, RunningAvgSamplesPerSec=11.97653013208724, CurrSamplesPerSec=11.89910765377266, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:32:59,164] [INFO] [timer.py:197:stop] 0/616, RunningAvgSamplesPerSec=11.976477808296522, CurrSamplesPerSec=11.944489133952922, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:05,687] [INFO] [timer.py:197:stop] 0/617, RunningAvgSamplesPerSec=11.976428793367765, CurrSamplesPerSec=11.946409185237007, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:12,125] [INFO] [timer.py:197:stop] 0/618, RunningAvgSamplesPerSec=11.976445188434264, CurrSamplesPerSec=11.986536664156107, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:18,631] [INFO] [timer.py:197:stop] 0/619, RunningAvgSamplesPerSec=11.976330313852575, CurrSamplesPerSec=11.905983891610601, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:25,153] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=4, lr=[9.744444444444445e-06], mom=[[0.9, 0.999]] [2022-12-19 19:33:25,154] [INFO] [timer.py:197:stop] 0/620, RunningAvgSamplesPerSec=11.976178362705582, CurrSamplesPerSec=11.88315391432687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:31,723] [INFO] [timer.py:197:stop] 0/621, RunningAvgSamplesPerSec=11.975844954305249, CurrSamplesPerSec=11.773289207166249, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:38,216] [INFO] [timer.py:197:stop] 0/622, RunningAvgSamplesPerSec=11.975712574260081, CurrSamplesPerSec=11.894327102190855, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:44,701] [INFO] [timer.py:197:stop] 0/623, RunningAvgSamplesPerSec=11.97567354168919, CurrSamplesPerSec=11.951522230924468, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:51,234] [INFO] [timer.py:197:stop] 0/624, RunningAvgSamplesPerSec=11.97548155772061, CurrSamplesPerSec=11.857436603559924, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:33:57,880] [INFO] [timer.py:197:stop] 0/625, RunningAvgSamplesPerSec=11.975491477491861, CurrSamplesPerSec=11.981664760967679, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0041, 'learning_rate': 9.733333333333334e-06, 'epoch': 16.45} [2022-12-19 19:34:04,480] [INFO] [timer.py:197:stop] 0/626, RunningAvgSamplesPerSec=11.97549935004787, CurrSamplesPerSec=11.980405965186343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:10,993] [INFO] [timer.py:197:stop] 0/627, RunningAvgSamplesPerSec=11.975348142225789, CurrSamplesPerSec=11.881733234461805, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:17,516] [INFO] [timer.py:197:stop] 0/628, RunningAvgSamplesPerSec=11.975277071081818, CurrSamplesPerSec=11.931022022406468, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:24,062] [INFO] [timer.py:197:stop] 0/629, RunningAvgSamplesPerSec=11.975084586764885, CurrSamplesPerSec=11.855791665958167, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:30,668] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=4, lr=[9.722222222222223e-06], mom=[[0.9, 0.999]] [2022-12-19 19:34:30,668] [INFO] [timer.py:197:stop] 0/630, RunningAvgSamplesPerSec=11.97497757825607, CurrSamplesPerSec=11.908257662965317, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:37,159] [INFO] [timer.py:197:stop] 0/631, RunningAvgSamplesPerSec=11.974992089318647, CurrSamplesPerSec=11.984111987895323, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:43,602] [INFO] [timer.py:197:stop] 0/632, RunningAvgSamplesPerSec=11.974877020960001, CurrSamplesPerSec=11.90293454605437, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:50,043] [INFO] [timer.py:197:stop] 0/633, RunningAvgSamplesPerSec=11.974881813686583, CurrSamplesPerSec=11.977901994168818, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:34:56,701] [INFO] [timer.py:197:stop] 0/634, RunningAvgSamplesPerSec=11.974884864401915, CurrSamplesPerSec=11.97681017576726, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:03,196] [INFO] [timer.py:197:stop] 0/635, RunningAvgSamplesPerSec=11.974699441704077, CurrSamplesPerSec=11.858649782132717, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:09,741] [INFO] [timer.py:197:stop] 0/636, RunningAvgSamplesPerSec=11.974558917760351, CurrSamplesPerSec=11.886264184973664, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:16,187] [INFO] [timer.py:197:stop] 0/637, RunningAvgSamplesPerSec=11.974423312315334, CurrSamplesPerSec=11.889063293902053, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:22,702] [INFO] [timer.py:197:stop] 0/638, RunningAvgSamplesPerSec=11.97431101143373, CurrSamplesPerSec=11.903422779760557, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:29,205] [INFO] [timer.py:197:stop] 0/639, RunningAvgSamplesPerSec=11.974194950623405, CurrSamplesPerSec=11.900833223163199, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:35,711] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=4, lr=[9.7e-06], mom=[[0.9, 0.999]] [2022-12-19 19:35:35,712] [INFO] [timer.py:197:stop] 0/640, RunningAvgSamplesPerSec=11.974142488288306, CurrSamplesPerSec=11.940817134214983, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:42,411] [INFO] [timer.py:197:stop] 0/641, RunningAvgSamplesPerSec=11.97394014772186, CurrSamplesPerSec=11.846225935236182, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:48,938] [INFO] [timer.py:197:stop] 0/642, RunningAvgSamplesPerSec=11.973830865696149, CurrSamplesPerSec=11.904405174556993, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:35:55,426] [INFO] [timer.py:197:stop] 0/643, RunningAvgSamplesPerSec=11.973669232272032, CurrSamplesPerSec=11.871111265204867, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:01,981] [INFO] [timer.py:197:stop] 0/644, RunningAvgSamplesPerSec=11.973488752742794, CurrSamplesPerSec=11.858910155483617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:08,431] [INFO] [timer.py:197:stop] 0/645, RunningAvgSamplesPerSec=11.97335393941898, CurrSamplesPerSec=11.887425889430897, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:13,241] [INFO] [timer.py:197:stop] 0/646, RunningAvgSamplesPerSec=11.978555711481668, CurrSamplesPerSec=16.621844348814726, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:19,695] [INFO] [timer.py:197:stop] 0/647, RunningAvgSamplesPerSec=11.978487173964334, CurrSamplesPerSec=11.934511306077958, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:26,202] [INFO] [timer.py:197:stop] 0/648, RunningAvgSamplesPerSec=11.978454024847355, CurrSamplesPerSec=11.957111000118086, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:32,697] [INFO] [timer.py:197:stop] 0/649, RunningAvgSamplesPerSec=11.978295197024016, CurrSamplesPerSec=11.876565163467909, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:39,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=4, lr=[9.677777777777778e-06], mom=[[0.9, 0.999]] [2022-12-19 19:36:39,168] [INFO] [timer.py:197:stop] 0/650, RunningAvgSamplesPerSec=11.978148694954083, CurrSamplesPerSec=11.88410718580074, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0032, 'learning_rate': 9.677777777777778e-06, 'epoch': 17.11} [2022-12-19 19:36:45,649] [INFO] [timer.py:197:stop] 0/651, RunningAvgSamplesPerSec=11.978147492909812, CurrSamplesPerSec=11.977368618950415, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:52,128] [INFO] [timer.py:197:stop] 0/652, RunningAvgSamplesPerSec=11.978188725796924, CurrSamplesPerSec=12.005008880028736, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:36:58,626] [INFO] [timer.py:197:stop] 0/653, RunningAvgSamplesPerSec=11.978072758688917, CurrSamplesPerSec=11.903166254138675, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:05,100] [INFO] [timer.py:197:stop] 0/654, RunningAvgSamplesPerSec=11.977948028263256, CurrSamplesPerSec=11.897296107101942, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:11,617] [INFO] [timer.py:197:stop] 0/655, RunningAvgSamplesPerSec=11.977822727291652, CurrSamplesPerSec=11.896680781398778, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:18,020] [INFO] [timer.py:197:stop] 0/656, RunningAvgSamplesPerSec=11.977869265862653, CurrSamplesPerSec=12.00833637080853, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:24,467] [INFO] [timer.py:197:stop] 0/657, RunningAvgSamplesPerSec=11.97787510355765, CurrSamplesPerSec=11.981694175246536, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:31,217] [INFO] [timer.py:197:stop] 0/658, RunningAvgSamplesPerSec=11.977749953015193, CurrSamplesPerSec=11.896334392103153, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:37,768] [INFO] [timer.py:197:stop] 0/659, RunningAvgSamplesPerSec=11.97761777706295, CurrSamplesPerSec=11.891534469820135, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:44,274] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=4, lr=[9.655555555555556e-06], mom=[[0.9, 0.999]] [2022-12-19 19:37:44,278] [INFO] [timer.py:197:stop] 0/660, RunningAvgSamplesPerSec=11.977488820867785, CurrSamplesPerSec=11.893360597609396, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:50,807] [INFO] [timer.py:197:stop] 0/661, RunningAvgSamplesPerSec=11.977439445873683, CurrSamplesPerSec=11.94503872013089, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:37:57,301] [INFO] [timer.py:197:stop] 0/662, RunningAvgSamplesPerSec=11.977299796536682, CurrSamplesPerSec=11.885973662762156, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:03,862] [INFO] [timer.py:197:stop] 0/663, RunningAvgSamplesPerSec=11.97703058783773, CurrSamplesPerSec=11.801954020577972, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:10,570] [INFO] [timer.py:197:stop] 0/664, RunningAvgSamplesPerSec=11.97678806239548, CurrSamplesPerSec=11.818599304969712, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:17,243] [INFO] [timer.py:197:stop] 0/665, RunningAvgSamplesPerSec=11.976631586659392, CurrSamplesPerSec=11.873934229570153, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:23,745] [INFO] [timer.py:197:stop] 0/666, RunningAvgSamplesPerSec=11.976645785868248, CurrSamplesPerSec=11.986067278143265, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:30,267] [INFO] [timer.py:197:stop] 0/667, RunningAvgSamplesPerSec=11.976513259571458, CurrSamplesPerSec=11.889158603679634, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:36,738] [INFO] [timer.py:197:stop] 0/668, RunningAvgSamplesPerSec=11.976489028525965, CurrSamplesPerSec=11.960397066596578, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:43,250] [INFO] [timer.py:197:stop] 0/669, RunningAvgSamplesPerSec=11.976383125636769, CurrSamplesPerSec=11.906265359060658, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:49,874] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=4, lr=[9.633333333333335e-06], mom=[[0.9, 0.999]] [2022-12-19 19:38:49,874] [INFO] [timer.py:197:stop] 0/670, RunningAvgSamplesPerSec=11.976178151244438, CurrSamplesPerSec=11.841005650289713, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:38:56,350] [INFO] [timer.py:197:stop] 0/671, RunningAvgSamplesPerSec=11.97620547624723, CurrSamplesPerSec=11.99448648215828, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:02,865] [INFO] [timer.py:197:stop] 0/672, RunningAvgSamplesPerSec=11.97616119805436, CurrSamplesPerSec=11.946612283233677, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:09,326] [INFO] [timer.py:197:stop] 0/673, RunningAvgSamplesPerSec=11.976073455605626, CurrSamplesPerSec=11.917573604160975, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:16,047] [INFO] [timer.py:197:stop] 0/674, RunningAvgSamplesPerSec=11.975934733379361, CurrSamplesPerSec=11.883571083298984, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:22,625] [INFO] [timer.py:197:stop] 0/675, RunningAvgSamplesPerSec=11.975947719467163, CurrSamplesPerSec=11.984680743532465, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0034, 'learning_rate': 9.622222222222222e-06, 'epoch': 17.76} [2022-12-19 19:39:29,132] [INFO] [timer.py:197:stop] 0/676, RunningAvgSamplesPerSec=11.975912409494633, CurrSamplesPerSec=11.952195928143308, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:35,609] [INFO] [timer.py:197:stop] 0/677, RunningAvgSamplesPerSec=11.975803769199521, CurrSamplesPerSec=11.903025855908071, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:42,091] [INFO] [timer.py:197:stop] 0/678, RunningAvgSamplesPerSec=11.97567938308128, CurrSamplesPerSec=11.892304156985325, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:48,519] [INFO] [timer.py:197:stop] 0/679, RunningAvgSamplesPerSec=11.975549453856276, CurrSamplesPerSec=11.888357733134713, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:39:55,123] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=4, lr=[9.611111111111112e-06], mom=[[0.9, 0.999]] [2022-12-19 19:39:55,123] [INFO] [timer.py:197:stop] 0/680, RunningAvgSamplesPerSec=11.975564106186184, CurrSamplesPerSec=11.985491969151347, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:01,609] [INFO] [timer.py:197:stop] 0/681, RunningAvgSamplesPerSec=11.975601562748755, CurrSamplesPerSec=12.001051160376083, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:08,147] [INFO] [timer.py:197:stop] 0/682, RunningAvgSamplesPerSec=11.975419587003449, CurrSamplesPerSec=11.853121774518845, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:14,671] [INFO] [timer.py:197:stop] 0/683, RunningAvgSamplesPerSec=11.975346195331841, CurrSamplesPerSec=11.925647279638905, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:19,361] [INFO] [timer.py:197:stop] 0/684, RunningAvgSamplesPerSec=11.980158171428565, CurrSamplesPerSec=16.49345841801192, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:26,077] [INFO] [timer.py:197:stop] 0/685, RunningAvgSamplesPerSec=11.98004187176945, CurrSamplesPerSec=11.901247940832896, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:32,551] [INFO] [timer.py:197:stop] 0/686, RunningAvgSamplesPerSec=11.980037465495455, CurrSamplesPerSec=11.977028737279714, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:39,074] [INFO] [timer.py:197:stop] 0/687, RunningAvgSamplesPerSec=11.979800598670858, CurrSamplesPerSec=11.819948715948051, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:45,552] [INFO] [timer.py:197:stop] 0/688, RunningAvgSamplesPerSec=11.97975829874634, CurrSamplesPerSec=11.950852866100679, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:52,099] [INFO] [timer.py:197:stop] 0/689, RunningAvgSamplesPerSec=11.979494167064313, CurrSamplesPerSec=11.801003506729925, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:40:58,634] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=4, lr=[9.58888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 19:40:58,635] [INFO] [timer.py:197:stop] 0/690, RunningAvgSamplesPerSec=11.979355380969116, CurrSamplesPerSec=11.884763306398538, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:05,200] [INFO] [timer.py:197:stop] 0/691, RunningAvgSamplesPerSec=11.97922316380156, CurrSamplesPerSec=11.888944290468602, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:11,745] [INFO] [timer.py:197:stop] 0/692, RunningAvgSamplesPerSec=11.97903085416433, CurrSamplesPerSec=11.847981172495958, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:18,237] [INFO] [timer.py:197:stop] 0/693, RunningAvgSamplesPerSec=11.97900966105328, CurrSamplesPerSec=11.964404269640626, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:24,948] [INFO] [timer.py:197:stop] 0/694, RunningAvgSamplesPerSec=11.979045972093477, CurrSamplesPerSec=12.004189642217757, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:31,359] [INFO] [timer.py:197:stop] 0/695, RunningAvgSamplesPerSec=11.979076515016768, CurrSamplesPerSec=12.000249629445117, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:37,933] [INFO] [timer.py:197:stop] 0/696, RunningAvgSamplesPerSec=11.978872341496341, CurrSamplesPerSec=11.839034217655005, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:44,391] [INFO] [timer.py:197:stop] 0/697, RunningAvgSamplesPerSec=11.978867397144608, CurrSamplesPerSec=11.975437001104236, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:50,924] [INFO] [timer.py:197:stop] 0/698, RunningAvgSamplesPerSec=11.978752303121045, CurrSamplesPerSec=11.89929332232395, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:41:57,393] [INFO] [timer.py:197:stop] 0/699, RunningAvgSamplesPerSec=11.978624188200971, CurrSamplesPerSec=11.890115998806362, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:03,949] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=4, lr=[9.566666666666668e-06], mom=[[0.9, 0.999]] [2022-12-19 19:42:03,950] [INFO] [timer.py:197:stop] 0/700, RunningAvgSamplesPerSec=11.978583673268098, CurrSamplesPerSec=11.95041127535315, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0022, 'learning_rate': 9.566666666666668e-06, 'epoch': 18.42} [2022-12-19 19:42:10,724] [INFO] [timer.py:197:stop] 0/701, RunningAvgSamplesPerSec=11.978405409161088, CurrSamplesPerSec=11.855258115031516, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:17,217] [INFO] [timer.py:197:stop] 0/702, RunningAvgSamplesPerSec=11.978369458806464, CurrSamplesPerSec=11.95329284415615, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:23,646] [INFO] [timer.py:197:stop] 0/703, RunningAvgSamplesPerSec=11.978377741346549, CurrSamplesPerSec=11.984178331020894, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:30,164] [INFO] [timer.py:197:stop] 0/704, RunningAvgSamplesPerSec=11.978239482854361, CurrSamplesPerSec=11.882099286083735, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:36,671] [INFO] [timer.py:197:stop] 0/705, RunningAvgSamplesPerSec=11.978141989512459, CurrSamplesPerSec=11.910091044701355, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:43,316] [INFO] [timer.py:197:stop] 0/706, RunningAvgSamplesPerSec=11.977669103547663, CurrSamplesPerSec=11.654220324064358, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:49,834] [INFO] [timer.py:197:stop] 0/707, RunningAvgSamplesPerSec=11.97770315266527, CurrSamplesPerSec=12.001721867593247, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:42:56,372] [INFO] [timer.py:197:stop] 0/708, RunningAvgSamplesPerSec=11.977700559176439, CurrSamplesPerSec=11.975872429012957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:02,769] [INFO] [timer.py:197:stop] 0/709, RunningAvgSamplesPerSec=11.977715028571701, CurrSamplesPerSec=11.987939153792759, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:09,263] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=4, lr=[9.544444444444445e-06], mom=[[0.9, 0.999]] [2022-12-19 19:43:09,264] [INFO] [timer.py:197:stop] 0/710, RunningAvgSamplesPerSec=11.977595669608714, CurrSamplesPerSec=11.893800090033567, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:15,785] [INFO] [timer.py:197:stop] 0/711, RunningAvgSamplesPerSec=11.977470951137823, CurrSamplesPerSec=11.889817388731501, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:22,338] [INFO] [timer.py:197:stop] 0/712, RunningAvgSamplesPerSec=11.977219984958623, CurrSamplesPerSec=11.801893311786731, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:28,833] [INFO] [timer.py:197:stop] 0/713, RunningAvgSamplesPerSec=11.977126634415487, CurrSamplesPerSec=11.911213014556118, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:35,580] [INFO] [timer.py:197:stop] 0/714, RunningAvgSamplesPerSec=11.976991331599185, CurrSamplesPerSec=11.881558631575729, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:42,135] [INFO] [timer.py:197:stop] 0/715, RunningAvgSamplesPerSec=11.976701065505281, CurrSamplesPerSec=11.773542230841562, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:48,624] [INFO] [timer.py:197:stop] 0/716, RunningAvgSamplesPerSec=11.976554169883514, CurrSamplesPerSec=11.872726848929643, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:43:55,047] [INFO] [timer.py:197:stop] 0/717, RunningAvgSamplesPerSec=11.976580911306579, CurrSamplesPerSec=11.995704817904988, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:01,619] [INFO] [timer.py:197:stop] 0/718, RunningAvgSamplesPerSec=11.97644817832658, CurrSamplesPerSec=11.882291261826497, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:08,112] [INFO] [timer.py:197:stop] 0/719, RunningAvgSamplesPerSec=11.976296629007416, CurrSamplesPerSec=11.86876296869121, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:14,573] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=4, lr=[9.522222222222223e-06], mom=[[0.9, 0.999]] [2022-12-19 19:44:14,574] [INFO] [timer.py:197:stop] 0/720, RunningAvgSamplesPerSec=11.976276684199156, CurrSamplesPerSec=11.961993335679072, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:21,075] [INFO] [timer.py:197:stop] 0/721, RunningAvgSamplesPerSec=11.976120602571164, CurrSamplesPerSec=11.86509436918722, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:26,010] [INFO] [timer.py:197:stop] 0/722, RunningAvgSamplesPerSec=11.98068217975925, CurrSamplesPerSec=16.49912327716341, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:32,414] [INFO] [timer.py:197:stop] 0/723, RunningAvgSamplesPerSec=11.98066078627941, CurrSamplesPerSec=11.965277286533706, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:38,937] [INFO] [timer.py:197:stop] 0/724, RunningAvgSamplesPerSec=11.980512184572245, CurrSamplesPerSec=11.874321336588874, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:45,430] [INFO] [timer.py:197:stop] 0/725, RunningAvgSamplesPerSec=11.980532445810129, CurrSamplesPerSec=11.995178968204105, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0024, 'learning_rate': 9.511111111111112e-06, 'epoch': 19.08} [2022-12-19 19:44:51,933] [INFO] [timer.py:197:stop] 0/726, RunningAvgSamplesPerSec=11.980425548659776, CurrSamplesPerSec=11.903634975976148, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:44:58,458] [INFO] [timer.py:197:stop] 0/727, RunningAvgSamplesPerSec=11.980285437372041, CurrSamplesPerSec=11.879697746766794, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:04,900] [INFO] [timer.py:197:stop] 0/728, RunningAvgSamplesPerSec=11.980213817416447, CurrSamplesPerSec=11.92851373642, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:11,370] [INFO] [timer.py:197:stop] 0/729, RunningAvgSamplesPerSec=11.980179835447856, CurrSamplesPerSec=11.955559696593365, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:18,011] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=4, lr=[9.5e-06], mom=[[0.9, 0.999]] [2022-12-19 19:45:18,012] [INFO] [timer.py:197:stop] 0/730, RunningAvgSamplesPerSec=11.98003403203525, CurrSamplesPerSec=11.87496587061819, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:24,467] [INFO] [timer.py:197:stop] 0/731, RunningAvgSamplesPerSec=11.979884701901927, CurrSamplesPerSec=11.872151341975183, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:31,022] [INFO] [timer.py:197:stop] 0/732, RunningAvgSamplesPerSec=11.97975129332956, CurrSamplesPerSec=11.883280692719694, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:37,662] [INFO] [timer.py:197:stop] 0/733, RunningAvgSamplesPerSec=11.979655168016306, CurrSamplesPerSec=11.909892885462195, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:44,417] [INFO] [timer.py:197:stop] 0/734, RunningAvgSamplesPerSec=11.979438379752066, CurrSamplesPerSec=11.823037960856995, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:50,894] [INFO] [timer.py:197:stop] 0/735, RunningAvgSamplesPerSec=11.979412094280493, CurrSamplesPerSec=11.960202025863683, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:45:57,373] [INFO] [timer.py:197:stop] 0/736, RunningAvgSamplesPerSec=11.979322075534418, CurrSamplesPerSec=11.913700281572673, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:04,088] [INFO] [timer.py:197:stop] 0/737, RunningAvgSamplesPerSec=11.979229066472591, CurrSamplesPerSec=11.911347791994812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:10,583] [INFO] [timer.py:197:stop] 0/738, RunningAvgSamplesPerSec=11.979134266249346, CurrSamplesPerSec=11.909859595356957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:17,040] [INFO] [timer.py:197:stop] 0/739, RunningAvgSamplesPerSec=11.979153922087859, CurrSamplesPerSec=11.993638134946083, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:23,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=4, lr=[9.47777777777778e-06], mom=[[0.9, 0.999]] [2022-12-19 19:46:23,607] [INFO] [timer.py:197:stop] 0/740, RunningAvgSamplesPerSec=11.979027125097204, CurrSamplesPerSec=11.886302080222208, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:30,187] [INFO] [timer.py:197:stop] 0/741, RunningAvgSamplesPerSec=11.978834902806833, CurrSamplesPerSec=11.83863739934022, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:36,862] [INFO] [timer.py:197:stop] 0/742, RunningAvgSamplesPerSec=11.978711314874198, CurrSamplesPerSec=11.888071846905047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:43,328] [INFO] [timer.py:197:stop] 0/743, RunningAvgSamplesPerSec=11.978689803349484, CurrSamplesPerSec=11.962792429693451, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:49,886] [INFO] [timer.py:197:stop] 0/744, RunningAvgSamplesPerSec=11.978578230047376, CurrSamplesPerSec=11.896469887492607, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:46:56,309] [INFO] [timer.py:197:stop] 0/745, RunningAvgSamplesPerSec=11.978473389916921, CurrSamplesPerSec=11.901184623288588, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:02,810] [INFO] [timer.py:197:stop] 0/746, RunningAvgSamplesPerSec=11.978375544989852, CurrSamplesPerSec=11.906115910333419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:09,321] [INFO] [timer.py:197:stop] 0/747, RunningAvgSamplesPerSec=11.978286545053317, CurrSamplesPerSec=11.912435108214439, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:15,812] [INFO] [timer.py:197:stop] 0/748, RunningAvgSamplesPerSec=11.978170292320765, CurrSamplesPerSec=11.89218456181892, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:22,366] [INFO] [timer.py:197:stop] 0/749, RunningAvgSamplesPerSec=11.978200150970373, CurrSamplesPerSec=12.00051625803764, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:28,871] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=4, lr=[9.455555555555557e-06], mom=[[0.9, 0.999]] [2022-12-19 19:47:28,872] [INFO] [timer.py:197:stop] 0/750, RunningAvgSamplesPerSec=11.97810552129097, CurrSamplesPerSec=11.90783242019479, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0022, 'learning_rate': 9.455555555555557e-06, 'epoch': 19.74} [2022-12-19 19:47:35,330] [INFO] [timer.py:197:stop] 0/751, RunningAvgSamplesPerSec=11.978108337300501, CurrSamplesPerSec=11.980215083401454, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:41,783] [INFO] [timer.py:197:stop] 0/752, RunningAvgSamplesPerSec=11.978105564339486, CurrSamplesPerSec=11.97602897709133, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:48,232] [INFO] [timer.py:197:stop] 0/753, RunningAvgSamplesPerSec=11.978020223764426, CurrSamplesPerSec=11.914355443497685, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:47:54,804] [INFO] [timer.py:197:stop] 0/754, RunningAvgSamplesPerSec=11.977900194604274, CurrSamplesPerSec=11.88843249754756, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:01,298] [INFO] [timer.py:197:stop] 0/755, RunningAvgSamplesPerSec=11.977771031799467, CurrSamplesPerSec=11.88142294974375, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:07,778] [INFO] [timer.py:197:stop] 0/756, RunningAvgSamplesPerSec=11.977786016464945, CurrSamplesPerSec=11.98908012310498, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:14,295] [INFO] [timer.py:197:stop] 0/757, RunningAvgSamplesPerSec=11.97776445344107, CurrSamplesPerSec=11.961528001896838, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:20,738] [INFO] [timer.py:197:stop] 0/758, RunningAvgSamplesPerSec=11.97779573285277, CurrSamplesPerSec=12.001458404858285, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:27,216] [INFO] [timer.py:197:stop] 0/759, RunningAvgSamplesPerSec=11.977744124173299, CurrSamplesPerSec=11.938854807654161, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:31,980] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=4, lr=[9.433333333333335e-06], mom=[[0.9, 0.999]] [2022-12-19 19:48:31,981] [INFO] [timer.py:197:stop] 0/760, RunningAvgSamplesPerSec=11.982174937497238, CurrSamplesPerSec=16.642599215756004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:38,517] [INFO] [timer.py:197:stop] 0/761, RunningAvgSamplesPerSec=11.982178203031259, CurrSamplesPerSec=11.984653989940739, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:45,058] [INFO] [timer.py:197:stop] 0/762, RunningAvgSamplesPerSec=11.982064794163506, CurrSamplesPerSec=11.896602222544859, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:51,507] [INFO] [timer.py:197:stop] 0/763, RunningAvgSamplesPerSec=11.982095367257427, CurrSamplesPerSec=12.00537612383475, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:48:58,021] [INFO] [timer.py:197:stop] 0/764, RunningAvgSamplesPerSec=11.981912320565547, CurrSamplesPerSec=11.844216704299297, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:04,524] [INFO] [timer.py:197:stop] 0/765, RunningAvgSamplesPerSec=11.98190787030546, CurrSamplesPerSec=11.978517732848205, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:11,029] [INFO] [timer.py:197:stop] 0/766, RunningAvgSamplesPerSec=11.981716785559062, CurrSamplesPerSec=11.837674181332742, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:17,591] [INFO] [timer.py:197:stop] 0/767, RunningAvgSamplesPerSec=11.981589066790168, CurrSamplesPerSec=11.884801192077603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:24,343] [INFO] [timer.py:197:stop] 0/768, RunningAvgSamplesPerSec=11.981388311505341, CurrSamplesPerSec=11.829756678093096, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:30,775] [INFO] [timer.py:197:stop] 0/769, RunningAvgSamplesPerSec=11.981275278213968, CurrSamplesPerSec=11.895313794508033, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:37,246] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=4, lr=[9.411111111111113e-06], mom=[[0.9, 0.999]] [2022-12-19 19:49:37,247] [INFO] [timer.py:197:stop] 0/770, RunningAvgSamplesPerSec=11.981253213669396, CurrSamplesPerSec=11.964353609773532, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:43,830] [INFO] [timer.py:197:stop] 0/771, RunningAvgSamplesPerSec=11.981239772573442, CurrSamplesPerSec=11.970925908641167, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:50,414] [INFO] [timer.py:197:stop] 0/772, RunningAvgSamplesPerSec=11.981108188076112, CurrSamplesPerSec=11.880768249574357, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:49:56,978] [INFO] [timer.py:197:stop] 0/773, RunningAvgSamplesPerSec=11.980976876354708, CurrSamplesPerSec=11.880714088856092, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:03,468] [INFO] [timer.py:197:stop] 0/774, RunningAvgSamplesPerSec=11.980986177769637, CurrSamplesPerSec=11.988161869363177, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:10,011] [INFO] [timer.py:197:stop] 0/775, RunningAvgSamplesPerSec=11.980766366325167, CurrSamplesPerSec=11.81344492601003, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0016, 'learning_rate': 9.4e-06, 'epoch': 20.39} [2022-12-19 19:50:16,487] [INFO] [timer.py:197:stop] 0/776, RunningAvgSamplesPerSec=11.980602403247165, CurrSamplesPerSec=11.855187432299326, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:22,950] [INFO] [timer.py:197:stop] 0/777, RunningAvgSamplesPerSec=11.980626320045564, CurrSamplesPerSec=11.999166606134903, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:29,645] [INFO] [timer.py:197:stop] 0/778, RunningAvgSamplesPerSec=11.980489992252004, CurrSamplesPerSec=11.875760734674495, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:36,278] [INFO] [timer.py:197:stop] 0/779, RunningAvgSamplesPerSec=11.980432586270492, CurrSamplesPerSec=11.936050782877498, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:42,822] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=4, lr=[9.38888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 19:50:42,823] [INFO] [timer.py:197:stop] 0/780, RunningAvgSamplesPerSec=11.980286357016164, CurrSamplesPerSec=11.867735028479908, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:49,285] [INFO] [timer.py:197:stop] 0/781, RunningAvgSamplesPerSec=11.980295375691854, CurrSamplesPerSec=11.98731602246405, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:50:55,743] [INFO] [timer.py:197:stop] 0/782, RunningAvgSamplesPerSec=11.9803104686943, CurrSamplesPerSec=11.992079482510942, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:02,279] [INFO] [timer.py:197:stop] 0/783, RunningAvgSamplesPerSec=11.980189983197402, CurrSamplesPerSec=11.886943704511545, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:08,893] [INFO] [timer.py:197:stop] 0/784, RunningAvgSamplesPerSec=11.979890129862069, CurrSamplesPerSec=11.750200447087725, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:15,371] [INFO] [timer.py:197:stop] 0/785, RunningAvgSamplesPerSec=11.97978688191083, CurrSamplesPerSec=11.899588188550048, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:21,841] [INFO] [timer.py:197:stop] 0/786, RunningAvgSamplesPerSec=11.979679000875512, CurrSamplesPerSec=11.895800348894433, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:28,292] [INFO] [timer.py:197:stop] 0/787, RunningAvgSamplesPerSec=11.979686072713772, CurrSamplesPerSec=11.98523296434199, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:34,773] [INFO] [timer.py:197:stop] 0/788, RunningAvgSamplesPerSec=11.979659260781558, CurrSamplesPerSec=11.958648854731944, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:41,269] [INFO] [timer.py:197:stop] 0/789, RunningAvgSamplesPerSec=11.979559448873635, CurrSamplesPerSec=11.9016183619666, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:47,891] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=4, lr=[9.366666666666668e-06], mom=[[0.9, 0.999]] [2022-12-19 19:51:47,891] [INFO] [timer.py:197:stop] 0/790, RunningAvgSamplesPerSec=11.979561312769011, CurrSamplesPerSec=11.981028378298582, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:51:54,457] [INFO] [timer.py:197:stop] 0/791, RunningAvgSamplesPerSec=11.979412561144684, CurrSamplesPerSec=11.863333533828419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:00,978] [INFO] [timer.py:197:stop] 0/792, RunningAvgSamplesPerSec=11.97931202287896, CurrSamplesPerSec=11.900509805651419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:07,441] [INFO] [timer.py:197:stop] 0/793, RunningAvgSamplesPerSec=11.979318777362575, CurrSamplesPerSec=11.984657200365438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:13,892] [INFO] [timer.py:197:stop] 0/794, RunningAvgSamplesPerSec=11.97935416159502, CurrSamplesPerSec=12.007408719850522, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:20,391] [INFO] [timer.py:197:stop] 0/795, RunningAvgSamplesPerSec=11.979246869102509, CurrSamplesPerSec=11.894870500625041, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:26,885] [INFO] [timer.py:197:stop] 0/796, RunningAvgSamplesPerSec=11.979242591050609, CurrSamplesPerSec=11.975851057579147, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:33,329] [INFO] [timer.py:197:stop] 0/797, RunningAvgSamplesPerSec=11.979127747950312, CurrSamplesPerSec=11.888632049422895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:37,960] [INFO] [timer.py:197:stop] 0/798, RunningAvgSamplesPerSec=11.983327798767473, CurrSamplesPerSec=16.614393901366984, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:44,505] [INFO] [timer.py:197:stop] 0/799, RunningAvgSamplesPerSec=11.983242355228949, CurrSamplesPerSec=11.915613619878123, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:52:50,968] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=4, lr=[9.344444444444446e-06], mom=[[0.9, 0.999]] [2022-12-19 19:52:50,969] [INFO] [timer.py:197:stop] 0/800, RunningAvgSamplesPerSec=11.98314432544352, CurrSamplesPerSec=11.905521321107688, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0016, 'learning_rate': 9.344444444444446e-06, 'epoch': 21.05} [2022-12-19 19:52:57,443] [INFO] [timer.py:197:stop] 0/801, RunningAvgSamplesPerSec=11.983141642587343, CurrSamplesPerSec=11.98100110626628, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:03,923] [INFO] [timer.py:197:stop] 0/802, RunningAvgSamplesPerSec=11.983021135378596, CurrSamplesPerSec=11.88750432713988, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:10,487] [INFO] [timer.py:197:stop] 0/803, RunningAvgSamplesPerSec=11.982819024731294, CurrSamplesPerSec=11.82328583678163, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:16,928] [INFO] [timer.py:197:stop] 0/804, RunningAvgSamplesPerSec=11.982790971478826, CurrSamplesPerSec=11.960362427750507, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:23,448] [INFO] [timer.py:197:stop] 0/805, RunningAvgSamplesPerSec=11.982713734166708, CurrSamplesPerSec=11.921088377732412, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:29,893] [INFO] [timer.py:197:stop] 0/806, RunningAvgSamplesPerSec=11.982723105360106, CurrSamplesPerSec=11.990252908212623, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:36,399] [INFO] [timer.py:197:stop] 0/807, RunningAvgSamplesPerSec=11.982723619816003, CurrSamplesPerSec=11.983137256652576, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:42,839] [INFO] [timer.py:197:stop] 0/808, RunningAvgSamplesPerSec=11.982617439646585, CurrSamplesPerSec=11.897748546552213, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:49,349] [INFO] [timer.py:197:stop] 0/809, RunningAvgSamplesPerSec=11.982509903291962, CurrSamplesPerSec=11.89645881576911, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:53:55,848] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=4, lr=[9.322222222222223e-06], mom=[[0.9, 0.999]] [2022-12-19 19:53:55,848] [INFO] [timer.py:197:stop] 0/810, RunningAvgSamplesPerSec=11.982403897411295, CurrSamplesPerSec=11.897464317750822, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:02,346] [INFO] [timer.py:197:stop] 0/811, RunningAvgSamplesPerSec=11.98241782785198, CurrSamplesPerSec=11.993684220214812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:08,790] [INFO] [timer.py:197:stop] 0/812, RunningAvgSamplesPerSec=11.982416056155053, CurrSamplesPerSec=11.980982924980388, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:15,252] [INFO] [timer.py:197:stop] 0/813, RunningAvgSamplesPerSec=11.982407782667156, CurrSamplesPerSec=11.975710008029, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:21,817] [INFO] [timer.py:197:stop] 0/814, RunningAvgSamplesPerSec=11.98223398583903, CurrSamplesPerSec=11.842925490352137, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:28,304] [INFO] [timer.py:197:stop] 0/815, RunningAvgSamplesPerSec=11.982213867109767, CurrSamplesPerSec=11.965899728841453, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:34,837] [INFO] [timer.py:197:stop] 0/816, RunningAvgSamplesPerSec=11.982188269993504, CurrSamplesPerSec=11.961413939346976, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:41,386] [INFO] [timer.py:197:stop] 0/817, RunningAvgSamplesPerSec=11.982076509046031, CurrSamplesPerSec=11.891789440706084, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:47,951] [INFO] [timer.py:197:stop] 0/818, RunningAvgSamplesPerSec=11.982062734376463, CurrSamplesPerSec=11.970846900024835, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:54:54,436] [INFO] [timer.py:197:stop] 0/819, RunningAvgSamplesPerSec=11.9820604273514, CurrSamplesPerSec=11.980178190984786, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:00,920] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=4, lr=[9.3e-06], mom=[[0.9, 0.999]] [2022-12-19 19:55:00,921] [INFO] [timer.py:197:stop] 0/820, RunningAvgSamplesPerSec=11.98193607279563, CurrSamplesPerSec=11.881193665008846, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:07,507] [INFO] [timer.py:197:stop] 0/821, RunningAvgSamplesPerSec=11.981755749473937, CurrSamplesPerSec=11.836047247104597, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:14,003] [INFO] [timer.py:197:stop] 0/822, RunningAvgSamplesPerSec=11.981649637780032, CurrSamplesPerSec=11.89537072405942, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:20,543] [INFO] [timer.py:197:stop] 0/823, RunningAvgSamplesPerSec=11.981526332425174, CurrSamplesPerSec=11.881263080554193, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:27,044] [INFO] [timer.py:197:stop] 0/824, RunningAvgSamplesPerSec=11.98140995646887, CurrSamplesPerSec=11.886622094178941, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:33,500] [INFO] [timer.py:197:stop] 0/825, RunningAvgSamplesPerSec=11.98141219335818, CurrSamplesPerSec=11.9832511989369, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0025, 'learning_rate': 9.28888888888889e-06, 'epoch': 21.71} [2022-12-19 19:55:39,977] [INFO] [timer.py:197:stop] 0/826, RunningAvgSamplesPerSec=11.981307253376455, CurrSamplesPerSec=11.895560493166274, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:46,499] [INFO] [timer.py:197:stop] 0/827, RunningAvgSamplesPerSec=11.981128459132593, CurrSamplesPerSec=11.835593747787183, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:53,075] [INFO] [timer.py:197:stop] 0/828, RunningAvgSamplesPerSec=11.98085174090832, CurrSamplesPerSec=11.75683300840612, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:55:59,551] [INFO] [timer.py:197:stop] 0/829, RunningAvgSamplesPerSec=11.980852009453596, CurrSamplesPerSec=11.981073831961657, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:06,147] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=4, lr=[9.277777777777778e-06], mom=[[0.9, 0.999]] [2022-12-19 19:56:06,147] [INFO] [timer.py:197:stop] 0/830, RunningAvgSamplesPerSec=11.980727265209744, CurrSamplesPerSec=11.878445566424796, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:12,645] [INFO] [timer.py:197:stop] 0/831, RunningAvgSamplesPerSec=11.980627212830624, CurrSamplesPerSec=11.898353434861107, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:19,116] [INFO] [timer.py:197:stop] 0/832, RunningAvgSamplesPerSec=11.980636256685223, CurrSamplesPerSec=11.988138312517263, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:25,638] [INFO] [timer.py:197:stop] 0/833, RunningAvgSamplesPerSec=11.980506246720328, CurrSamplesPerSec=11.873562378482356, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:32,059] [INFO] [timer.py:197:stop] 0/834, RunningAvgSamplesPerSec=11.98053467038546, CurrSamplesPerSec=12.004201452191309, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:38,562] [INFO] [timer.py:197:stop] 0/835, RunningAvgSamplesPerSec=11.980536521172313, CurrSamplesPerSec=11.982076574014389, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:43,237] [INFO] [timer.py:197:stop] 0/836, RunningAvgSamplesPerSec=11.984386151369149, CurrSamplesPerSec=16.36457006948476, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:49,764] [INFO] [timer.py:197:stop] 0/837, RunningAvgSamplesPerSec=11.984250901143252, CurrSamplesPerSec=11.872505251142632, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:56:56,306] [INFO] [timer.py:197:stop] 0/838, RunningAvgSamplesPerSec=11.98411650038727, CurrSamplesPerSec=11.872934276154512, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:02,732] [INFO] [timer.py:197:stop] 0/839, RunningAvgSamplesPerSec=11.984133208564284, CurrSamplesPerSec=11.998117563429098, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:09,241] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=4, lr=[9.255555555555556e-06], mom=[[0.9, 0.999]] [2022-12-19 19:57:09,242] [INFO] [timer.py:197:stop] 0/840, RunningAvgSamplesPerSec=11.984034163733051, CurrSamplesPerSec=11.901703847041274, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:15,751] [INFO] [timer.py:197:stop] 0/841, RunningAvgSamplesPerSec=11.983931751241053, CurrSamplesPerSec=11.898721038708024, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:22,309] [INFO] [timer.py:197:stop] 0/842, RunningAvgSamplesPerSec=11.983558829090255, CurrSamplesPerSec=11.678647634609634, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:28,800] [INFO] [timer.py:197:stop] 0/843, RunningAvgSamplesPerSec=11.983552059522403, CurrSamplesPerSec=11.977868322785367, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:35,334] [INFO] [timer.py:197:stop] 0/844, RunningAvgSamplesPerSec=11.983488350892907, CurrSamplesPerSec=11.930148164513145, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:41,871] [INFO] [timer.py:197:stop] 0/845, RunningAvgSamplesPerSec=11.983381822856135, CurrSamplesPerSec=11.894352399949097, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:48,338] [INFO] [timer.py:197:stop] 0/846, RunningAvgSamplesPerSec=11.983375867228439, CurrSamplesPerSec=11.97835737813754, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:57:54,888] [INFO] [timer.py:197:stop] 0/847, RunningAvgSamplesPerSec=11.983265309483414, CurrSamplesPerSec=11.890676393998765, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:01,387] [INFO] [timer.py:197:stop] 0/848, RunningAvgSamplesPerSec=11.983212373788698, CurrSamplesPerSec=11.938648256536025, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:07,992] [INFO] [timer.py:197:stop] 0/849, RunningAvgSamplesPerSec=11.982958740630838, CurrSamplesPerSec=11.772164158073844, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:14,613] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=4, lr=[9.233333333333334e-06], mom=[[0.9, 0.999]] [2022-12-19 19:58:14,613] [INFO] [timer.py:197:stop] 0/850, RunningAvgSamplesPerSec=11.98289203487784, CurrSamplesPerSec=11.92665772196626, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0021, 'learning_rate': 9.233333333333334e-06, 'epoch': 22.37} [2022-12-19 19:58:21,447] [INFO] [timer.py:197:stop] 0/851, RunningAvgSamplesPerSec=11.982796362029921, CurrSamplesPerSec=11.902212033666396, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:27,996] [INFO] [timer.py:197:stop] 0/852, RunningAvgSamplesPerSec=11.982451781422256, CurrSamplesPerSec=11.696883153157295, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:34,556] [INFO] [timer.py:197:stop] 0/853, RunningAvgSamplesPerSec=11.98233031250701, CurrSamplesPerSec=11.87996482861918, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:41,100] [INFO] [timer.py:197:stop] 0/854, RunningAvgSamplesPerSec=11.982188779835464, CurrSamplesPerSec=11.862944521588084, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:47,659] [INFO] [timer.py:197:stop] 0/855, RunningAvgSamplesPerSec=11.982008104786834, CurrSamplesPerSec=11.830027775298824, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:58:54,323] [INFO] [timer.py:197:stop] 0/856, RunningAvgSamplesPerSec=11.981849722007539, CurrSamplesPerSec=11.848257289324078, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:00,937] [INFO] [timer.py:197:stop] 0/857, RunningAvgSamplesPerSec=11.981705057784435, CurrSamplesPerSec=11.859424125173737, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:07,431] [INFO] [timer.py:197:stop] 0/858, RunningAvgSamplesPerSec=11.981701116487608, CurrSamplesPerSec=11.97833225628853, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:14,029] [INFO] [timer.py:197:stop] 0/859, RunningAvgSamplesPerSec=11.98145374060705, CurrSamplesPerSec=11.773381637003185, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:20,513] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=4, lr=[9.211111111111111e-06], mom=[[0.9, 0.999]] [2022-12-19 19:59:20,514] [INFO] [timer.py:197:stop] 0/860, RunningAvgSamplesPerSec=11.981369267363489, CurrSamplesPerSec=11.909410989709924, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:27,015] [INFO] [timer.py:197:stop] 0/861, RunningAvgSamplesPerSec=11.981316968692994, CurrSamplesPerSec=11.93661233172, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:33,610] [INFO] [timer.py:197:stop] 0/862, RunningAvgSamplesPerSec=11.981154136555933, CurrSamplesPerSec=11.842897275950751, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:40,125] [INFO] [timer.py:197:stop] 0/863, RunningAvgSamplesPerSec=11.981118032618161, CurrSamplesPerSec=11.950148996538209, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:46,675] [INFO] [timer.py:197:stop] 0/864, RunningAvgSamplesPerSec=11.980999003889433, CurrSamplesPerSec=11.87938546550021, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:53,310] [INFO] [timer.py:197:stop] 0/865, RunningAvgSamplesPerSec=11.980747946956448, CurrSamplesPerSec=11.768180972470478, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 19:59:59,871] [INFO] [timer.py:197:stop] 0/866, RunningAvgSamplesPerSec=11.980606286580622, CurrSamplesPerSec=11.85958969472394, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:06,367] [INFO] [timer.py:197:stop] 0/867, RunningAvgSamplesPerSec=11.980539400299683, CurrSamplesPerSec=11.923027391580408, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:12,998] [INFO] [timer.py:197:stop] 0/868, RunningAvgSamplesPerSec=11.98032799892162, CurrSamplesPerSec=11.80021809936344, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:19,529] [INFO] [timer.py:197:stop] 0/869, RunningAvgSamplesPerSec=11.980153125016635, CurrSamplesPerSec=11.830604940925745, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:26,052] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=4, lr=[9.188888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 20:00:26,053] [INFO] [timer.py:197:stop] 0/870, RunningAvgSamplesPerSec=11.98001204672134, CurrSamplesPerSec=11.858934778907349, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:32,533] [INFO] [timer.py:197:stop] 0/871, RunningAvgSamplesPerSec=11.980017343293701, CurrSamplesPerSec=11.98461653511299, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:39,010] [INFO] [timer.py:197:stop] 0/872, RunningAvgSamplesPerSec=11.97996003318681, CurrSamplesPerSec=11.930363965815399, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:45,544] [INFO] [timer.py:197:stop] 0/873, RunningAvgSamplesPerSec=11.97985772165359, CurrSamplesPerSec=11.891503916191386, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:50,179] [INFO] [timer.py:197:stop] 0/874, RunningAvgSamplesPerSec=11.983552615117333, CurrSamplesPerSec=16.385268098141385, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:00:56,681] [INFO] [timer.py:197:stop] 0/875, RunningAvgSamplesPerSec=11.9834597081003, CurrSamplesPerSec=11.90298943735917, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0023, 'learning_rate': 9.17777777777778e-06, 'epoch': 23.03} [2022-12-19 20:01:03,240] [INFO] [timer.py:197:stop] 0/876, RunningAvgSamplesPerSec=11.98322962488529, CurrSamplesPerSec=11.7856820593804, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:09,780] [INFO] [timer.py:197:stop] 0/877, RunningAvgSamplesPerSec=11.983021345565856, CurrSamplesPerSec=11.803712252072884, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:16,399] [INFO] [timer.py:197:stop] 0/878, RunningAvgSamplesPerSec=11.982866799878284, CurrSamplesPerSec=11.84915005054646, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:22,908] [INFO] [timer.py:197:stop] 0/879, RunningAvgSamplesPerSec=11.982831265054113, CurrSamplesPerSec=11.951783505743089, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:29,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=4, lr=[9.166666666666666e-06], mom=[[0.9, 0.999]] [2022-12-19 20:01:29,436] [INFO] [timer.py:197:stop] 0/880, RunningAvgSamplesPerSec=11.982790429441541, CurrSamplesPerSec=11.947084433011256, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:36,043] [INFO] [timer.py:197:stop] 0/881, RunningAvgSamplesPerSec=11.98267979983293, CurrSamplesPerSec=11.886328922836121, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:42,555] [INFO] [timer.py:197:stop] 0/882, RunningAvgSamplesPerSec=11.98253675206111, CurrSamplesPerSec=11.85810497348226, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:49,021] [INFO] [timer.py:197:stop] 0/883, RunningAvgSamplesPerSec=11.9824280164329, CurrSamplesPerSec=11.887499589262312, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:01:55,487] [INFO] [timer.py:197:stop] 0/884, RunningAvgSamplesPerSec=11.982316050914847, CurrSamplesPerSec=11.88448074999409, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:02,029] [INFO] [timer.py:197:stop] 0/885, RunningAvgSamplesPerSec=11.982254726407474, CurrSamplesPerSec=11.928409843708701, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:08,489] [INFO] [timer.py:197:stop] 0/886, RunningAvgSamplesPerSec=11.98218034728463, CurrSamplesPerSec=11.916862010355025, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:15,010] [INFO] [timer.py:197:stop] 0/887, RunningAvgSamplesPerSec=11.982074813573476, CurrSamplesPerSec=11.889504575215854, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:21,559] [INFO] [timer.py:197:stop] 0/888, RunningAvgSamplesPerSec=11.98192834413386, CurrSamplesPerSec=11.853691773891462, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:28,069] [INFO] [timer.py:197:stop] 0/889, RunningAvgSamplesPerSec=11.981854329288753, CurrSamplesPerSec=11.916634529889821, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:34,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=4, lr=[9.144444444444444e-06], mom=[[0.9, 0.999]] [2022-12-19 20:02:34,532] [INFO] [timer.py:197:stop] 0/890, RunningAvgSamplesPerSec=11.981746252846698, CurrSamplesPerSec=11.886644201042495, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:41,039] [INFO] [timer.py:197:stop] 0/891, RunningAvgSamplesPerSec=11.981651329527743, CurrSamplesPerSec=11.897948938656091, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:47,540] [INFO] [timer.py:197:stop] 0/892, RunningAvgSamplesPerSec=11.98154696408698, CurrSamplesPerSec=11.889479824680796, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:02:54,035] [INFO] [timer.py:197:stop] 0/893, RunningAvgSamplesPerSec=11.981458539146406, CurrSamplesPerSec=11.90327445804242, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:00,535] [INFO] [timer.py:197:stop] 0/894, RunningAvgSamplesPerSec=11.981410328686746, CurrSamplesPerSec=11.938608433877473, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:07,112] [INFO] [timer.py:197:stop] 0/895, RunningAvgSamplesPerSec=11.981271866167665, CurrSamplesPerSec=11.85902489103199, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:13,681] [INFO] [timer.py:197:stop] 0/896, RunningAvgSamplesPerSec=11.981114496504203, CurrSamplesPerSec=11.842214423941336, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:20,223] [INFO] [timer.py:197:stop] 0/897, RunningAvgSamplesPerSec=11.980949563091654, CurrSamplesPerSec=11.835293695421383, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:26,689] [INFO] [timer.py:197:stop] 0/898, RunningAvgSamplesPerSec=11.980904528365928, CurrSamplesPerSec=11.940733742129343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:33,192] [INFO] [timer.py:197:stop] 0/899, RunningAvgSamplesPerSec=11.9807731842482, CurrSamplesPerSec=11.864234861021599, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:39,737] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=4, lr=[9.122222222222223e-06], mom=[[0.9, 0.999]] [2022-12-19 20:03:39,738] [INFO] [timer.py:197:stop] 0/900, RunningAvgSamplesPerSec=11.980638461612315, CurrSamplesPerSec=11.861000367094544, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0016, 'learning_rate': 9.122222222222223e-06, 'epoch': 23.68} [2022-12-19 20:03:46,245] [INFO] [timer.py:197:stop] 0/901, RunningAvgSamplesPerSec=11.98049915506701, CurrSamplesPerSec=11.856696037118041, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:52,725] [INFO] [timer.py:197:stop] 0/902, RunningAvgSamplesPerSec=11.980498896458982, CurrSamplesPerSec=11.980266412359146, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:03:59,326] [INFO] [timer.py:197:stop] 0/903, RunningAvgSamplesPerSec=11.980391565922977, CurrSamplesPerSec=11.884567567571157, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:05,808] [INFO] [timer.py:197:stop] 0/904, RunningAvgSamplesPerSec=11.980325013931116, CurrSamplesPerSec=11.920660630001858, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:12,317] [INFO] [timer.py:197:stop] 0/905, RunningAvgSamplesPerSec=11.980207031909814, CurrSamplesPerSec=11.874725278419776, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:18,875] [INFO] [timer.py:197:stop] 0/906, RunningAvgSamplesPerSec=11.980032154817515, CurrSamplesPerSec=11.824174838651505, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:25,333] [INFO] [timer.py:197:stop] 0/907, RunningAvgSamplesPerSec=11.9800185586484, CurrSamplesPerSec=11.967740232689193, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:31,811] [INFO] [timer.py:197:stop] 0/908, RunningAvgSamplesPerSec=11.979888734385144, CurrSamplesPerSec=11.863540108650193, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:38,361] [INFO] [timer.py:197:stop] 0/909, RunningAvgSamplesPerSec=11.979805292224095, CurrSamplesPerSec=11.904681287829456, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:44,874] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=4, lr=[9.100000000000001e-06], mom=[[0.9, 0.999]] [2022-12-19 20:04:44,875] [INFO] [timer.py:197:stop] 0/910, RunningAvgSamplesPerSec=11.97970836545113, CurrSamplesPerSec=11.892436926243004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:51,363] [INFO] [timer.py:197:stop] 0/911, RunningAvgSamplesPerSec=11.979674537317225, CurrSamplesPerSec=11.949037232448587, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:04:56,007] [INFO] [timer.py:197:stop] 0/912, RunningAvgSamplesPerSec=11.983269197818773, CurrSamplesPerSec=16.47768060258604, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:02,511] [INFO] [timer.py:197:stop] 0/913, RunningAvgSamplesPerSec=11.983168218383554, CurrSamplesPerSec=11.891976988254015, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:09,018] [INFO] [timer.py:197:stop] 0/914, RunningAvgSamplesPerSec=11.983169905972785, CurrSamplesPerSec=11.984707497243635, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:15,599] [INFO] [timer.py:197:stop] 0/915, RunningAvgSamplesPerSec=11.98304526607897, CurrSamplesPerSec=11.870443003110184, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:22,071] [INFO] [timer.py:197:stop] 0/916, RunningAvgSamplesPerSec=11.983068807576172, CurrSamplesPerSec=12.004600857705775, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:28,579] [INFO] [timer.py:197:stop] 0/917, RunningAvgSamplesPerSec=11.982985495407744, CurrSamplesPerSec=11.907319528019647, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:35,150] [INFO] [timer.py:197:stop] 0/918, RunningAvgSamplesPerSec=11.982889907948238, CurrSamplesPerSec=11.896061829015457, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:41,595] [INFO] [timer.py:197:stop] 0/919, RunningAvgSamplesPerSec=11.982842926425958, CurrSamplesPerSec=11.939962022236086, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:48,116] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=4, lr=[9.077777777777779e-06], mom=[[0.9, 0.999]] [2022-12-19 20:05:48,117] [INFO] [timer.py:197:stop] 0/920, RunningAvgSamplesPerSec=11.982764407919769, CurrSamplesPerSec=11.911193458867416, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:05:54,651] [INFO] [timer.py:197:stop] 0/921, RunningAvgSamplesPerSec=11.982658492465227, CurrSamplesPerSec=11.886211553084873, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:01,107] [INFO] [timer.py:197:stop] 0/922, RunningAvgSamplesPerSec=11.982668903127419, CurrSamplesPerSec=11.99224395507005, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:07,639] [INFO] [timer.py:197:stop] 0/923, RunningAvgSamplesPerSec=11.982588259962645, CurrSamplesPerSec=11.908853582194906, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:14,131] [INFO] [timer.py:197:stop] 0/924, RunningAvgSamplesPerSec=11.98250657628016, CurrSamplesPerSec=11.907745790608923, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:20,639] [INFO] [timer.py:197:stop] 0/925, RunningAvgSamplesPerSec=11.982540587864234, CurrSamplesPerSec=12.013981639833316, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0022, 'learning_rate': 9.066666666666667e-06, 'epoch': 24.34} [2022-12-19 20:06:27,055] [INFO] [timer.py:197:stop] 0/926, RunningAvgSamplesPerSec=11.982550243799029, CurrSamplesPerSec=11.991469312661055, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:33,597] [INFO] [timer.py:197:stop] 0/927, RunningAvgSamplesPerSec=11.982452504105975, CurrSamplesPerSec=11.89281733711839, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:40,097] [INFO] [timer.py:197:stop] 0/928, RunningAvgSamplesPerSec=11.982370871752023, CurrSamplesPerSec=11.907334317288633, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:46,630] [INFO] [timer.py:197:stop] 0/929, RunningAvgSamplesPerSec=11.98231029926522, CurrSamplesPerSec=11.926481796257018, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:53,156] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=4, lr=[9.055555555555556e-06], mom=[[0.9, 0.999]] [2022-12-19 20:06:53,157] [INFO] [timer.py:197:stop] 0/930, RunningAvgSamplesPerSec=11.982241703113752, CurrSamplesPerSec=11.918989108397547, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:06:59,646] [INFO] [timer.py:197:stop] 0/931, RunningAvgSamplesPerSec=11.9821701781696, CurrSamplesPerSec=11.91616108095669, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:06,105] [INFO] [timer.py:197:stop] 0/932, RunningAvgSamplesPerSec=11.982116413439563, CurrSamplesPerSec=11.932376542997222, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:12,613] [INFO] [timer.py:197:stop] 0/933, RunningAvgSamplesPerSec=11.98197058748448, CurrSamplesPerSec=11.84787187980677, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:19,057] [INFO] [timer.py:197:stop] 0/934, RunningAvgSamplesPerSec=11.982000665016344, CurrSamplesPerSec=12.010068512775431, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:25,517] [INFO] [timer.py:197:stop] 0/935, RunningAvgSamplesPerSec=11.982020506595292, CurrSamplesPerSec=12.000541473024791, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:32,044] [INFO] [timer.py:197:stop] 0/936, RunningAvgSamplesPerSec=11.98205803557732, CurrSamplesPerSec=12.01717530716078, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:38,507] [INFO] [timer.py:197:stop] 0/937, RunningAvgSamplesPerSec=11.982042212754564, CurrSamplesPerSec=11.96728192097118, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:44,997] [INFO] [timer.py:197:stop] 0/938, RunningAvgSamplesPerSec=11.982078107109796, CurrSamplesPerSec=12.01573369775648, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:51,484] [INFO] [timer.py:197:stop] 0/939, RunningAvgSamplesPerSec=11.982010053807992, CurrSamplesPerSec=11.918649356715488, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:07:57,952] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=4, lr=[9.033333333333334e-06], mom=[[0.9, 0.999]] [2022-12-19 20:07:57,953] [INFO] [timer.py:197:stop] 0/940, RunningAvgSamplesPerSec=11.982034470581441, CurrSamplesPerSec=12.004956801951309, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:04,393] [INFO] [timer.py:197:stop] 0/941, RunningAvgSamplesPerSec=11.982021466328323, CurrSamplesPerSec=11.969835895338058, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:10,963] [INFO] [timer.py:197:stop] 0/942, RunningAvgSamplesPerSec=11.981886755861986, CurrSamplesPerSec=11.85671646168461, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:17,478] [INFO] [timer.py:197:stop] 0/943, RunningAvgSamplesPerSec=11.981822838767812, CurrSamplesPerSec=11.922040861972283, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:23,989] [INFO] [timer.py:197:stop] 0/944, RunningAvgSamplesPerSec=11.981670732945945, CurrSamplesPerSec=11.840230577000028, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:30,465] [INFO] [timer.py:197:stop] 0/945, RunningAvgSamplesPerSec=11.981697213378585, CurrSamplesPerSec=12.00669387645563, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:36,893] [INFO] [timer.py:197:stop] 0/946, RunningAvgSamplesPerSec=11.981735401368988, CurrSamplesPerSec=12.017855350458216, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:43,343] [INFO] [timer.py:197:stop] 0/947, RunningAvgSamplesPerSec=11.981674702666513, CurrSamplesPerSec=11.924648132744139, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:49,857] [INFO] [timer.py:197:stop] 0/948, RunningAvgSamplesPerSec=11.981573502433035, CurrSamplesPerSec=11.886697363122433, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:08:56,286] [INFO] [timer.py:197:stop] 0/949, RunningAvgSamplesPerSec=11.981453036635726, CurrSamplesPerSec=11.868567231791992, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:00,953] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=4, lr=[9.011111111111111e-06], mom=[[0.9, 0.999]] [2022-12-19 20:09:00,953] [INFO] [timer.py:197:stop] 0/950, RunningAvgSamplesPerSec=11.984961377845826, CurrSamplesPerSec=16.583480525231515, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0034, 'learning_rate': 9.011111111111111e-06, 'epoch': 25.0} [2022-12-19 20:09:07,400] [INFO] [timer.py:197:stop] 0/951, RunningAvgSamplesPerSec=11.98489328687906, CurrSamplesPerSec=11.920689216145462, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:13,962] [INFO] [timer.py:197:stop] 0/952, RunningAvgSamplesPerSec=11.984780237527145, CurrSamplesPerSec=11.878449245829165, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:20,510] [INFO] [timer.py:197:stop] 0/953, RunningAvgSamplesPerSec=11.98465349369228, CurrSamplesPerSec=11.865445759819316, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:26,989] [INFO] [timer.py:197:stop] 0/954, RunningAvgSamplesPerSec=11.984692836082711, CurrSamplesPerSec=12.022224742196753, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:33,470] [INFO] [timer.py:197:stop] 0/955, RunningAvgSamplesPerSec=11.98463364100865, CurrSamplesPerSec=11.928543950456334, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:39,921] [INFO] [timer.py:197:stop] 0/956, RunningAvgSamplesPerSec=11.984641677719958, CurrSamplesPerSec=11.992305566473584, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:46,450] [INFO] [timer.py:197:stop] 0/957, RunningAvgSamplesPerSec=11.984469362038348, CurrSamplesPerSec=11.822306891565008, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:52,970] [INFO] [timer.py:197:stop] 0/958, RunningAvgSamplesPerSec=11.984363497840585, CurrSamplesPerSec=11.884109816452387, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:09:59,489] [INFO] [timer.py:197:stop] 0/959, RunningAvgSamplesPerSec=11.984251686794812, CurrSamplesPerSec=11.87830627636451, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:05,991] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=4, lr=[8.988888888888889e-06], mom=[[0.9, 0.999]] [2022-12-19 20:10:05,992] [INFO] [timer.py:197:stop] 0/960, RunningAvgSamplesPerSec=11.984115171781156, CurrSamplesPerSec=11.854880626653486, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:12,463] [INFO] [timer.py:197:stop] 0/961, RunningAvgSamplesPerSec=11.984064226610863, CurrSamplesPerSec=11.935456914890047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:18,985] [INFO] [timer.py:197:stop] 0/962, RunningAvgSamplesPerSec=11.983996430335285, CurrSamplesPerSec=11.919330996464325, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:25,438] [INFO] [timer.py:197:stop] 0/963, RunningAvgSamplesPerSec=11.983990497879097, CurrSamplesPerSec=11.978298047982722, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:31,976] [INFO] [timer.py:197:stop] 0/964, RunningAvgSamplesPerSec=11.98389065684, CurrSamplesPerSec=11.888706290748539, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:38,445] [INFO] [timer.py:197:stop] 0/965, RunningAvgSamplesPerSec=11.983906035094055, CurrSamplesPerSec=11.998718219832808, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:44,942] [INFO] [timer.py:197:stop] 0/966, RunningAvgSamplesPerSec=11.983806121153947, CurrSamplesPerSec=11.888356153615007, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:51,498] [INFO] [timer.py:197:stop] 0/967, RunningAvgSamplesPerSec=11.983773364975226, CurrSamplesPerSec=11.952279480406327, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:10:57,944] [INFO] [timer.py:197:stop] 0/968, RunningAvgSamplesPerSec=11.983732493570201, CurrSamplesPerSec=11.944421103591708, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:04,380] [INFO] [timer.py:197:stop] 0/969, RunningAvgSamplesPerSec=11.98370986771487, CurrSamplesPerSec=11.961893123283378, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:10,897] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=4, lr=[8.966666666666667e-06], mom=[[0.9, 0.999]] [2022-12-19 20:11:10,898] [INFO] [timer.py:197:stop] 0/970, RunningAvgSamplesPerSec=11.983570224433443, CurrSamplesPerSec=11.850041376962448, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:17,432] [INFO] [timer.py:197:stop] 0/971, RunningAvgSamplesPerSec=11.983510054057579, CurrSamplesPerSec=11.925547145492628, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:23,983] [INFO] [timer.py:197:stop] 0/972, RunningAvgSamplesPerSec=11.983431301211311, CurrSamplesPerSec=11.907603171475493, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:30,421] [INFO] [timer.py:197:stop] 0/973, RunningAvgSamplesPerSec=11.983445113993465, CurrSamplesPerSec=11.996858525352687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:36,943] [INFO] [timer.py:197:stop] 0/974, RunningAvgSamplesPerSec=11.983394067382388, CurrSamplesPerSec=11.934032191134115, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:43,431] [INFO] [timer.py:197:stop] 0/975, RunningAvgSamplesPerSec=11.983338599644666, CurrSamplesPerSec=11.929665688613657, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.0036, 'learning_rate': 8.955555555555555e-06, 'epoch': 25.66} [2022-12-19 20:11:49,942] [INFO] [timer.py:197:stop] 0/976, RunningAvgSamplesPerSec=11.98324976326228, CurrSamplesPerSec=11.89743162439965, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:11:56,373] [INFO] [timer.py:197:stop] 0/977, RunningAvgSamplesPerSec=11.983282055763688, CurrSamplesPerSec=12.014817809999654, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:02,896] [INFO] [timer.py:197:stop] 0/978, RunningAvgSamplesPerSec=11.983203514993598, CurrSamplesPerSec=11.907113010280371, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:09,357] [INFO] [timer.py:197:stop] 0/979, RunningAvgSamplesPerSec=11.98314478188719, CurrSamplesPerSec=11.926094460052305, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:15,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=4, lr=[8.944444444444446e-06], mom=[[0.9, 0.999]] [2022-12-19 20:12:15,927] [INFO] [timer.py:197:stop] 0/980, RunningAvgSamplesPerSec=11.982959135171967, CurrSamplesPerSec=11.80428944815244, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:22,406] [INFO] [timer.py:197:stop] 0/981, RunningAvgSamplesPerSec=11.982991942908281, CurrSamplesPerSec=12.01516414221157, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:28,925] [INFO] [timer.py:197:stop] 0/982, RunningAvgSamplesPerSec=11.982913239797575, CurrSamplesPerSec=11.906355663820282, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:35,483] [INFO] [timer.py:197:stop] 0/983, RunningAvgSamplesPerSec=11.982813430020077, CurrSamplesPerSec=11.885792619871138, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:42,393] [INFO] [timer.py:197:stop] 0/984, RunningAvgSamplesPerSec=11.982698869313275, CurrSamplesPerSec=11.871360111006437, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:49,463] [INFO] [timer.py:197:stop] 0/985, RunningAvgSamplesPerSec=11.98248540580213, CurrSamplesPerSec=11.776471900745703, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:12:56,451] [INFO] [timer.py:197:stop] 0/986, RunningAvgSamplesPerSec=11.982282268839205, CurrSamplesPerSec=11.785875071912223, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:03,148] [INFO] [timer.py:197:stop] 0/987, RunningAvgSamplesPerSec=11.982128577028242, CurrSamplesPerSec=11.832782723174759, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:08,211] [INFO] [timer.py:197:stop] 0/988, RunningAvgSamplesPerSec=11.985415190922605, CurrSamplesPerSec=16.42239476510482, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:14,766] [INFO] [timer.py:197:stop] 0/989, RunningAvgSamplesPerSec=11.985273780707011, CurrSamplesPerSec=11.847448324319458, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:21,353] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=4, lr=[8.922222222222224e-06], mom=[[0.9, 0.999]] [2022-12-19 20:13:21,353] [INFO] [timer.py:197:stop] 0/990, RunningAvgSamplesPerSec=11.985189516140235, CurrSamplesPerSec=11.90259412571053, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:27,849] [INFO] [timer.py:197:stop] 0/991, RunningAvgSamplesPerSec=11.98518585698797, CurrSamplesPerSec=11.981571705836634, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:34,316] [INFO] [timer.py:197:stop] 0/992, RunningAvgSamplesPerSec=11.985165623087367, CurrSamplesPerSec=11.96518768577523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:40,898] [INFO] [timer.py:197:stop] 0/993, RunningAvgSamplesPerSec=11.98501916329215, CurrSamplesPerSec=11.841758884755997, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:47,480] [INFO] [timer.py:197:stop] 0/994, RunningAvgSamplesPerSec=11.98490783023967, CurrSamplesPerSec=11.875584205771913, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:13:54,119] [INFO] [timer.py:197:stop] 0/995, RunningAvgSamplesPerSec=11.98478266961461, CurrSamplesPerSec=11.861897670620404, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:14:00,786] [INFO] [timer.py:197:stop] 0/996, RunningAvgSamplesPerSec=11.984706039513199, CurrSamplesPerSec=11.909092917607682, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:14:07,334] [INFO] [timer.py:197:stop] 0/997, RunningAvgSamplesPerSec=11.984605081457271, CurrSamplesPerSec=11.88508692102057, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:14:13,839] [INFO] [timer.py:197:stop] 0/998, RunningAvgSamplesPerSec=11.984535021173334, CurrSamplesPerSec=11.91522857593963, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:14:20,389] [INFO] [timer.py:197:stop] 0/999, RunningAvgSamplesPerSec=11.98435874770376, CurrSamplesPerSec=11.811327784854544, MemAllocated=1.52GB, MaxMemAllocated=26.06GB [2022-12-19 20:14:27,042] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=4, lr=[8.900000000000001e-06], mom=[[0.9, 0.999]] [2022-12-19 20:14:27,043] [INFO] [timer.py:197:stop] 0/1000, RunningAvgSamplesPerSec=11.984206885402875, CurrSamplesPerSec=11.83469102750876, MemAllocated=1.52GB, MaxMemAllocated=26.06GB {'loss': 0.003, 'learning_rate': 8.900000000000001e-06, 'epoch': 26.32} {'eval_loss': 0.366455078125, 'eval_wer': 19.037900874635568, 'eval_runtime': 167.6651, 'eval_samples_per_second': 7.199, 'eval_steps_per_second': 0.227, 'epoch': 26.32} [2022-12-19 20:17:16,832] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! [2022-12-19 20:17:16,840] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt [2022-12-19 20:17:16,841] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt... [2022-12-19 20:17:18,775] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt. [2022-12-19 20:17:18,776] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt... [2022-12-19 20:17:26,330] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-19 20:17:26,330] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-19 20:17:26,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now!