File size: 6,032 Bytes
e70f10a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
{
"adaptive_mixing_args": null,
"async_checkpointing": false,
"async_eval_ngpus": -1,
"batch_size": 2,
"checkpoint_manifold_bucket": "genai_llm_fb",
"data": "",
"delete_manifold_checkpoints": true,
"disable_logging": false,
"disable_workers_print": false,
"dist": {
"global_rank": 0,
"world_size": 8
},
"do_sync_eval": true,
"dtype": "bf16",
"dump_dir": "/tmp/metaformers_dmp",
"dump_freq": 100,
"dump_profile_traces": false,
"enable_gil_watcher": false,
"enable_loss_tracker": false,
"eval": null,
"eval_freq": 100,
"exp_id": "",
"exp_name": "",
"finetuning_checkpoint_load_strict": false,
"finetuning_dir": "/tmp/metaformers_dmp/checkpoints/stable/llama_cinnamon_7b",
"fp32_reduce_scatter": true,
"gpu_check_level": 3,
"hive_data": null,
"instruct": {
"is_instruct_tuning": true,
"no_loss_prompt": true,
"no_loss_truncated": false,
"only_sft_last_response": false,
"smart_coalesce": false,
"space_around_response": false,
"wrap_seq_tokens_once": false
},
"instruct_data": "/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_prompts_open_13K_no_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_desc_1122_redist_6cat:0.25,/tmp/metaformers_dmp/data/sft/anthropic_responses_open_13K_no_desc_1122_redist_6cat:0.25",
"iter_batch_multi_hive_koski": null,
"iter_jsonl": {
"buffer_size": 64,
"same_data": false
},
"iter_multi": {
"buffer_size": 512,
"ignore_extra_chunks": true,
"max_precompute": 20,
"multiprocess": true
},
"iter_type": "multi",
"kd_args": {
"kd_logits": false,
"kd_model": null,
"kd_model_dir": "",
"reverse_kld_loss": false
},
"keep_eval_checkpoints": true,
"keep_n_last_checkpoints": 2,
"log_all_steps": false,
"log_freq": 1,
"log_updates": true,
"loss_rescaling": false,
"manifold_output_dir": "tree/checkpoints/mast/inan/2023-11-27/080608_VAx9Hcb0THuGhWcZP4I6OA",
"mixing_ratio": null,
"model": {
"alpha_depth": "disabled",
"custom_bwd": true,
"dim": 4096,
"dim_by_layer": "",
"dropout": 0,
"efficient_attn": "cutlass",
"ffn_dim": 512,
"ffn_dim_multiplier": 1.0,
"full_logging_n_layers": 4,
"head_prune": false,
"init": {
"coeff_std": null,
"depth_last": false,
"fixed_std": null,
"no_init": false,
"use_depth": "current",
"use_gaussian": true
},
"init_on_meta_device": false,
"layer_ckpt": "none",
"loss_parallel": false,
"max_length": 4096,
"multiple_of": 256,
"n_heads": 32,
"n_heads_by_layer": "",
"n_kv_heads": null,
"n_kv_heads_by_layer": "",
"n_layers": 32,
"non_linearity": "swiglu",
"norm_affine": true,
"norm_eps": 1e-05,
"norm_type": "rmsnorm",
"output_size": -1,
"parallel_impl": "FAIRSCALE",
"position_interpolation": 1.0,
"pre_norm": true,
"recompute_attn": true,
"recompute_fc1_out": true,
"recompute_fc3_out": true,
"rope_theta": 10000.0,
"sequence_parallel": false,
"use_rope": true,
"use_xpos": false,
"vocab_size": 32000,
"xpos_gamma": 0.8,
"xpos_scale_base": 4096,
"xpos_theta": 500000.0
},
"model_parallel_size": 1,
"no_final_ckpt": false,
"optim": {
"beta1": 0.9,
"beta2": 0.95,
"clip": 1.0,
"cosine_theta": 1.0,
"cycle_length": 1.0,
"epsilon": 1e-08,
"exp_factor": 0.5,
"fused": null,
"lr": 2e-06,
"lr_min_ratio": 0.1,
"scheduler": "cosine",
"use_deprecated_optim": false,
"use_sgd": false,
"warmup": 100,
"weight_decay": 0.1
},
"peft_args": null,
"periodic_gpu_check": true,
"profile_freq": -1,
"reshard_after_forward": true,
"restore_dataloader_position": false,
"rlhf": null,
"root_dump_dir": "/tmp/nobody/xldumps",
"secondary_hive_data": null,
"seq_len": 4096,
"snapshot_restore_dir": null,
"steps": 3000,
"stuck_threshold_sec": 1500,
"tb_upload_freq": 50,
"tokenizer": "tokenizer_final_32k.minus_inf_ws.model",
"tokenizer_dir": "/tmp/metaformers_dmp/tokenizer",
"torch_seed": -1,
"unlimited_steps": false,
"valid": {
"batch_size": 8,
"content_key": null,
"custom_preference_task_table1": "",
"custom_preference_task_table2": "",
"debug": false,
"hive_data": null,
"hive_tasks": [],
"hive_tasks_output_hive_data": null,
"instruct": {
"is_instruct_tuning": true,
"no_loss_prompt": true,
"no_loss_truncated": false,
"only_sft_last_response": false,
"smart_coalesce": false,
"space_around_response": false,
"wrap_seq_tokens_once": false
},
"iso_regression_model_path": "",
"majority_voting": 0,
"n_batches": 100,
"n_generations": 1,
"ppl_files_str": "",
"ppl_root_dir": "",
"prompt_path": "",
"random_fewshots": false,
"repetition_penalty": 1.0,
"rlhf_eval": false,
"seq_len": 2048,
"task_batch_size": 8,
"tasks_root_dir": "/tmp/metaformers_dmp/data/eval",
"tasks_str": "safetyllama_prompt,safetyllama_response",
"temperature": 1.0,
"top_k": 0,
"top_p": 0.0,
"use_llm_inference": true,
"use_relative_loss": true,
"use_sampling": false,
"write_eval": true,
"write_every_n_batches": 1
}
} |