{"mode": "rl", "wandb": "True", "batch_size": "1", "context_length": "2048", "rl_save_steps": "100", "flash_atten": "False", "load_in_8bit": "False", "model": "./output/merged-mathy-mammoth-13b-highlora/", "tokenizer": "./output/merged-mathy-mammoth-13b-highlora/", "tokenizer_init_kwargs": "{\"use_fast\": false}", "dataset": "approach0/MATH-full", "dataset_key": "train", "dataset_shuffle": "True", "collate_fn": "collate_generalist_infer", "collate__query_key": "input", "collate_add_eos": "False", "stop_fn": "stop_on_common_stop_and_boxed_tokens", "mcts_fn": "mcts_generalist_infer", "reward_fn": "reward_by_answer", "reward_args": "{\"sol_key\": \"output\"}", "step_fn": "rl_step_default", "log_fn": "log_rl_default", "log_columns": "[\"src_path\", \"input\", \"output\"]", "decode_kwargs": "{\n\"skip_special_tokens\": false,\n\"spaces_between_special_tokens\": true,\n\"clean_up_tokenization_spaces\": true\n}", "rl_respond_kwargs": "{\n\"batch_size\": 1,\n\"min_length\": -1,\n\"max_length\": 2048,\n\"top_k\": 0.0,\n\"top_p\": 1.0,\n\"do_sample\": true,\n\"temperature\": 0.4\n}", "peft": "{\n\"peft_attach_new\": true,\n\"peft_lora_rank\": 16,\n\"peft_lora_alpha\": 16,\n\"peft_lora_targets\": [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"down_proj\", \"up_proj\"]\n}", "trainer": "{\n\"lr\": 1e-5,\n\"warmup_steps\": 100,\n\"training_steps\": 0,\n\"batch_size\": 1,\n\"log_with\": \"wandb\",\n\"use_score_scaling\": true,\n\"use_score_norm\": false,\n\"score_clip\": 1.0,\n\"early_stopping\": true,\n\"optimize_cuda_cache\": true,\n\"seed\": 70\n}", "local_rank": "0", "run": "__output_merged-mathy-mammoth-13b-highlora___dense_reward_and_logs", "7b_vicuna_v1_5": "lmsys/vicuna-7b-v1.5", "7b_vicuna_v1_5_32k": "lmsys/longchat-7b-v1.5-32k", "test_lora_repo": "Lajonbot/vicuna-7b-v1.5-PL-lora_adapter_model", "13b_mathy_fft": "approach0/mathy-vicuna-13B-FFT", "7b_wizardmath": "WizardLM/WizardMath-7B-V1.0", "13b_wizardmath": "WizardLM/WizardMath-13B-V1.0", "70b_wizardmath": "WizardLM/WizardMath-70B-V1.0", "7b_mammoth": "TIGER-Lab/MAmmoTH-7B", "13b_mammoth": "TIGER-Lab/MAmmoTH-13B", "34b_mammoth_code": "TIGER-Lab/MAmmoTH-Coder-13B", "70b_mammoth": "TIGER-Lab/MAmmoTH-70B", "7b_tora": "llm-agents/tora-7b-v1.0", "13b_tora": "llm-agents/tora-13b-v1.0", "7b_llemma": "EleutherAI/llemma_7b", "7b_metamath": "meta-math/MetaMath-7B-V1.0", "13b_metamath": "meta-math/MetaMath-13B-V1.0", "7b_abel": "GAIR/GAIRMath-Abel-7b", "13b_abel": "GAIR/GAIRMath-Abel-13b", "seed": "70", "output_dir": "./output", "add_sys_paths": "[\"../Progressive-Hint\", \"../math/modeling\"]"}