{ "experiment_key": "base", "save_safetensors": true, "max_shard_size": "10GB", "local_rank": 0, "use_gradient_checkpointing": true, "trainer_key": "lm", "force_fp32": false, "force_fp16": false, "from_gptq": false, "huggingface_hub_token": null, "single_gpu": null, "master_port": 9994, "deepspeed_stage": null, "deepspeed_config_path": null, "fsdp_strategy": "", "fsdp_offload": true, "seed": 42, "stabilize": false, "norm_fp32": false, "path_to_env_file": "./.env", "prepare_dataset": true, "lora_hub_model_id": null, "lora_model_local_path": null, "fused_model_local_path": null, "fuse_after_training": false, "quantization_dataset_id": null, "quantization_max_samples": 1024, "quantized_model_path": "./quantized_model/", "quantized_hub_model_id": null, "quantized_hub_private_repo": true, "dataset_key": "soda", "train_local_path_to_data": "./train.jsonl", "eval_local_path_to_data": null, "shuffle": true, "max_eval_samples": 1000, "add_eval_to_train_if_no_path": false, "tokenizer_name_or_path": null, "tokenizer_use_fast": null, "tokenizer_padding_side": null, "collator_key": "lm", "max_length": 2048, "model_name_or_path": "deepseek-ai/deepseek-llm-7b-chat", "push_to_hub_bos_add_bos_token": false, "use_flash_attention_2": false, "trust_remote_code": false, "device_map": null, "prepare_model_for_kbit_training": true, "offload_folder": null, "load_in_8bit": false, "load_in_4bit": true, "llm_int8_threshold": 6.0, "llm_int8_has_fp16_weight": true, "bnb_4bit_use_double_quant": true, "bnb_4bit_quant_type": "nf4", "bnb_quantize_after_model_init": false, "gptq_bits": 4, "gptq_group_size": 128, "gptq_disable_exllama": true, "apply_lora": true, "lora_rank": 8, "lora_alpha": 32, "lora_dropout": 0.1, "raw_lora_target_modules": "all", "output_dir": "./outputs/", "per_device_train_batch_size": 2, "do_eval": false, "per_device_eval_batch_size": null, "gradient_accumulation_steps": 2, "eval_accumulation_steps": null, "eval_delay": 0, "eval_steps": 1000, "warmup_steps": 3, "max_steps": 150, "num_train_epochs": 1, "learning_rate": 0.0002, "max_grad_norm": 1.0, "weight_decay": 0.001, "label_smoothing_factor": 0.0, "logging_steps": 1, "save_steps": 5, "save_total_limit": 1, "optim": "paged_adamw_8bit", "push_to_hub": true, "hub_model_id": "TachyHealthResearch/Thealth-deepseek-7b", "hub_private_repo": false, "neftune_noise_alpha": null, "project_name": null, "report_to_wandb": false, "wandb_api_key": null, "wandb_project": null, "wandb_entity": null }