config_name: "JackFram/llama-68m" tokenizer_name: "JackFram/llama-68m" validation_split_percentage: 2 train_file: "/home/dshteyma/shareGPT_data/ShareGPT_V3_unfiltered_cleaned_split.json" dataset_name_hub: "anon8231489123/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json" dataset_name_local: "ShareGPT" # max_train_samples: 1000 # max_eval_samples: 10 do_train: True do_eval: True output_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs" overwrite_output_dir: True per_device_train_batch_size: 4 gradient_accumulation_steps: 1 report_to: "tensorboard" logging_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs" logging_steps: 500 save_steps: 1000 eval_strategy: "steps" eval_steps: 1000 learning_rate: 0.0001 gradient_accumulation_steps: 1 weight_decay: 0.01 warmup_ratio: 0.05 push_to_hub: True hub_model_id: "DorinSht/ShareGPT_llama2_68M" hub_strategy: "all_checkpoints"