config_name: "JackFram/llama-68m"
tokenizer_name: "JackFram/llama-68m"
validation_split_percentage: 2
train_file: "/home/dshteyma/target_draft_coupling_code/dataset_dict.json"
dataset_name_local: "RedPajama"
dataset_name: "togethercomputer/RedPajama-Data-1T-Sample"
dataset_name_hub: "togethercomputer/RedPajama-Data-1T-Sample"
# max_train_samples: 1000
# max_eval_samples: 10
do_train: true
do_eval: true
output_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
overwrite_output_dir: true
per_device_train_batch_size: 4
gradient_accumulation_steps: 3
report_to: "tensorboard"
logging_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
logging_steps: 10000
save_steps: 10000
eval_strategy: "steps"
eval_steps: 10000
learning_rate: 0.0001
weight_decay: 0.01
warmup_ratio: 0.05
push_to_hub: false
hub_model_id: "DorinSht/llama_68M_redpajama"
hub_strategy: "all_checkpoints"