HenryJJ committed on
Commit
ee7a7dd
1 Parent(s): 0a6d915

add config

Browse files
Files changed (1) hide show
  1. config/llama3-lima.yml +83 -0
config/llama3-lima.yml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# axolotl config: QLoRA fine-tune of Meta-Llama-3-8B on the LIMA dataset
# (header previously said "Mistral-7b", which did not match base_model below)
base_model: NousResearch/Meta-Llama-3-8B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

# 4-bit quantized base weights (QLoRA); adapter weights train in higher precision.
load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: 64bits/lima_vicuna_format
    # For the type/conversation options see the axolotl README and pick what
    # suits your project; sharegpt + chatml here targets a chatbot use case.
    type: sharegpt
    conversation: chatml
chat_template: chatml
dataset_prepared_path: lima/data  # local cache dir for the prepared/tokenized dataset
val_set_size: 0.05
output_dir: ./llama3-lima-out

adapter: qlora
lora_model_dir:

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false

# LoRA hyperparameters; target_linear applies the adapter to all linear layers.
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
# embed_tokens and lm_head are trained fully because new chatml tokens are
# added below and need embeddings/logits learned for them.
lora_modules_to_save:
  - embed_tokens
  - lm_head

wandb_project: llama3-lima
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 1
num_epochs: 3
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 2
xformers_attention:
flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens:
saves_per_epoch: 1
debug: true
deepspeed:
weight_decay: 0.05
fsdp:
fsdp_config:
# ChatML control tokens: eos remaps to <|im_end|>, and both im_start/im_end
# are added to the tokenizer (hence lora_modules_to_save above).
special_tokens:
  eos_token: "<|im_end|>"
  pad_token: "<|end_of_text|>"
tokens:
  - "<|im_start|>"
  - "<|im_end|>"