Text Generation
Transformers
Safetensors
mistral
chat
conversational
text-generation-inference
Inference Endpoints
alpindale committed on
Commit
ec0d9b2
1 Parent(s): 73ca724

Create axolotl_config/config.yaml

Browse files
Files changed (1) hide show
  1. axolotl_config/config.yaml +77 -0
axolotl_config/config.yaml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: mistralai/Mistral-Large-Instruct-2407
2
+ model_type: AutoModelForCausalLM
3
+ tokenizer_type: AutoTokenizer
4
+
5
+ load_in_8bit: false
6
+ load_in_4bit: false
7
+ strict: false
8
+
9
+ datasets:
10
+ - path: Doctor-Shotgun/C2-Stheno
11
+ type: sharegpt
12
+ conversation: mistral
13
+ - path: anthracite-org/kalo-opus-instruct-22k-no-refusal
14
+ type: sharegpt
15
+ conversation: mistral
16
+ - path: anthracite-org/nopm_claude_writing_fixed
17
+ type: sharegpt
18
+ conversation: mistral
19
+ chat_template: inst
20
+
21
+ dataset_prepared_path: /workspace/data/datasets/tokenized
22
+ val_set_size: 0.00
23
+ output_dir: /workspace/data/fft-out
24
+
25
+ sequence_len: 8192
26
+ sample_packing: true
27
+ pad_to_sequence_len: true
28
+
29
+ adapter:
30
+ lora_model_dir:
31
+ lora_r:
32
+ lora_alpha:
33
+ lora_dropout:
34
+ lora_target_linear:
35
+ lora_fan_in_fan_out:
36
+
37
+ wandb_project: 123b-magnum-fft
38
+ wandb_entity:
39
+ wandb_watch:
40
+ wandb_name: v1-attempt-1
41
+ wandb_log_model:
42
+
43
+ gradient_accumulation_steps: 8
44
+ micro_batch_size: 1
45
+ num_epochs: 2
46
+ optimizer: adamw_bnb_8bit
47
+ lr_scheduler: cosine
48
+ learning_rate: 0.000002
49
+
50
+ train_on_inputs: false
51
+ group_by_length: false
52
+ bf16: auto
53
+ fp16:
54
+ tf32: false
55
+
56
+ gradient_checkpointing: unsloth
57
+ #gradient_checkpointing_kwargs:
58
+ #use_reentrant: true
59
+ early_stopping_patience:
60
+ resume_from_checkpoint:
61
+ local_rank:
62
+ logging_steps: 1
63
+ xformers_attention:
64
+ flash_attention: true
65
+ s2_attention:
66
+
67
+ warmup_steps: 20
68
+ evals_per_epoch:
69
+ eval_table_size:
70
+ eval_max_new_tokens: 128
71
+ saves_per_epoch: 2
72
+ debug:
73
+ deepspeed: deepspeed_configs/zero3_bf16.json
74
+ weight_decay: 0.01
75
+ fsdp:
76
+ fsdp_config:
77
+ special_tokens: