Text Generation · Transformers · PyTorch · English · qwen2 · conversational · Inference Endpoints · text-generation-inference
ehartford committed on
Commit 847add5
1 Parent(s): 668062b

Upload folder using huggingface_hub

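The commit message indicates the folder was pushed with huggingface_hub. As a minimal sketch of such an upload (the repo id and local path below are placeholders, not taken from this commit):

```python
# Minimal sketch of pushing a local folder with huggingface_hub.
# repo_id and folder_path are placeholders; adjust to your own repo.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="path/to/local/model-folder",       # local directory to upload
    repo_id="your-username/Liberated-Qwen1.5-72B",   # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```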
configs/Liberated-Qwen-1.5-72b.qlora.yml ADDED
@@ -0,0 +1,88 @@
+ base_model: /data/models/Qwen1.5-72B
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ trust_remote_code: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: /data/eric/datasets/m-a-p_Code-Feedback-sharegpt.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /data/eric/datasets/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /data/eric/datasets/openhermes2_5-sharegpt.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /data/eric/datasets/gorilla_openfunctions_train_sharegpt.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /data/eric/datasets/SystemConversations.jsonl
+     type: sharegpt
+     conversation: chatml
+   - path: /data/eric/datasets/identity_sharegpt.jsonl
+     type: sharegpt
+     conversation: chatml
+
+ chat_template: chatml
+
+ dataset_prepared_path: thingy
+ val_set_size: 0
+ output_dir: /data/eric/Liberated-Qwen-1.5-72b
+
+ sequence_len: 8192
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ adapter: qlora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project: AbacusLiberated
+ wandb_entity:
+ wandb_watch:
+ wandb_name:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 8
+ micro_batch_size: 1
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 4
+ debug:
+ deepspeed: deepspeed_configs/zero2.json
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+
+ special_tokens:
+   eos_token: "<|im_end|>"
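The YAML above is an Axolotl-style QLoRA training recipe. For readers who want a rough idea of what the quantization and LoRA settings correspond to outside of Axolotl, here is a hedged sketch using transformers, bitsandbytes, and peft; values are mirrored from the YAML where possible, while the compute dtype and the `all-linear` target-module shortcut are assumptions, not part of this commit:

```python
# Sketch only: an approximate PEFT/bitsandbytes analogue of the QLoRA knobs above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: true
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption; pairs with bf16: auto
)

base_path = "/data/models/Qwen1.5-72B"      # base_model from the config
model = AutoModelForCausalLM.from_pretrained(
    base_path,
    quantization_config=bnb_config,
    trust_remote_code=True,                 # trust_remote_code: true
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)

lora_config = LoraConfig(
    r=32,                                   # lora_r
    lora_alpha=16,                          # lora_alpha
    lora_dropout=0.05,                      # lora_dropout
    target_modules="all-linear",            # rough analogue of lora_target_linear: true
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()          # shows the small trainable adapter fraction
```

Note that the `"all-linear"` shortcut requires a recent peft release; on older versions you would list the projection module names explicitly. The actual training run implied by this config (DeepSpeed ZeRO-2, sample packing, ChatML formatting) is handled by the training framework itself rather than by this snippet.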