PathFinderKR
committed on
Commit
•
9ee8f14
1
Parent(s):
c2a7866
Update README.md
Browse files
README.md
CHANGED
@@ -155,7 +155,7 @@ The model is trained on the [MarkrAI/KoCommercial-Dataset](https://huggingface.c
|
|
155 |
|
156 |
### Training Procedure
|
157 |
|
158 |
-
The model training used LoRA for computational efficiency. 0.
|
159 |
|
160 |
#### Training Hyperparameters
|
161 |
|
@@ -173,8 +173,8 @@ bnb_4bit_use_double_quant=True
|
|
173 |
################################################################################
|
174 |
task_type="CAUSAL_LM"
|
175 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
|
176 |
-
r=
|
177 |
-
lora_alpha=
|
178 |
lora_dropout=0.1
|
179 |
bias="none"
|
180 |
|
@@ -182,19 +182,19 @@ bias="none"
|
|
182 |
# TrainingArguments parameters
|
183 |
################################################################################
|
184 |
num_train_epochs=1
|
185 |
-
per_device_train_batch_size=
|
186 |
gradient_accumulation_steps=1
|
187 |
gradient_checkpointing=True
|
188 |
learning_rate=2e-5
|
189 |
lr_scheduler_type="cosine"
|
190 |
warmup_ratio=0.1
|
191 |
-
optim = "
|
192 |
weight_decay=0.01
|
193 |
|
194 |
################################################################################
|
195 |
# SFT parameters
|
196 |
################################################################################
|
197 |
-
max_seq_length=
|
198 |
packing=False
|
199 |
```
|
200 |
|
|
|
155 |
|
156 |
### Training Procedure
|
157 |
|
158 |
+
The model training used LoRA for computational efficiency. 0.04 billion parameters(0.51% of total parameters) were trained.
|
159 |
|
160 |
#### Training Hyperparameters
|
161 |
|
|
|
173 |
################################################################################
|
174 |
task_type="CAUSAL_LM"
|
175 |
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
|
176 |
+
r=16
|
177 |
+
lora_alpha=32
|
178 |
lora_dropout=0.1
|
179 |
bias="none"
|
180 |
|
|
|
182 |
# TrainingArguments parameters
|
183 |
################################################################################
|
184 |
num_train_epochs=1
|
185 |
+
per_device_train_batch_size=1
|
186 |
gradient_accumulation_steps=1
|
187 |
gradient_checkpointing=True
|
188 |
learning_rate=2e-5
|
189 |
lr_scheduler_type="cosine"
|
190 |
warmup_ratio=0.1
|
191 |
+
optim = "paged_adamw_32bit"
|
192 |
weight_decay=0.01
|
193 |
|
194 |
################################################################################
|
195 |
# SFT parameters
|
196 |
################################################################################
|
197 |
+
max_seq_length=2048
|
198 |
packing=False
|
199 |
```
|
200 |
|