PathFinderKR committed
Commit 9ee8f14
Parent(s): c2a7866

Update README.md

Files changed (1):
  1. README.md +6 -6
README.md CHANGED

````diff
@@ -155,7 +155,7 @@ The model is trained on the [MarkrAI/KoCommercial-Dataset](https://huggingface.c
 
 ### Training Procedure
 
-The model training used LoRA for computational efficiency. 0.02 billion parameters(0.25% of total parameters) were trained.
+The model training used LoRA for computational efficiency. 0.04 billion parameters(0.51% of total parameters) were trained.
 
 #### Training Hyperparameters
 
@@ -173,8 +173,8 @@ bnb_4bit_use_double_quant=True
 ################################################################################
 task_type="CAUSAL_LM"
 target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
-r=8
-lora_alpha=16
+r=16
+lora_alpha=32
 lora_dropout=0.1
 bias="none"
 
@@ -182,19 +182,19 @@ bias="none"
 # TrainingArguments parameters
 ################################################################################
 num_train_epochs=1
-per_device_train_batch_size=2
+per_device_train_batch_size=1
 gradient_accumulation_steps=1
 gradient_checkpointing=True
 learning_rate=2e-5
 lr_scheduler_type="cosine"
 warmup_ratio=0.1
-optim = "paged_adamw_8bit"
+optim = "paged_adamw_32bit"
 weight_decay=0.01
 
 ################################################################################
 # SFT parameters
 ################################################################################
-max_seq_length=1024
+max_seq_length=2048
 packing=False
 ```
 
````
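The changed values are self-consistent: LoRA adds a low-rank update B·A (B is d×r, A is r×k) to each targeted weight matrix, so the adapter parameter count scales linearly with the rank, and doubling r from 8 to 16 matches the prose update from 0.02B (0.25%) to 0.04B (0.51%) of total parameters. As a minimal sketch of where these values land, assuming the standard peft/transformers setup implied by the README's section comments (the `output_dir` value and variable names here are illustrative, not part of the commit):

```python
# Sketch only: the parameter values come from this commit; the surrounding
# wiring is an assumption about the README's training setup.
from peft import LoraConfig
from transformers import TrainingArguments

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    r=16,            # raised from 8 in this commit
    lora_alpha=32,   # raised from 16, keeping lora_alpha = 2 * r
    lora_dropout=0.1,
    bias="none",
)

training_args = TrainingArguments(
    output_dir="./results",          # assumption: not specified in the diff
    num_train_epochs=1,
    per_device_train_batch_size=1,   # lowered from 2 in this commit
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    optim="paged_adamw_32bit",       # switched from paged_adamw_8bit
    weight_decay=0.01,
)

# The SFT parameters (max_seq_length=2048, packing=False) would go to trl's
# SFTTrainer; where exactly they are passed varies across trl versions.
```

One design observation: the batch size was halved while max_seq_length doubled, so the token count per optimizer step stays the same (2 × 1024 vs 1 × 2048), though attention cost still grows with the longer sequences.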