nroggendorff committed on
Commit
6a1ce4a
1 Parent(s): c62bc4a

Create config.txt

Files changed (1)
  1. config.txt +51 -0
config.txt ADDED
@@ -0,0 +1,51 @@
+ from trl import SFTConfig
+
+ class Config:
+     def __init__(self):
+         # Model and training hyperparameters
+         self.BATCH_SIZE = 16
+         self.EPOCHS = 3
+         self.LEARNING_RATE = 2e-4
+         self.MAX_SEQ_LENGTH = 512
+         self.VOCAB_SIZE = 32000
+         self.FP16 = True
+         self.WEIGHT_DECAY = 1e-3
+         self.GRADIENT_ACCUMULATION_STEPS = self.BATCH_SIZE // 4
+
+         # Dataset configurations
+         self.INPUT_DATASET = "HuggingFaceTB/smollm-corpus"
+         self.INSTRUCT_DATASET = "nroggendorff/elephant"
+         self.SHARD_SIZE = int(2e+5)
+
+         # Output and repo settings
+         self.OUTPUT_REPO = "nroggendorff/smallama"
+         self.PUSH_TO_HUB = True
+         self.INSTRUCT_FINETUNE_BOOL = False
+
+         # Training steps and warmup
+         self.FACTOR = 12 ** 3 // 2
+         self.TOTAL_STEPS = (self.SHARD_SIZE * self.EPOCHS) // (self.BATCH_SIZE * self.GRADIENT_ACCUMULATION_STEPS)
+         self.WARMUP_STEPS = int(self.TOTAL_STEPS * 0.1)
+
+         # Initial state for shard offset
+         self.INIT = 0
+
+         # ignore
+         self.getConfig = lambda: self._args()
+
+     # @staticmethod
+     def _args(self):
+         return SFTConfig(
+             output_dir="model",
+             num_train_epochs=self.EPOCHS,
+             per_device_train_batch_size=self.BATCH_SIZE,
+             learning_rate=self.LEARNING_RATE,
+             warmup_steps=self.WARMUP_STEPS,
+             weight_decay=self.WEIGHT_DECAY,
+             gradient_accumulation_steps=self.GRADIENT_ACCUMULATION_STEPS,
+             fp16=self.FP16,
+             save_steps=int(self.WARMUP_STEPS * 5),
+             logging_steps=int(self.WARMUP_STEPS),
+             save_total_limit=2,
+             report_to="none",
+         )
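
With the values committed here, the constructor arithmetic resolves to GRADIENT_ACCUMULATION_STEPS = 16 // 4 = 4, TOTAL_STEPS = (200000 * 3) // (16 * 4) = 9375, and WARMUP_STEPS = int(9375 * 0.1) = 937. A minimal usage sketch follows; it assumes the file above is saved as config.py so it is importable (the commit names it config.txt), and that trl is installed. The trainer wiring is an assumption for illustration, not part of this commit.

    # Minimal usage sketch — assumes config.py contains the class above.
    from config import Config

    config = Config()

    # Derived values from the constructor arithmetic:
    #   GRADIENT_ACCUMULATION_STEPS = 16 // 4          -> 4
    #   TOTAL_STEPS = (200_000 * 3) // (16 * 4)        -> 9375
    #   WARMUP_STEPS = int(9375 * 0.1)                 -> 937
    print(config.GRADIENT_ACCUMULATION_STEPS)  # 4
    print(config.TOTAL_STEPS)                  # 9375
    print(config.WARMUP_STEPS)                 # 937

    # getConfig() returns the SFTConfig; presumably it is passed to an
    # SFTTrainer elsewhere in the repo (not shown in this commit).
    args = config.getConfig()
    print(args.per_device_train_batch_size)    # 16
    print(args.save_steps)                     # 4685 (937 * 5)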