NeMo
jiaqiz committed on
Commit
1270a84
1 Parent(s): 91adaf3

Update model_config.yaml

Browse files
Files changed (1) hide show
  1. model_config.yaml +7 -7
model_config.yaml CHANGED
@@ -80,7 +80,7 @@ reduce_amax: true
80
  use_emha: false
81
  optim:
82
  name: distributed_fused_adam
83
- lr: 3.002e-07
84
  weight_decay: 0.1
85
  betas:
86
  - 0.9
@@ -110,7 +110,7 @@ data:
110
  num_workers: 2
111
  dataloader_type: single
112
  train_ds:
113
- file_path: /dataset/daring-anteater_commercial.shuf.removelong.jsonl
114
  global_batch_size: 128
115
  micro_batch_size: 1
116
  shuffle: true
@@ -153,7 +153,7 @@ data:
153
  hf_dataset: true
154
  truncation_method: right
155
  validation_ds:
156
- file_path: /dataset/daring-anteater_commercial.shuf.removelong.jsonl
157
  names: null
158
  global_batch_size: 128
159
  micro_batch_size: 1
@@ -238,13 +238,13 @@ data:
238
  index_mapping_dir: /indexmap_dir
239
  data_prefix:
240
  train:
241
- - /datasets/v30_benign-walrus_clip153600.jsonl
242
  validation:
243
- - /datasets/v30_benign-walrus_clip153600.jsonl
244
  test:
245
- - /datasets/v30_benign-walrus_clip153600.jsonl
246
  answer_only_loss: true
247
- restore_from_path: /models/340B_100p_CT_100B
248
  save_nemo_on_validation_end: true
249
  use_flash_attention: null
250
  pipeline_model_parallel_split_rank: 0
 
80
  use_emha: false
81
  optim:
82
  name: distributed_fused_adam
83
+ lr: 3.001e-07
84
  weight_decay: 0.1
85
  betas:
86
  - 0.9
 
110
  num_workers: 2
111
  dataloader_type: single
112
  train_ds:
113
+ file_path: /dataset/train.jsonl
114
  global_batch_size: 128
115
  micro_batch_size: 1
116
  shuffle: true
 
153
  hf_dataset: true
154
  truncation_method: right
155
  validation_ds:
156
+ file_path: /dataset/val.jsonl
157
  names: null
158
  global_batch_size: 128
159
  micro_batch_size: 1
 
238
  index_mapping_dir: /indexmap_dir
239
  data_prefix:
240
  train:
241
+ - /datasets/train.jsonl
242
  validation:
243
+ - /datasets/val.jsonl
244
  test:
245
+ - /datasets/val.jsonl
246
  answer_only_loss: true
247
+ restore_from_path: /models/340B_base
248
  save_nemo_on_validation_end: true
249
  use_flash_attention: null
250
  pipeline_model_parallel_split_rank: 0