hlzhang109
commited on
Commit
•
a274810
1
Parent(s):
ebc1546
Upload folder using huggingface_hub
Browse files
models/down_tau=64_1b/config.yaml
ADDED
@@ -0,0 +1,789 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
method: full
|
2 |
+
score: rho
|
3 |
+
score_combination: max
|
4 |
+
fix_learner: false
|
5 |
+
granularity: sequence
|
6 |
+
select_random: false
|
7 |
+
select_frac: 1.0
|
8 |
+
just_score_reference: false
|
9 |
+
collect_learner_score: false
|
10 |
+
collect_reference_score: false
|
11 |
+
sft: false
|
12 |
+
sft_use_label: false
|
13 |
+
sft_dataset: null
|
14 |
+
reference_models: null
|
15 |
+
update_reference: false
|
16 |
+
learner_model: null
|
17 |
+
train_online: false
|
18 |
+
fix_reference: false
|
19 |
+
data_start_step: null
|
20 |
+
run_name: olmo_35013764_1
|
21 |
+
seed: 1
|
22 |
+
epoch: null
|
23 |
+
dry_run: false
|
24 |
+
model:
|
25 |
+
d_model: 2048
|
26 |
+
n_heads: 32
|
27 |
+
n_kv_heads: null
|
28 |
+
clip_qkv: null
|
29 |
+
n_layers: 24
|
30 |
+
mlp_ratio: 4
|
31 |
+
mlp_hidden_size: 8192
|
32 |
+
activation_type: gelu
|
33 |
+
block_type: sequential
|
34 |
+
block_group_size: 1
|
35 |
+
alibi: false
|
36 |
+
alibi_bias_max: 8.0
|
37 |
+
rope: true
|
38 |
+
rope_full_precision: true
|
39 |
+
flash_attention: false
|
40 |
+
attention_dropout: 0.0
|
41 |
+
multi_query_attention: false
|
42 |
+
attention_layer_norm: true
|
43 |
+
residual_dropout: 0.0
|
44 |
+
embedding_dropout: 0.0
|
45 |
+
layer_norm_type: default
|
46 |
+
layer_norm_with_affine: true
|
47 |
+
attention_layer_norm_with_affine: true
|
48 |
+
max_sequence_length: 512
|
49 |
+
include_bias: false
|
50 |
+
bias_for_layer_norm: false
|
51 |
+
scale_logits: false
|
52 |
+
vocab_size: 50280
|
53 |
+
embedding_size: 50304
|
54 |
+
weight_tying: false
|
55 |
+
eos_token_id: 0
|
56 |
+
pad_token_id: 1
|
57 |
+
init_device: meta
|
58 |
+
init_fn: mitchell
|
59 |
+
init_std: 0.02
|
60 |
+
init_cutoff_factor: null
|
61 |
+
precision: amp_bf16
|
62 |
+
optimizer:
|
63 |
+
name: adamw
|
64 |
+
learning_rate: 0.001
|
65 |
+
weight_decay: 0.0
|
66 |
+
betas:
|
67 |
+
- 0.9
|
68 |
+
- 0.95
|
69 |
+
eps: 1.0e-15
|
70 |
+
no_decay_norm_and_bias: null
|
71 |
+
decay_norm_and_bias: false
|
72 |
+
decay_embeddings: false
|
73 |
+
metrics_log_interval: 100
|
74 |
+
scheduler:
|
75 |
+
name: cosine_with_warmup
|
76 |
+
units: steps
|
77 |
+
t_warmup: 8000
|
78 |
+
t_max: null
|
79 |
+
alpha_f: 0.1
|
80 |
+
grad_clip_warmup_steps: null
|
81 |
+
grad_clip_warmup_factor: null
|
82 |
+
warmup_min_lr: null
|
83 |
+
data:
|
84 |
+
paths:
|
85 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-00-00001.npy
|
86 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-01-00000.npy
|
87 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-01-00001.npy
|
88 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00000.npy
|
89 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00001.npy
|
90 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00002.npy
|
91 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-02-00003.npy
|
92 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-03-00000.npy
|
93 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-03-00001.npy
|
94 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-04-00000.npy
|
95 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-04-00001.npy
|
96 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00000.npy
|
97 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00001.npy
|
98 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00002.npy
|
99 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-05-00003.npy
|
100 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-06-00000.npy
|
101 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-06-00001.npy
|
102 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-07-00000.npy
|
103 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-07-00001.npy
|
104 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00000.npy
|
105 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00001.npy
|
106 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00002.npy
|
107 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-08-00003.npy
|
108 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-09-00000.npy
|
109 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-09-00001.npy
|
110 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-10-00000.npy
|
111 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-10-00001.npy
|
112 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00000.npy
|
113 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00001.npy
|
114 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00002.npy
|
115 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-11-00003.npy
|
116 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-12-00000.npy
|
117 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-12-00001.npy
|
118 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-13-00000.npy
|
119 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-13-00001.npy
|
120 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00000.npy
|
121 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00001.npy
|
122 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00002.npy
|
123 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-14-00003.npy
|
124 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-15-00000.npy
|
125 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-15-00001.npy
|
126 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-16-00000.npy
|
127 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-16-00001.npy
|
128 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00000.npy
|
129 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00001.npy
|
130 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00002.npy
|
131 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-17-00003.npy
|
132 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-18-00000.npy
|
133 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-18-00001.npy
|
134 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-19-00000.npy
|
135 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-19-00001.npy
|
136 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00000.npy
|
137 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00001.npy
|
138 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00002.npy
|
139 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-20-00003.npy
|
140 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-21-00000.npy
|
141 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-21-00001.npy
|
142 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-22-00000.npy
|
143 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-22-00001.npy
|
144 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00000.npy
|
145 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00001.npy
|
146 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-23-00002.npy
|
147 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-24-00000.npy
|
148 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-24-00001.npy
|
149 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-25-00000.npy
|
150 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-25-00001.npy
|
151 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00000.npy
|
152 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00001.npy
|
153 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00002.npy
|
154 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-26-00003.npy
|
155 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-27-00000.npy
|
156 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-27-00001.npy
|
157 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-28-00000.npy
|
158 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-28-00001.npy
|
159 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00000.npy
|
160 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00001.npy
|
161 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00002.npy
|
162 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-29-00003.npy
|
163 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-30-00000.npy
|
164 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-30-00001.npy
|
165 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00000.npy
|
166 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00001.npy
|
167 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00002.npy
|
168 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-31-00003.npy
|
169 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-32-00000.npy
|
170 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-32-00001.npy
|
171 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-33-00000.npy
|
172 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-33-00001.npy
|
173 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00000.npy
|
174 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00001.npy
|
175 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00002.npy
|
176 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-34-00003.npy
|
177 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-35-00000.npy
|
178 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-35-00001.npy
|
179 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-36-00000.npy
|
180 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-36-00001.npy
|
181 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00000.npy
|
182 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00001.npy
|
183 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00002.npy
|
184 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-37-00003.npy
|
185 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-38-00000.npy
|
186 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-38-00001.npy
|
187 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-39-00000.npy
|
188 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-39-00001.npy
|
189 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00000.npy
|
190 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00001.npy
|
191 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00002.npy
|
192 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-40-00003.npy
|
193 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-41-00000.npy
|
194 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-41-00001.npy
|
195 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-42-00000.npy
|
196 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-42-00001.npy
|
197 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00000.npy
|
198 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00001.npy
|
199 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00002.npy
|
200 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-43-00003.npy
|
201 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-44-00000.npy
|
202 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-44-00001.npy
|
203 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-45-00000.npy
|
204 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-45-00001.npy
|
205 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00000.npy
|
206 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00001.npy
|
207 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00002.npy
|
208 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-46-00003.npy
|
209 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-47-00000.npy
|
210 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-47-00001.npy
|
211 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-48-00000.npy
|
212 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-48-00001.npy
|
213 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00000.npy
|
214 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00001.npy
|
215 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00002.npy
|
216 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-49-00003.npy
|
217 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-50-00000.npy
|
218 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-50-00001.npy
|
219 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-51-00000.npy
|
220 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-51-00001.npy
|
221 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00000.npy
|
222 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00001.npy
|
223 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00002.npy
|
224 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-52-00003.npy
|
225 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-53-00000.npy
|
226 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-53-00001.npy
|
227 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-54-00000.npy
|
228 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-54-00001.npy
|
229 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00000.npy
|
230 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00001.npy
|
231 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00002.npy
|
232 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-55-00003.npy
|
233 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-56-00000.npy
|
234 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-56-00001.npy
|
235 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-57-00000.npy
|
236 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-57-00001.npy
|
237 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00000.npy
|
238 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00001.npy
|
239 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00002.npy
|
240 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-58-00003.npy
|
241 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-59-00000.npy
|
242 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-59-00001.npy
|
243 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-60-00000.npy
|
244 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-60-00001.npy
|
245 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00000.npy
|
246 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00001.npy
|
247 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00002.npy
|
248 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-61-00003.npy
|
249 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-62-00000.npy
|
250 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-62-00001.npy
|
251 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00000.npy
|
252 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00001.npy
|
253 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00002.npy
|
254 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4/part-63-00003.npy
|
255 |
+
datasets: null
|
256 |
+
label_mask_paths: null
|
257 |
+
pad_direction: right
|
258 |
+
generate_attention_mask: false
|
259 |
+
num_workers: 16
|
260 |
+
drop_last: true
|
261 |
+
pin_memory: true
|
262 |
+
prefetch_factor: 16
|
263 |
+
persistent_workers: true
|
264 |
+
timeout: 0
|
265 |
+
seed: null
|
266 |
+
extra_data_paths: null
|
267 |
+
extra_data_key: null
|
268 |
+
load_extra_data_to_ram: false
|
269 |
+
index_path: /n/holyscratch01/sham_lab/data-olmo/data/35006380_1/selected_indices.npy
|
270 |
+
restore_dataloader: true
|
271 |
+
fast_forward_batches: null
|
272 |
+
evaluators:
|
273 |
+
- label: books
|
274 |
+
type: lm
|
275 |
+
data:
|
276 |
+
paths: null
|
277 |
+
datasets:
|
278 |
+
books_val:
|
279 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/books_val/books_val.npy
|
280 |
+
label_mask_paths: null
|
281 |
+
pad_direction: right
|
282 |
+
generate_attention_mask: false
|
283 |
+
num_workers: 0
|
284 |
+
drop_last: true
|
285 |
+
pin_memory: false
|
286 |
+
prefetch_factor: null
|
287 |
+
persistent_workers: false
|
288 |
+
timeout: 0
|
289 |
+
seed: null
|
290 |
+
extra_data_paths: null
|
291 |
+
extra_data_key: null
|
292 |
+
load_extra_data_to_ram: false
|
293 |
+
index_path: null
|
294 |
+
device_eval_batch_size: null
|
295 |
+
subset_num_batches: null
|
296 |
+
sft_use_label: false
|
297 |
+
sft: false
|
298 |
+
- label: c4
|
299 |
+
type: lm
|
300 |
+
data:
|
301 |
+
paths: null
|
302 |
+
datasets:
|
303 |
+
c4_val:
|
304 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/c4_val/part-00-00000.npy
|
305 |
+
label_mask_paths: null
|
306 |
+
pad_direction: right
|
307 |
+
generate_attention_mask: false
|
308 |
+
num_workers: 0
|
309 |
+
drop_last: true
|
310 |
+
pin_memory: false
|
311 |
+
prefetch_factor: null
|
312 |
+
persistent_workers: false
|
313 |
+
timeout: 0
|
314 |
+
seed: null
|
315 |
+
extra_data_paths: null
|
316 |
+
extra_data_key: null
|
317 |
+
load_extra_data_to_ram: false
|
318 |
+
index_path: null
|
319 |
+
device_eval_batch_size: null
|
320 |
+
subset_num_batches: null
|
321 |
+
sft_use_label: false
|
322 |
+
sft: false
|
323 |
+
- label: other
|
324 |
+
type: lm
|
325 |
+
data:
|
326 |
+
paths: null
|
327 |
+
datasets:
|
328 |
+
wiki-en-simple_val:
|
329 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/wiki-en-simple_val/wiki-en-simple_val.npy
|
330 |
+
stack-code_val:
|
331 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/stack-code_val/part-00-00000.npy
|
332 |
+
cc_en_head_val:
|
333 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/cc_en_head_val/part-00-00000.npy
|
334 |
+
peS2o_val:
|
335 |
+
- /n/holylfs06/LABS/kempner_shared/Lab/data/dolma/preprocessed/eleuther-ai-gpt-neox-20b-pii-special/peS2o_val/part-01-00000.npy
|
336 |
+
label_mask_paths: null
|
337 |
+
pad_direction: right
|
338 |
+
generate_attention_mask: false
|
339 |
+
num_workers: 0
|
340 |
+
drop_last: true
|
341 |
+
pin_memory: false
|
342 |
+
prefetch_factor: null
|
343 |
+
persistent_workers: false
|
344 |
+
timeout: 0
|
345 |
+
seed: null
|
346 |
+
extra_data_paths: null
|
347 |
+
extra_data_key: null
|
348 |
+
load_extra_data_to_ram: false
|
349 |
+
index_path: null
|
350 |
+
device_eval_batch_size: null
|
351 |
+
subset_num_batches: null
|
352 |
+
sft_use_label: false
|
353 |
+
sft: false
|
354 |
+
- label: piqa_train
|
355 |
+
type: downstream
|
356 |
+
data:
|
357 |
+
paths: null
|
358 |
+
datasets: null
|
359 |
+
label_mask_paths: null
|
360 |
+
pad_direction: right
|
361 |
+
generate_attention_mask: false
|
362 |
+
num_workers: 0
|
363 |
+
drop_last: false
|
364 |
+
pin_memory: false
|
365 |
+
prefetch_factor: null
|
366 |
+
persistent_workers: false
|
367 |
+
timeout: 0
|
368 |
+
seed: null
|
369 |
+
extra_data_paths: null
|
370 |
+
extra_data_key: null
|
371 |
+
load_extra_data_to_ram: false
|
372 |
+
index_path: null
|
373 |
+
device_eval_batch_size: null
|
374 |
+
subset_num_batches: null
|
375 |
+
sft_use_label: false
|
376 |
+
sft: false
|
377 |
+
- label: openbook_qa_train
|
378 |
+
type: downstream
|
379 |
+
data:
|
380 |
+
paths: null
|
381 |
+
datasets: null
|
382 |
+
label_mask_paths: null
|
383 |
+
pad_direction: right
|
384 |
+
generate_attention_mask: false
|
385 |
+
num_workers: 0
|
386 |
+
drop_last: false
|
387 |
+
pin_memory: false
|
388 |
+
prefetch_factor: null
|
389 |
+
persistent_workers: false
|
390 |
+
timeout: 0
|
391 |
+
seed: null
|
392 |
+
extra_data_paths: null
|
393 |
+
extra_data_key: null
|
394 |
+
load_extra_data_to_ram: false
|
395 |
+
index_path: null
|
396 |
+
device_eval_batch_size: null
|
397 |
+
subset_num_batches: null
|
398 |
+
sft_use_label: false
|
399 |
+
sft: false
|
400 |
+
- label: hellaswag_train
|
401 |
+
type: downstream
|
402 |
+
data:
|
403 |
+
paths: null
|
404 |
+
datasets: null
|
405 |
+
label_mask_paths: null
|
406 |
+
pad_direction: right
|
407 |
+
generate_attention_mask: false
|
408 |
+
num_workers: 0
|
409 |
+
drop_last: false
|
410 |
+
pin_memory: false
|
411 |
+
prefetch_factor: null
|
412 |
+
persistent_workers: false
|
413 |
+
timeout: 0
|
414 |
+
seed: null
|
415 |
+
extra_data_paths: null
|
416 |
+
extra_data_key: null
|
417 |
+
load_extra_data_to_ram: false
|
418 |
+
index_path: null
|
419 |
+
device_eval_batch_size: null
|
420 |
+
subset_num_batches: null
|
421 |
+
sft_use_label: false
|
422 |
+
sft: false
|
423 |
+
- label: winogrande_train
|
424 |
+
type: downstream
|
425 |
+
data:
|
426 |
+
paths: null
|
427 |
+
datasets: null
|
428 |
+
label_mask_paths: null
|
429 |
+
pad_direction: right
|
430 |
+
generate_attention_mask: false
|
431 |
+
num_workers: 0
|
432 |
+
drop_last: false
|
433 |
+
pin_memory: false
|
434 |
+
prefetch_factor: null
|
435 |
+
persistent_workers: false
|
436 |
+
timeout: 0
|
437 |
+
seed: null
|
438 |
+
extra_data_paths: null
|
439 |
+
extra_data_key: null
|
440 |
+
load_extra_data_to_ram: false
|
441 |
+
index_path: null
|
442 |
+
device_eval_batch_size: null
|
443 |
+
subset_num_batches: null
|
444 |
+
sft_use_label: false
|
445 |
+
sft: false
|
446 |
+
- label: arc_easy_train
|
447 |
+
type: downstream
|
448 |
+
data:
|
449 |
+
paths: null
|
450 |
+
datasets: null
|
451 |
+
label_mask_paths: null
|
452 |
+
pad_direction: right
|
453 |
+
generate_attention_mask: false
|
454 |
+
num_workers: 0
|
455 |
+
drop_last: false
|
456 |
+
pin_memory: false
|
457 |
+
prefetch_factor: null
|
458 |
+
persistent_workers: false
|
459 |
+
timeout: 0
|
460 |
+
seed: null
|
461 |
+
extra_data_paths: null
|
462 |
+
extra_data_key: null
|
463 |
+
load_extra_data_to_ram: false
|
464 |
+
index_path: null
|
465 |
+
device_eval_batch_size: null
|
466 |
+
subset_num_batches: null
|
467 |
+
sft_use_label: false
|
468 |
+
sft: false
|
469 |
+
- label: arc_challenge_train
|
470 |
+
type: downstream
|
471 |
+
data:
|
472 |
+
paths: null
|
473 |
+
datasets: null
|
474 |
+
label_mask_paths: null
|
475 |
+
pad_direction: right
|
476 |
+
generate_attention_mask: false
|
477 |
+
num_workers: 0
|
478 |
+
drop_last: false
|
479 |
+
pin_memory: false
|
480 |
+
prefetch_factor: null
|
481 |
+
persistent_workers: false
|
482 |
+
timeout: 0
|
483 |
+
seed: null
|
484 |
+
extra_data_paths: null
|
485 |
+
extra_data_key: null
|
486 |
+
load_extra_data_to_ram: false
|
487 |
+
index_path: null
|
488 |
+
device_eval_batch_size: null
|
489 |
+
subset_num_batches: null
|
490 |
+
sft_use_label: false
|
491 |
+
sft: false
|
492 |
+
- label: boolq_train
|
493 |
+
type: downstream
|
494 |
+
data:
|
495 |
+
paths: null
|
496 |
+
datasets: null
|
497 |
+
label_mask_paths: null
|
498 |
+
pad_direction: right
|
499 |
+
generate_attention_mask: false
|
500 |
+
num_workers: 0
|
501 |
+
drop_last: false
|
502 |
+
pin_memory: false
|
503 |
+
prefetch_factor: null
|
504 |
+
persistent_workers: false
|
505 |
+
timeout: 0
|
506 |
+
seed: null
|
507 |
+
extra_data_paths: null
|
508 |
+
extra_data_key: null
|
509 |
+
load_extra_data_to_ram: false
|
510 |
+
index_path: null
|
511 |
+
device_eval_batch_size: null
|
512 |
+
subset_num_batches: null
|
513 |
+
sft_use_label: false
|
514 |
+
sft: false
|
515 |
+
- label: sciq_train
|
516 |
+
type: downstream
|
517 |
+
data:
|
518 |
+
paths: null
|
519 |
+
datasets: null
|
520 |
+
label_mask_paths: null
|
521 |
+
pad_direction: right
|
522 |
+
generate_attention_mask: false
|
523 |
+
num_workers: 0
|
524 |
+
drop_last: false
|
525 |
+
pin_memory: false
|
526 |
+
prefetch_factor: null
|
527 |
+
persistent_workers: false
|
528 |
+
timeout: 0
|
529 |
+
seed: null
|
530 |
+
extra_data_paths: null
|
531 |
+
extra_data_key: null
|
532 |
+
load_extra_data_to_ram: false
|
533 |
+
index_path: null
|
534 |
+
device_eval_batch_size: null
|
535 |
+
subset_num_batches: null
|
536 |
+
sft_use_label: false
|
537 |
+
sft: false
|
538 |
+
- label: piqa_test
|
539 |
+
type: downstream
|
540 |
+
data:
|
541 |
+
paths: null
|
542 |
+
datasets: null
|
543 |
+
label_mask_paths: null
|
544 |
+
pad_direction: right
|
545 |
+
generate_attention_mask: false
|
546 |
+
num_workers: 0
|
547 |
+
drop_last: false
|
548 |
+
pin_memory: false
|
549 |
+
prefetch_factor: null
|
550 |
+
persistent_workers: false
|
551 |
+
timeout: 0
|
552 |
+
seed: null
|
553 |
+
extra_data_paths: null
|
554 |
+
extra_data_key: null
|
555 |
+
load_extra_data_to_ram: false
|
556 |
+
index_path: null
|
557 |
+
device_eval_batch_size: null
|
558 |
+
subset_num_batches: 1000
|
559 |
+
sft_use_label: false
|
560 |
+
sft: false
|
561 |
+
- label: openbook_qa_test
|
562 |
+
type: downstream
|
563 |
+
data:
|
564 |
+
paths: null
|
565 |
+
datasets: null
|
566 |
+
label_mask_paths: null
|
567 |
+
pad_direction: right
|
568 |
+
generate_attention_mask: false
|
569 |
+
num_workers: 0
|
570 |
+
drop_last: false
|
571 |
+
pin_memory: false
|
572 |
+
prefetch_factor: null
|
573 |
+
persistent_workers: false
|
574 |
+
timeout: 0
|
575 |
+
seed: null
|
576 |
+
extra_data_paths: null
|
577 |
+
extra_data_key: null
|
578 |
+
load_extra_data_to_ram: false
|
579 |
+
index_path: null
|
580 |
+
device_eval_batch_size: null
|
581 |
+
subset_num_batches: 1000
|
582 |
+
sft_use_label: false
|
583 |
+
sft: false
|
584 |
+
- label: hellaswag_test
|
585 |
+
type: downstream
|
586 |
+
data:
|
587 |
+
paths: null
|
588 |
+
datasets: null
|
589 |
+
label_mask_paths: null
|
590 |
+
pad_direction: right
|
591 |
+
generate_attention_mask: false
|
592 |
+
num_workers: 0
|
593 |
+
drop_last: false
|
594 |
+
pin_memory: false
|
595 |
+
prefetch_factor: null
|
596 |
+
persistent_workers: false
|
597 |
+
timeout: 0
|
598 |
+
seed: null
|
599 |
+
extra_data_paths: null
|
600 |
+
extra_data_key: null
|
601 |
+
load_extra_data_to_ram: false
|
602 |
+
index_path: null
|
603 |
+
device_eval_batch_size: null
|
604 |
+
subset_num_batches: 1000
|
605 |
+
sft_use_label: false
|
606 |
+
sft: false
|
607 |
+
- label: winogrande_test
|
608 |
+
type: downstream
|
609 |
+
data:
|
610 |
+
paths: null
|
611 |
+
datasets: null
|
612 |
+
label_mask_paths: null
|
613 |
+
pad_direction: right
|
614 |
+
generate_attention_mask: false
|
615 |
+
num_workers: 0
|
616 |
+
drop_last: false
|
617 |
+
pin_memory: false
|
618 |
+
prefetch_factor: null
|
619 |
+
persistent_workers: false
|
620 |
+
timeout: 0
|
621 |
+
seed: null
|
622 |
+
extra_data_paths: null
|
623 |
+
extra_data_key: null
|
624 |
+
load_extra_data_to_ram: false
|
625 |
+
index_path: null
|
626 |
+
device_eval_batch_size: null
|
627 |
+
subset_num_batches: 1000
|
628 |
+
sft_use_label: false
|
629 |
+
sft: false
|
630 |
+
- label: arc_easy_test
|
631 |
+
type: downstream
|
632 |
+
data:
|
633 |
+
paths: null
|
634 |
+
datasets: null
|
635 |
+
label_mask_paths: null
|
636 |
+
pad_direction: right
|
637 |
+
generate_attention_mask: false
|
638 |
+
num_workers: 0
|
639 |
+
drop_last: false
|
640 |
+
pin_memory: false
|
641 |
+
prefetch_factor: null
|
642 |
+
persistent_workers: false
|
643 |
+
timeout: 0
|
644 |
+
seed: null
|
645 |
+
extra_data_paths: null
|
646 |
+
extra_data_key: null
|
647 |
+
load_extra_data_to_ram: false
|
648 |
+
index_path: null
|
649 |
+
device_eval_batch_size: null
|
650 |
+
subset_num_batches: 1000
|
651 |
+
sft_use_label: false
|
652 |
+
sft: false
|
653 |
+
- label: arc_challenge_test
|
654 |
+
type: downstream
|
655 |
+
data:
|
656 |
+
paths: null
|
657 |
+
datasets: null
|
658 |
+
label_mask_paths: null
|
659 |
+
pad_direction: right
|
660 |
+
generate_attention_mask: false
|
661 |
+
num_workers: 0
|
662 |
+
drop_last: false
|
663 |
+
pin_memory: false
|
664 |
+
prefetch_factor: null
|
665 |
+
persistent_workers: false
|
666 |
+
timeout: 0
|
667 |
+
seed: null
|
668 |
+
extra_data_paths: null
|
669 |
+
extra_data_key: null
|
670 |
+
load_extra_data_to_ram: false
|
671 |
+
index_path: null
|
672 |
+
device_eval_batch_size: null
|
673 |
+
subset_num_batches: 1000
|
674 |
+
sft_use_label: false
|
675 |
+
sft: false
|
676 |
+
- label: boolq_test
|
677 |
+
type: downstream
|
678 |
+
data:
|
679 |
+
paths: null
|
680 |
+
datasets: null
|
681 |
+
label_mask_paths: null
|
682 |
+
pad_direction: right
|
683 |
+
generate_attention_mask: false
|
684 |
+
num_workers: 0
|
685 |
+
drop_last: false
|
686 |
+
pin_memory: false
|
687 |
+
prefetch_factor: null
|
688 |
+
persistent_workers: false
|
689 |
+
timeout: 0
|
690 |
+
seed: null
|
691 |
+
extra_data_paths: null
|
692 |
+
extra_data_key: null
|
693 |
+
load_extra_data_to_ram: false
|
694 |
+
index_path: null
|
695 |
+
device_eval_batch_size: null
|
696 |
+
subset_num_batches: 1000
|
697 |
+
sft_use_label: false
|
698 |
+
sft: false
|
699 |
+
- label: sciq_test
|
700 |
+
type: downstream
|
701 |
+
data:
|
702 |
+
paths: null
|
703 |
+
datasets: null
|
704 |
+
label_mask_paths: null
|
705 |
+
pad_direction: right
|
706 |
+
generate_attention_mask: false
|
707 |
+
num_workers: 0
|
708 |
+
drop_last: false
|
709 |
+
pin_memory: false
|
710 |
+
prefetch_factor: null
|
711 |
+
persistent_workers: false
|
712 |
+
timeout: 0
|
713 |
+
seed: null
|
714 |
+
extra_data_paths: null
|
715 |
+
extra_data_key: null
|
716 |
+
load_extra_data_to_ram: false
|
717 |
+
index_path: null
|
718 |
+
device_eval_batch_size: null
|
719 |
+
subset_num_batches: 1000
|
720 |
+
sft_use_label: false
|
721 |
+
sft: false
|
722 |
+
eval_interval: 2000
|
723 |
+
tokenizer:
|
724 |
+
identifier: allenai/eleuther-ai-gpt-neox-20b-pii-special
|
725 |
+
truncate_direction: right
|
726 |
+
save_folder: /n/holyscratch01/sham_lab/data-olmo/ckpts/35013764_1/
|
727 |
+
remote_save_folder: null
|
728 |
+
canceled_check_interval: 50
|
729 |
+
save_interval: 5000
|
730 |
+
save_interval_unsharded: 100000
|
731 |
+
save_interval_ephemeral: null
|
732 |
+
save_num_checkpoints_to_keep: 1
|
733 |
+
save_num_unsharded_checkpoints_to_keep: 1
|
734 |
+
save_overwrite: true
|
735 |
+
force_save_unsharded: false
|
736 |
+
no_pre_train_checkpoint: false
|
737 |
+
load_path: null
|
738 |
+
load_path_sharded_checkpointer: null
|
739 |
+
reset_optimizer_state: false
|
740 |
+
reset_trainer_state: false
|
741 |
+
sharded_checkpointer: torch_legacy
|
742 |
+
new_style_checkpoints: null
|
743 |
+
max_duration: 192000
|
744 |
+
global_train_batch_size: 256
|
745 |
+
device_train_batch_size: 64
|
746 |
+
device_train_microbatch_size: 32
|
747 |
+
device_eval_batch_size: 64
|
748 |
+
eval_subset_num_batches: 100
|
749 |
+
eval_on_load: false
|
750 |
+
device_train_grad_accum: 2
|
751 |
+
max_grad_norm: 1.0
|
752 |
+
max_grad_norm_ratio: null
|
753 |
+
precision: amp_bf16
|
754 |
+
wandb:
|
755 |
+
project: data-olmo
|
756 |
+
entity: harvardml
|
757 |
+
group: frozen-150-global-1b-5
|
758 |
+
name: olmo_35013764_1
|
759 |
+
tags:
|
760 |
+
- watching
|
761 |
+
log_artifacts: false
|
762 |
+
rank_zero_only: true
|
763 |
+
log_interval: 10
|
764 |
+
speed_monitor:
|
765 |
+
window_size: 1
|
766 |
+
gpu_flops_available: null
|
767 |
+
console_log_interval: 10
|
768 |
+
gen1_gc_interval: 1
|
769 |
+
compile:
|
770 |
+
mode: default
|
771 |
+
fullgraph: false
|
772 |
+
backend: inductor
|
773 |
+
fsdp:
|
774 |
+
use_orig_params: true
|
775 |
+
sharding_strategy: FULL_SHARD
|
776 |
+
wrapping_strategy: null
|
777 |
+
precision: mixed
|
778 |
+
hybrid_sharding_num_model_replicas: null
|
779 |
+
softmax_auxiliary_loss: true
|
780 |
+
time_limit: 2100000.0
|
781 |
+
extra_steps_after_cancel: 10
|
782 |
+
early_stopping_factor: null
|
783 |
+
save_data_indices: true
|
784 |
+
python_profiling: false
|
785 |
+
torch_profiling: false
|
786 |
+
stop_at: null
|
787 |
+
stop_after: null
|
788 |
+
activation_checkpointing: null
|
789 |
+
fused_loss: null
|
models/down_tau=64_1b/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af00703fdefb438dce80a5da5091b5d49c8c50d5b7caf5170d6a0df1674b7532
|
3 |
+
size 5656891654
|
models/down_tau=64_1b/optim.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00237c80c3f2d3c3bb87ebcc9738798fcadcf6a3b839943a4bbfa9760893b2db
|
3 |
+
size 11313806634
|
models/down_tau=64_1b/train.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f39e4b80f95ce745483d3ce0726db73901201bec84088b0fad01b867be06942c
|
3 |
+
size 14604
|