Muennighoff committed on
Commit 7853d0d
1 Parent(s): da6b743

Better model with bs=256

README.md CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
 
 ## Evaluation Results
 
-For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
+For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
 
 ## Training
 The model was trained with the parameters:
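The README defers to the SGPT codebase for full usage instructions. For orientation only, here is a minimal sketch of loading this checkpoint with sentence-transformers; it assumes a Hub ID of Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit (the model name used in eval/beir.json) and skips the SPECB bracket-token handling that the linked codebase applies for asymmetric search.

```python
# Minimal sketch, not the authoritative recipe from the SGPT codebase.
# Assumed Hub ID: Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

model = SentenceTransformer("Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit")

# Embed a query and two candidate documents, then score by cosine similarity.
query_emb = model.encode(["what is bitfit fine-tuning"])
doc_embs = model.encode([
    "BitFit tunes only the bias parameters of a pretrained transformer.",
    "Paris is the capital of France.",
])
print(cos_sim(query_emb, doc_embs))  # the on-topic document should score higher
```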
config.json CHANGED
@@ -67,7 +67,7 @@
   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
+  "transformers_version": "4.20.0.dev0",
   "use_cache": true,
   "vocab_size": 50259,
   "window_size": 256
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.1.0",
-    "transformers": "4.11.3",
-    "pytorch": "1.10.1"
+    "transformers": "4.20.0.dev0",
+    "pytorch": "1.10.2"
   }
 }
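The two config diffs above record the library versions this checkpoint was saved with (transformers 4.20.0.dev0, sentence-transformers 2.1.0, pytorch 1.10.2). A small sketch, assuming the repo has been cloned so the committed file names are available locally, for flagging mismatches against the installed packages:

```python
# Sketch: warn when the local environment differs from the versions recorded in this commit.
import json

import sentence_transformers
import torch
import transformers

with open("config_sentence_transformers.json") as f:
    recorded = json.load(f)["__version__"]
with open("config.json") as f:
    recorded["transformers (config.json)"] = json.load(f)["transformers_version"]

installed = {
    "sentence_transformers": sentence_transformers.__version__,
    "transformers": transformers.__version__,
    "transformers (config.json)": transformers.__version__,
    "pytorch": torch.__version__,
}
for name, want in recorded.items():
    have = installed.get(name, "unknown")
    if have != want:
        print(f"{name}: recorded {want}, installed {have}")
```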
eval/beir.json ADDED
@@ -0,0 +1,276 @@
+{
+  "SGPT-1.3B-weightedmean-msmarco-specb-bitfit": {
+    "cqadupstack_android": {
+      "NDCG@1": 0.29185,
+      "NDCG@3": 0.32944,
+      "NDCG@5": 0.35929,
+      "NDCG@10": 0.38723,
+      "NDCG@100": 0.44145,
+      "NDCG@1000": 0.46656
+    },
+    "cqadupstack_english": {
+      "NDCG@1": 0.30764,
+      "NDCG@3": 0.35197,
+      "NDCG@5": 0.37105,
+      "NDCG@10": 0.39067,
+      "NDCG@100": 0.43427,
+      "NDCG@1000": 0.4586
+    },
+    "cqadupstack_gaming": {
+      "NDCG@1": 0.35987,
+      "NDCG@3": 0.42514,
+      "NDCG@5": 0.44992,
+      "NDCG@10": 0.47974,
+      "NDCG@100": 0.52508,
+      "NDCG@1000": 0.54051
+    },
+    "cqadupstack_gis": {
+      "NDCG@1": 0.2,
+      "NDCG@3": 0.24039,
+      "NDCG@5": 0.26361,
+      "NDCG@10": 0.28866,
+      "NDCG@100": 0.34303,
+      "NDCG@1000": 0.36929
+    },
+    "cqadupstack_mathematica": {
+      "NDCG@1": 0.1393,
+      "NDCG@3": 0.18372,
+      "NDCG@5": 0.20196,
+      "NDCG@10": 0.23076,
+      "NDCG@100": 0.28674,
+      "NDCG@1000": 0.32014
+    },
+    "cqadupstack_physics": {
+      "NDCG@1": 0.26756,
+      "NDCG@3": 0.3194,
+      "NDCG@5": 0.34338,
+      "NDCG@10": 0.3694,
+      "NDCG@100": 0.42578,
+      "NDCG@1000": 0.45088
+    },
+    "cqadupstack_programmers": {
+      "NDCG@1": 0.25799,
+      "NDCG@3": 0.29731,
+      "NDCG@5": 0.32163,
+      "NDCG@10": 0.34766,
+      "NDCG@100": 0.4065,
+      "NDCG@1000": 0.43174
+    },
+    "cqadupstack_stats": {
+      "NDCG@1": 0.18712,
+      "NDCG@3": 0.22737,
+      "NDCG@5": 0.24022,
+      "NDCG@10": 0.25908,
+      "NDCG@100": 0.31008,
+      "NDCG@1000": 0.3382
+    },
+    "cqadupstack_wordpress": {
+      "NDCG@1": 0.17745,
+      "NDCG@3": 0.208,
+      "NDCG@5": 0.22922,
+      "NDCG@10": 0.249,
+      "NDCG@100": 0.30064,
+      "NDCG@1000": 0.33151
+    },
+    "cqadupstack_webmasters": {
+      "NDCG@1": 0.23715,
+      "NDCG@3": 0.28019,
+      "NDCG@5": 0.30105,
+      "NDCG@10": 0.32438,
+      "NDCG@100": 0.38253,
+      "NDCG@1000": 0.41166
+    },
+    "cqadupstack_unix": {
+      "NDCG@1": 0.22761,
+      "NDCG@3": 0.27169,
+      "NDCG@5": 0.28825,
+      "NDCG@10": 0.31209,
+      "NDCG@100": 0.36454,
+      "NDCG@1000": 0.39443
+    },
+    "cqadupstack_tex": {
+      "NDCG@1": 0.1404,
+      "NDCG@3": 0.16974,
+      "NDCG@5": 0.18636,
+      "NDCG@10": 0.20547,
+      "NDCG@100": 0.25368,
+      "NDCG@1000": 0.28579
+    },
+    "cqadupstack": {
+      "NDCG@1": 0.2328283333333333,
+      "NDCG@3": 0.27536333333333335,
+      "NDCG@5": 0.2963283333333333,
+      "NDCG@10": 0.320345,
+      "NDCG@100": 0.37285999999999997,
+      "NDCG@1000": 0.3999425
+    },
+    "trec-covid": {
+      "NDCG@1": 0.88,
+      "NDCG@3": 0.84693,
+      "NDCG@5": 0.82035,
+      "NDCG@10": 0.78527,
+      "NDCG@100": 0.55017,
+      "NDCG@1000": 0.45909
+    },
+    "trec-news": {
+      "NDCG@1": 0.47515,
+      "NDCG@3": 0.4709,
+      "NDCG@5": 0.44046,
+      "NDCG@10": 0.42399,
+      "NDCG@100": 0.4355,
+      "NDCG@1000": 0.53977
+    },
+    "signal1m": {
+      "NDCG@1": 0.40722,
+      "NDCG@3": 0.32186,
+      "NDCG@5": 0.29096,
+      "NDCG@10": 0.25005,
+      "NDCG@100": 0.26756,
+      "NDCG@1000": 0.32664
+    },
+    "robust04": {
+      "NDCG@1": 0.54418,
+      "NDCG@3": 0.49172,
+      "NDCG@5": 0.46235,
+      "NDCG@10": 0.421,
+      "NDCG@100": 0.34684,
+      "NDCG@1000": 0.43416
+    },
+    "nfcorpus": {
+      "NDCG@1": 0.4195,
+      "NDCG@3": 0.3794,
+      "NDCG@5": 0.35456,
+      "NDCG@10": 0.32101,
+      "NDCG@100": 0.30051,
+      "NDCG@1000": 0.38669
+    },
+    "msmarco": {
+      "NDCG@1": 0.19427,
+      "NDCG@3": 0.28633,
+      "NDCG@5": 0.32367,
+      "NDCG@10": 0.3605,
+      "NDCG@100": 0.42155,
+      "NDCG@1000": 0.43777
+    },
+    "nq": {
+      "NDCG@1": 0.26738,
+      "NDCG@3": 0.3542,
+      "NDCG@5": 0.39376,
+      "NDCG@10": 0.42945,
+      "NDCG@100": 0.48308,
+      "NDCG@1000": 0.49665
+    },
+    "average": {
+      "NDCG@1": 0.43378157407407403,
+      "NDCG@3": 0.42776351851851846,
+      "NDCG@5": 0.42827157407407407,
+      "NDCG@10": 0.4332547222222223,
+      "NDCG@100": 0.4522683333333333,
+      "NDCG@1000": 0.48897124999999997
+    },
+    "subaverage": {
+      "NDCG@1": 0.48321,
+      "NDCG@3": 0.4798981818181818,
+      "NDCG@5": 0.48613909090909097,
+      "NDCG@10": 0.4950427272727273,
+      "NDCG@100": 0.51009,
+      "NDCG@1000": 0.5356790909090909
+    },
+    "subsubaverage": {
+      "NDCG@1": 0.342878,
+      "NDCG@3": 0.365358,
+      "NDCG@5": 0.372162,
+      "NDCG@10": 0.39230800000000005,
+      "NDCG@100": 0.42882799999999993,
+      "NDCG@1000": 0.46609199999999995
+    },
+    "bioasq": {
+      "NDCG@1": 0.376,
+      "NDCG@3": 0.34795,
+      "NDCG@5": 0.34274,
+      "NDCG@10": 0.34726,
+      "NDCG@100": 0.39707,
+      "NDCG@1000": 0.43191
+    },
+    "climate-fever": {
+      "NDCG@1": 0.22997,
+      "NDCG@3": 0.20755,
+      "NDCG@5": 0.22802,
+      "NDCG@10": 0.26636,
+      "NDCG@100": 0.33596,
+      "NDCG@1000": 0.36954
+    },
+    "dbpedia-entity": {
+      "NDCG@1": 0.43125,
+      "NDCG@3": 0.35102,
+      "NDCG@5": 0.33354,
+      "NDCG@10": 0.31476,
+      "NDCG@100": 0.34557,
+      "NDCG@1000": 0.41241
+    },
+    "webis-touche2020": {
+      "NDCG@1": 0.30612,
+      "NDCG@3": 0.28528,
+      "NDCG@5": 0.25768,
+      "NDCG@10": 0.2445,
+      "NDCG@100": 0.35871,
+      "NDCG@1000": 0.47273
+    },
+    "scifact": {
+      "NDCG@1": 0.57,
+      "NDCG@3": 0.62656,
+      "NDCG@5": 0.65523,
+      "NDCG@10": 0.68287,
+      "NDCG@100": 0.70985,
+      "NDCG@1000": 0.71695
+    },
+    "scidocs": {
+      "NDCG@1": 0.19,
+      "NDCG@3": 0.15887,
+      "NDCG@5": 0.13476,
+      "NDCG@10": 0.16102,
+      "NDCG@100": 0.22693,
+      "NDCG@1000": 0.27994
+    },
+    "fiqa": {
+      "NDCG@1": 0.27315,
+      "NDCG@3": 0.26342,
+      "NDCG@5": 0.2768,
+      "NDCG@10": 0.29987,
+      "NDCG@100": 0.3641,
+      "NDCG@1000": 0.40185
+    },
+    "arguana": {
+      "NDCG@1": 0.26174,
+      "NDCG@3": 0.39854,
+      "NDCG@5": 0.43946,
+      "NDCG@10": 0.49677,
+      "NDCG@100": 0.54275,
+      "NDCG@1000": 0.54503
+    },
+    "quora": {
+      "NDCG@1": 0.7756,
+      "NDCG@3": 0.81975,
+      "NDCG@5": 0.83738,
+      "NDCG@10": 0.85293,
+      "NDCG@100": 0.86816,
+      "NDCG@1000": 0.87021
+    },
+    "hotpotqa": {
+      "NDCG@1": 0.61742,
+      "NDCG@3": 0.4611,
+      "NDCG@5": 0.48147,
+      "NDCG@10": 0.49923,
+      "NDCG@100": 0.53178,
+      "NDCG@1000": 0.54947
+    },
+    "fever": {
+      "NDCG@1": 0.55056,
+      "NDCG@3": 0.63933,
+      "NDCG@5": 0.66304,
+      "NDCG@10": 0.6819,
+      "NDCG@100": 0.70343,
+      "NDCG@1000": 0.7085
+    }
+  }
+}
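The newly added eval/beir.json stores one block of NDCG@k scores per BEIR dataset, plus the aggregate entries (cqadupstack, average, subaverage, subsubaverage). A short sketch, assuming the repo has been cloned so the file is available locally, for summarising it:

```python
# Sketch: print NDCG@10 for every entry in eval/beir.json, best first.
import json

with open("eval/beir.json") as f:
    results = json.load(f)["SGPT-1.3B-weightedmean-msmarco-specb-bitfit"]

for name, scores in sorted(results.items(), key=lambda kv: kv[1]["NDCG@10"], reverse=True):
    print(f"{name:25s} NDCG@10 = {scores['NDCG@10']:.4f}")
```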
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70c84fed2517a31d4ff19880d4b6ec22644f2be2a617e8f53b68019f59a8bb72
-size 5363113217
+oid sha256:71d5e1599703e7c4795a70cef20fb032d9c252bf27a99a7b3edc7d3bb13b09a2
+size 5363097985
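pytorch_model.bin is stored via Git LFS, so the diff above only updates the pointer (new sha256 and size). A sketch for checking that a downloaded weight file matches the pointer committed here:

```python
# Sketch: verify pytorch_model.bin against the LFS pointer added in this commit.
import hashlib

EXPECTED_SHA256 = "71d5e1599703e7c4795a70cef20fb032d9c252bf27a99a7b3edc7d3bb13b09a2"
EXPECTED_SIZE = 5363097985

digest, size = hashlib.sha256(), 0
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"unexpected size {size}"
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("pytorch_model.bin matches the committed LFS pointer")
```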
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-1.3B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-1.3B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}