[WIP] Optimized q4f16 ONNX export (Olive)
#6
by
Xenova
HF staff
- opened
- config.json +3 -0
- onnx/model_q4f16.onnx +2 -2
config.json
CHANGED
@@ -25,6 +25,9 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.42.3",
+  "transformers.js_config": {
+    "kv_cache_dtype": "float16"
+  },
   "use_cache": true,
   "vocab_size": 49152
 }
onnx/model_q4f16.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8eb23549361696ffe4350e2d68d34fe92575e14182282a4bb33f9ee59836bdd6
+size 299014965