[WIP] Optimized q4f16 ONNX export (Olive)

#6 by Xenova - opened
Files changed (2)
  1. config.json +3 -0
  2. onnx/model_q4f16.onnx +2 -2
config.json CHANGED
@@ -25,6 +25,9 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.42.3",
+  "transformers.js_config": {
+    "kv_cache_dtype": "float16"
+  },
   "use_cache": true,
   "vocab_size": 49152
 }
onnx/model_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e1a788453e1393e8642f43ca729b7f2301ba61cc1f8ac1f1904c809869fc1ffb
- size 272513495
+ oid sha256:8eb23549361696ffe4350e2d68d34fe92575e14182282a4bb33f9ee59836bdd6
+ size 299014965
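
For reference, a minimal usage sketch with Transformers.js v3 (the model id below is a placeholder for this repository, not taken from the PR): passing `dtype: 'q4f16'` is what selects `onnx/model_q4f16.onnx`, and the `"transformers.js_config".kv_cache_dtype` entry added to `config.json` should be picked up automatically so the KV cache runs in float16, assuming the chosen backend supports it (e.g. WebGPU).

```js
// Minimal sketch, assuming Transformers.js v3 and a placeholder model id.
import { pipeline } from '@huggingface/transformers';

// dtype 'q4f16' loads onnx/model_q4f16.onnx; kv_cache_dtype is read
// from the "transformers.js_config" block in config.json.
const generator = await pipeline(
  'text-generation',
  'user/model-repo', // placeholder: replace with this repository's id
  { dtype: 'q4f16', device: 'webgpu' },
);

const output = await generator('Once upon a time,', { max_new_tokens: 64 });
console.log(output[0].generated_text);
```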