Upload importance matrix, heavily quantized model weights, and re-quantized q2
- The imatrix was calculated over 550 chunks (more than 128k tokens) of English and Spanish Wikipedia data
- The iq4 model weights use non-linear quantization
- The q2 model weights had to be re-quantized with the importance matrix, since the original output was mostly garbage

Note: Below 3 bits, models **must** be quantized with an importance matrix to work; below 2 bits they won't work at all.
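For reference, a minimal sketch of the llama.cpp workflow described above. This is not the exact invocation used for this commit: the binary names vary between llama.cpp versions (`imatrix`/`quantize` vs. the newer `llama-imatrix`/`llama-quantize`), and the fp16 source model and calibration file paths are placeholders.

```sh
# Build the importance matrix from an fp16 model over a calibration corpus
# (here the English/Spanish Wikipedia text mentioned above; path is illustrative).
./imatrix -m phi-3-mini-4k-instruct-fp16.gguf \
          -f wiki-en-es-calibration.txt \
          -o phi-3-mini-4k.imatrix \
          --chunks 550

# Re-quantize with the importance matrix; repeat with IQ2_S, IQ2_XS, IQ2_XXS,
# IQ4_NL, IQ4_XS, or Q2_K as the last argument for the other files in this commit.
./quantize --imatrix phi-3-mini-4k.imatrix \
           phi-3-mini-4k-instruct-fp16.gguf \
           phi-3-mini-4k-instruct-iq2_m.gguf IQ2_M
```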
- .gitattributes +7 -0
- phi-3-mini-4k-instruct-iq2_m.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_s.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_xs.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_xxs.gguf +3 -0
- phi-3-mini-4k-instruct-iq4_nl.gguf +3 -0
- phi-3-mini-4k-instruct-iq4_xs.gguf +3 -0
- phi-3-mini-4k-instruct-q2_k.gguf +2 -2
- phi-3-mini-4k.imatrix +3 -0
.gitattributes
CHANGED
@@ -42,3 +42,10 @@ phi-3-mini-4k-instruct-q5_0.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q5_k.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q6_k.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_m.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_s.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_xs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k.imatrix filter=lfs diff=lfs merge=lfs -text
phi-3-mini-4k-instruct-iq2_m.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0793e122e2f7fd30119d77476aed3ecb510d3e54c4f65d2ce00d8878bba4acbc
+size 1445676768
phi-3-mini-4k-instruct-iq2_s.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15826cfb33d8e5b960123922483ba4fc16847aac88c71ad826a0b01135351854
+size 1345013472
phi-3-mini-4k-instruct-iq2_xs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5490f599715dd51fcbaf7f1df5c0836c6c6074b65fbe5e8c53857dbe9bf6b75e
+size 1282318560
phi-3-mini-4k-instruct-iq2_xxs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49af6896400ff31d065721bbd34146511d2ff707c140b70e36a9ae2257a8994
+size 1172218080
phi-3-mini-4k-instruct-iq4_nl.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a9a0aba0b5c2f33222925bab86185ec0778cf5ce3ce0ca6ece2ec1d229bda2
+size 2292376800
phi-3-mini-4k-instruct-iq4_xs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7476997dc1aaba2d95c903e0e8a561e58734347374f182409f1f50e933dd6c44
+size 2176052448
phi-3-mini-4k-instruct-q2_k.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9dc25787d880e44d2526ef2ede63beaa19f329b0ca7735ab52987f8ac90a425d
+size 1532403936
phi-3-mini-4k.imatrix
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:604d7a6e4b3990bde99e7a2890ca1b9b13881e084d45c2293b4bb85247635e9a
+size 2232649