Upload importance matrix, heavily quantized model weights, and re-quantized q2
- The imatrix was calculated over 550 chunks (more than 128k tokens) of English and Spanish Wikipedia data
- The iq4 model weights use non-linear quantization
- The q2 model weights had to be re-quantized with the importance matrix, since the original output was mostly garbage

Note: Below 3 bits, models **must** be quantized with an importance matrix to work; below 2 bits they won't work at all.
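For reference, a minimal sketch of the llama.cpp workflow described above. This is not the exact invocation used for this commit: the binary names vary between llama.cpp versions (`imatrix`/`quantize` vs. the newer `llama-imatrix`/`llama-quantize`), and the fp16 source model and calibration file paths are placeholders.

```sh
# Build the importance matrix from an fp16 model over a calibration corpus
# (here the English/Spanish Wikipedia text mentioned above; path is illustrative).
./imatrix -m phi-3-mini-4k-instruct-fp16.gguf \
          -f wiki-en-es-calibration.txt \
          -o phi-3-mini-4k.imatrix \
          --chunks 550

# Re-quantize with the importance matrix; repeat with IQ2_S, IQ2_XS, IQ2_XXS,
# IQ4_NL, IQ4_XS, or Q2_K as the last argument for the other files in this commit.
./quantize --imatrix phi-3-mini-4k.imatrix \
           phi-3-mini-4k-instruct-fp16.gguf \
           phi-3-mini-4k-instruct-iq2_m.gguf IQ2_M
```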
- .gitattributes +7 -0
- phi-3-mini-4k-instruct-iq2_m.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_s.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_xs.gguf +3 -0
- phi-3-mini-4k-instruct-iq2_xxs.gguf +3 -0
- phi-3-mini-4k-instruct-iq4_nl.gguf +3 -0
- phi-3-mini-4k-instruct-iq4_xs.gguf +3 -0
- phi-3-mini-4k-instruct-q2_k.gguf +2 -2
- phi-3-mini-4k.imatrix +3 -0
.gitattributes
CHANGED
@@ -42,3 +42,10 @@ phi-3-mini-4k-instruct-q5_0.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q5_k.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q6_k.gguf filter=lfs diff=lfs merge=lfs -text
 phi-3-mini-4k-instruct-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_m.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_s.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_xs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq2_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k-instruct-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
+phi-3-mini-4k.imatrix filter=lfs diff=lfs merge=lfs -text
phi-3-mini-4k-instruct-iq2_m.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0793e122e2f7fd30119d77476aed3ecb510d3e54c4f65d2ce00d8878bba4acbc
+size 1445676768
phi-3-mini-4k-instruct-iq2_s.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15826cfb33d8e5b960123922483ba4fc16847aac88c71ad826a0b01135351854
+size 1345013472
phi-3-mini-4k-instruct-iq2_xs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5490f599715dd51fcbaf7f1df5c0836c6c6074b65fbe5e8c53857dbe9bf6b75e
+size 1282318560
phi-3-mini-4k-instruct-iq2_xxs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49af6896400ff31d065721bbd34146511d2ff707c140b70e36a9ae2257a8994
+size 1172218080
phi-3-mini-4k-instruct-iq4_nl.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a9a0aba0b5c2f33222925bab86185ec0778cf5ce3ce0ca6ece2ec1d229bda2
+size 2292376800
phi-3-mini-4k-instruct-iq4_xs.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7476997dc1aaba2d95c903e0e8a561e58734347374f182409f1f50e933dd6c44
+size 2176052448
phi-3-mini-4k-instruct-q2_k.gguf
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9dc25787d880e44d2526ef2ede63beaa19f329b0ca7735ab52987f8ac90a425d
+size 1532403936
phi-3-mini-4k.imatrix
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:604d7a6e4b3990bde99e7a2890ca1b9b13881e084d45c2293b4bb85247635e9a
+size 2232649