File size: 2,560 Bytes
5dadba4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# Quantization type names, kept in alphabetical order.
# Every name listed here also appears as a `name` entry under
# `allowed_quantization_types`.
# NOTE(review): presumably the subset of types that is actually
# produced/offered - confirm against the consumer of this file.
quantizations:
  - IQ2_XS
  - IQ3_M
  - IQ3_S
  - IQ3_XS
  - IQ3_XXS
  - IQ4_NL
  - IQ4_XS
  - Q3_K_L
  - Q3_K_M
  - Q3_K_S
  - Q4_K_M
  - Q4_K_S
  - Q5_K_M
  - Q5_K_S
  - Q6_K
  - Q8_0
  - TQ1_0
  - TQ2_0
# Catalogue of known quantization types: a list of mappings, one per type.
#
# Every entry has a `name`. The remaining keys come in one of these shapes:
#   - size + ppl + details : `size` is a value such as 4.34G; `ppl` is a
#                            signed number; `details` names a model.
#   - size + type          : `size` is a quoted "<n> bpw" string; `type` is
#                            a short free-text category.
#   - alias                : the entry is another spelling of the entry in
#                            this list whose `name` equals `alias`.
#   - size + details       : no `ppl` value is given (F32).
#   - description          : free-text explanation only (COPY).
#
# NOTE(review): `ppl` looks like a perplexity delta measured on the model in
# `details`, and "bpw" like bits per weight - confirm against the upstream
# source of these figures.
# NOTE(review): `ppl` values are unquoted, so YAML loaders read them as
# floats (+0.0020 loads as 0.002; the sign and trailing zeros are lost).
# Quote them if the consumer needs the text exactly as written.
allowed_quantization_types:
  - name: Q4_0
    size: 4.34G
    ppl: +0.4685
    details: Llama-3-8B
  - name: Q4_1
    size: 4.78G
    ppl: +0.4511
    details: Llama-3-8B
  - name: Q5_0
    size: 5.21G
    ppl: +0.1316
    details: Llama-3-8B
  - name: Q5_1
    size: 5.65G
    ppl: +0.1062
    details: Llama-3-8B
  - name: IQ2_XXS
    size: "2.06 bpw"
    type: quantization
  - name: IQ2_XS
    size: "2.31 bpw"
    type: quantization
  - name: IQ2_S
    size: "2.5 bpw"
    type: quantization
  - name: IQ2_M
    size: "2.7 bpw"
    type: quantization
  - name: IQ1_S
    size: "1.56 bpw"
    type: quantization
  - name: IQ1_M
    size: "1.75 bpw"
    type: quantization
  - name: TQ1_0
    size: "1.69 bpw"
    type: ternarization
  - name: TQ2_0
    size: "2.06 bpw"
    type: ternarization
  - name: Q2_K
    size: 2.96G
    ppl: +3.5199
    details: Llama-3-8B
  - name: Q2_K_S
    size: 2.96G
    ppl: +3.1836
    details: Llama-3-8B
  - name: IQ3_XXS
    size: "3.06 bpw"
    type: quantization
  - name: IQ3_S
    size: "3.44 bpw"
    type: quantization
  - name: IQ3_M
    size: "3.66 bpw"
    type: quantization mix
  # Alias entry: resolves to the Q3_K_M entry below.
  - name: Q3_K
    alias: Q3_K_M
  - name: IQ3_XS
    size: "3.3 bpw"
    type: quantization
  - name: Q3_K_S
    size: 3.41G
    ppl: +1.6321
    details: Llama-3-8B
  - name: Q3_K_M
    size: 3.74G
    ppl: +0.6569
    details: Llama-3-8B
  - name: Q3_K_L
    size: 4.03G
    ppl: +0.5562
    details: Llama-3-8B
  - name: IQ4_NL
    size: "4.50 bpw"
    type: non-linear quantization
  - name: IQ4_XS
    size: "4.25 bpw"
    type: non-linear quantization
  # Alias entry: resolves to the Q4_K_M entry below.
  - name: Q4_K
    alias: Q4_K_M
  - name: Q4_K_S
    size: 4.37G
    ppl: +0.2689
    details: Llama-3-8B
  - name: Q4_K_M
    size: 4.58G
    ppl: +0.1754
    details: Llama-3-8B
  # Alias entry: resolves to the Q5_K_M entry below.
  - name: Q5_K
    alias: Q5_K_M
  - name: Q5_K_S
    size: 5.21G
    ppl: +0.1049
    details: Llama-3-8B
  - name: Q5_K_M
    size: 5.33G
    ppl: +0.0569
    details: Llama-3-8B
  - name: Q6_K
    size: 6.14G
    ppl: +0.0217
    details: Llama-3-8B
  - name: Q8_0
    size: 7.96G
    ppl: +0.0026
    details: Llama-3-8B
  - name: F16
    size: 14.00G
    ppl: +0.0020
    details: Mistral-7B
  - name: BF16
    size: 14.00G
    ppl: -0.0050
    details: Mistral-7B
  - name: F32
    size: 26.00G
    details: 7B
  - name: COPY
    description: Only copy tensors, no quantizing
|