# salamandra-2b / quantizations.yaml
# robbiemu's picture
# update for quantization
# 5dadba4
# Quantization type names selected in this file. Every name listed here also
# appears as a `name` under `allowed_quantization_types`.
# NOTE(review): presumably the set of quantized variants to build for this
# model -- confirm against the script that reads this file.
quantizations:
  - IQ2_XS
  - IQ3_M
  - IQ3_S
  - IQ3_XS
  - IQ3_XXS
  - IQ4_NL
  - IQ4_XS
  - Q3_K_L
  - Q3_K_M
  - Q3_K_S
  - Q4_K_M
  - Q4_K_S
  - Q5_K_M
  - Q5_K_S
  - Q6_K
  - Q8_0
  - TQ1_0
  - TQ2_0
# Catalogue of recognised quantization type names with optional metadata.
# Each list item is one mapping keyed by `name`; the remaining fields vary:
#   - size / ppl / details  (size and perplexity delta, with the model named
#                            in `details`)
#   - size / type           (size given in "bpw", plus a short type label)
#   - alias                 (this name refers to another entry's name)
#   - description           (free-text note)
# NOTE(review): the figures look like they mirror the type listing printed by
# llama.cpp's quantize tool -- confirm the source before updating them.
# NOTE: `ppl` values are unquoted signed decimals, so YAML loaders resolve them
# to floats (the leading "+" and trailing zeros are not preserved). Quote them
# if a consumer ever needs the literal text.
allowed_quantization_types:
  - name: Q4_0
    size: 4.34G
    ppl: +0.4685
    details: Llama-3-8B
  - name: Q4_1
    size: 4.78G
    ppl: +0.4511
    details: Llama-3-8B
  - name: Q5_0
    size: 5.21G
    ppl: +0.1316
    details: Llama-3-8B
  - name: Q5_1
    size: 5.65G
    ppl: +0.1062
    details: Llama-3-8B
  - name: IQ2_XXS
    size: "2.06 bpw"
    type: quantization
  - name: IQ2_XS
    size: "2.31 bpw"
    type: quantization
  - name: IQ2_S
    size: "2.5 bpw"
    type: quantization
  - name: IQ2_M
    size: "2.7 bpw"
    type: quantization
  - name: IQ1_S
    size: "1.56 bpw"
    type: quantization
  - name: IQ1_M
    size: "1.75 bpw"
    type: quantization
  - name: TQ1_0
    size: "1.69 bpw"
    type: ternarization
  - name: TQ2_0
    size: "2.06 bpw"
    type: ternarization
  - name: Q2_K
    size: 2.96G
    ppl: +3.5199
    details: Llama-3-8B
  - name: Q2_K_S
    size: 2.96G
    ppl: +3.1836
    details: Llama-3-8B
  - name: IQ3_XXS
    size: "3.06 bpw"
    type: quantization
  - name: IQ3_S
    size: "3.44 bpw"
    type: quantization
  - name: IQ3_M
    size: "3.66 bpw"
    type: quantization mix
  # Bare K-quant names are aliases for their _M variants.
  - name: Q3_K
    alias: Q3_K_M
  - name: IQ3_XS
    size: "3.3 bpw"
    type: quantization
  - name: Q3_K_S
    size: 3.41G
    ppl: +1.6321
    details: Llama-3-8B
  - name: Q3_K_M
    size: 3.74G
    ppl: +0.6569
    details: Llama-3-8B
  - name: Q3_K_L
    size: 4.03G
    ppl: +0.5562
    details: Llama-3-8B
  - name: IQ4_NL
    size: "4.50 bpw"
    type: non-linear quantization
  - name: IQ4_XS
    size: "4.25 bpw"
    type: non-linear quantization
  - name: Q4_K
    alias: Q4_K_M
  - name: Q4_K_S
    size: 4.37G
    ppl: +0.2689
    details: Llama-3-8B
  - name: Q4_K_M
    size: 4.58G
    ppl: +0.1754
    details: Llama-3-8B
  - name: Q5_K
    alias: Q5_K_M
  - name: Q5_K_S
    size: 5.21G
    ppl: +0.1049
    details: Llama-3-8B
  - name: Q5_K_M
    size: 5.33G
    ppl: +0.0569
    details: Llama-3-8B
  - name: Q6_K
    size: 6.14G
    ppl: +0.0217
    details: Llama-3-8B
  - name: Q8_0
    size: 7.96G
    ppl: +0.0026
    details: Llama-3-8B
  - name: F16
    size: 14.00G
    ppl: +0.0020
    details: Mistral-7B
  - name: BF16
    size: 14.00G
    ppl: -0.0050
    details: Mistral-7B
  # F32 carries no ppl figure in this file.
  - name: F32
    size: 26.00G
    details: 7B
  - name: COPY
    description: Only copy tensors, no quantizing