robbiemu
/

salamandra-2b

Text Generation

Inference Endpoints

Model card Files Files and versions Community

salamandra-2b / quantizations.yaml

robbiemu's picture

update for quantization

5dadba4 18 days ago

history blame contribute delete

2.56 kB

	quantizations:
	- IQ2_XS
	- IQ3_M
	- IQ3_S
	- IQ3_XS
	- IQ3_XXS
	- IQ4_NL
	- IQ4_XS
	- Q3_K_L
	- Q3_K_M
	- Q3_K_S
	- Q4_K_M
	- Q4_K_S
	- Q5_K_M
	- Q5_K_S
	- Q6_K
	- Q8_0
	- TQ1_0
	- TQ2_0

	allowed_quantization_types:
	- name: Q4_0
	size: 4.34G
	ppl: +0.4685
	details: Llama-3-8B
	- name: Q4_1
	size: 4.78G
	ppl: +0.4511
	details: Llama-3-8B
	- name: Q5_0
	size: 5.21G
	ppl: +0.1316
	details: Llama-3-8B
	- name: Q5_1
	size: 5.65G
	ppl: +0.1062
	details: Llama-3-8B
	- name: IQ2_XXS
	size: "2.06 bpw"
	type: quantization
	- name: IQ2_XS
	size: "2.31 bpw"
	type: quantization
	- name: IQ2_S
	size: "2.5 bpw"
	type: quantization
	- name: IQ2_M
	size: "2.7 bpw"
	type: quantization
	- name: IQ1_S
	size: "1.56 bpw"
	type: quantization
	- name: IQ1_M
	size: "1.75 bpw"
	type: quantization
	- name: TQ1_0
	size: "1.69 bpw"
	type: ternarization
	- name: TQ2_0
	size: "2.06 bpw"
	type: ternarization
	- name: Q2_K
	size: 2.96G
	ppl: +3.5199
	details: Llama-3-8B
	- name: Q2_K_S
	size: 2.96G
	ppl: +3.1836
	details: Llama-3-8B
	- name: IQ3_XXS
	size: "3.06 bpw"
	type: quantization
	- name: IQ3_S
	size: "3.44 bpw"
	type: quantization
	- name: IQ3_M
	size: "3.66 bpw"
	type: quantization mix
	- name: Q3_K
	alias: Q3_K_M
	- name: IQ3_XS
	size: "3.3 bpw"
	type: quantization
	- name: Q3_K_S
	size: 3.41G
	ppl: +1.6321
	details: Llama-3-8B
	- name: Q3_K_M
	size: 3.74G
	ppl: +0.6569
	details: Llama-3-8B
	- name: Q3_K_L
	size: 4.03G
	ppl: +0.5562
	details: Llama-3-8B
	- name: IQ4_NL
	size: "4.50 bpw"
	type: non-linear quantization
	- name: IQ4_XS
	size: "4.25 bpw"
	type: non-linear quantization
	- name: Q4_K
	alias: Q4_K_M
	- name: Q4_K_S
	size: 4.37G
	ppl: +0.2689
	details: Llama-3-8B
	- name: Q4_K_M
	size: 4.58G
	ppl: +0.1754
	details: Llama-3-8B
	- name: Q5_K
	alias: Q5_K_M
	- name: Q5_K_S
	size: 5.21G
	ppl: +0.1049
	details: Llama-3-8B
	- name: Q5_K_M
	size: 5.33G
	ppl: +0.0569
	details: Llama-3-8B
	- name: Q6_K
	size: 6.14G
	ppl: +0.0217
	details: Llama-3-8B
	- name: Q8_0
	size: 7.96G
	ppl: +0.0026
	details: Llama-3-8B
	- name: F16
	size: 14.00G
	ppl: +0.0020
	details: Mistral-7B
	- name: BF16
	size: 14.00G
	ppl: -0.0050
	details: Mistral-7B
	- name: F32
	size: 26.00G
	details: 7B
	- name: COPY
	description: Only copy tensors, no quantizing