fxmarty committed
Commit: e489f22
Parent: b6d7ba3

Upload folder using huggingface_hub

Files changed (1)
  1. config.json +46 -6
config.json CHANGED
@@ -21,7 +21,10 @@
   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
-    "activation_scheme": "static",
+    "algo_config": null,
+    "exclude": [
+      "lm_head"
+    ],
     "export": {
       "kv_cache_group": [],
       "pack_method": "reorder",
@@ -38,11 +41,48 @@
         ]
       ]
     },
-    "ignored_layers": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "fp8"
+    "global_quant_config": {
+      "bias": null,
+      "input_tensors": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      },
+      "output_tensors": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      },
+      "target_device": null,
+      "weight": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      }
+    },
+    "layer_quant_config": {},
+    "layer_type_quant_config": {},
+    "pack_method": "reorder",
+    "quant_method": "quark",
+    "quant_mode": 1
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,