a7662257b37dce6b1e0c71dcefe3b3d4d22ce934f45cc5a2d66610975fa3e787

Files changed (4) hide show

model/config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

model/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c3eddb5c1e65f1b46d0be45d19a8593267bd03751372b6044f7f1d3aa9b92c7
-size 1530193360

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e02d49f6f5f3c441de0cdb0d5d95c5a1e33daf35a4d23c7da16caf3a7e77b6e
+size 1723582192

model/model.safetensors.index.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

model/smash_config.json CHANGED Viewed

@@ -5,7 +5,7 @@
         "pruners": "[]",
         "pruning_ratio": 0.0,
         "factorizers": "[]",
-        "quantizers": "llm-int8",
         "n_quantization_bits": 8,
         "output_deviation": 0.005,
         "compilers": "[]",
@@ -14,11 +14,11 @@
         "controlnet": "None",
         "unet_dim": 4,
         "device": "cuda",
-        "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsa05vehry",
         "batch_size": 1,
         "model_name": "mattshumer/Hermes-2-Pro-11B",
         "max_batch_size": 1,
-        "save_dir": "/ceph/hdd/staff/charpent/.cache/modelsp5gocu7z",
         "qtype_weight": "torch.qint8",
         "qtype_activation": "torch.quint8",
         "qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
@@ -26,6 +26,6 @@
         "qconfig": "x86",
         "group_size": 128,
         "damp_percent": 0.1,
-        "save_load_fn": "bitsandbytes"
     }
 }

         "pruners": "[]",
         "pruning_ratio": 0.0,
         "factorizers": "[]",
+        "quantizers": "['gptq']",
         "n_quantization_bits": 8,
         "output_deviation": 0.005,
         "compilers": "[]",
         "controlnet": "None",
         "unet_dim": 4,
         "device": "cuda",
+        "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelszka1vn5f",
         "batch_size": 1,
         "model_name": "mattshumer/Hermes-2-Pro-11B",
         "max_batch_size": 1,
+        "save_dir": "/ceph/hdd/staff/charpent/.cache/modelsgvf954mf",
         "qtype_weight": "torch.qint8",
         "qtype_activation": "torch.quint8",
         "qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
         "qconfig": "x86",
         "group_size": 128,
         "damp_percent": 0.1,
+        "save_load_fn": "hf-gptq"
     }
 }