andersonjas
/

llama2-7b-dialogsum-qlora-gptq

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files and versions Community

andersonjas committed on Nov 11, 2023

Commit

ef4c03b

•

1 Parent(s): 6697b0d

Upload LlamaForCausalLM

Files changed (2) hide show

config.json +2 -2
pytorch_model.bin +2 -2

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "chat_summarizer/quantized_8bit",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -18,7 +18,7 @@
   "pretraining_tp": 1,
   "quantization_config": {
     "batch_size": 1,
-    "bits": 8,
     "block_name_to_quantize": "model.layers",
     "damp_percent": 0.1,
     "dataset": [

 {
+  "_name_or_path": "work_for_2023_11_10/quantized_8bit",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "pretraining_tp": 1,
   "quantization_config": {
     "batch_size": 1,
+    "bits": 4,
     "block_name_to_quantize": "model.layers",
     "damp_percent": 0.1,
     "dataset": [

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df7887db21bb240dfdfcf233a2f61135a94f97754f793cfffdee3465a0658caf
-size 7160285785

 version https://git-lfs.github.com/spec/v1
+oid sha256:f86d7b9e7286844b478926fa53504956589a8374d5eebb64f6dbf1ca3b8e2137
+size 3896979857