TheBloke/Falcon-180B-GPTQ

Text Generation

text-generation-inference

4-bit precision

Model card | Files and versions | Community

TheBloke committed on Sep 9, 2023

Commit

cc7a1fe

•

1 Parent(s): 78dfd93

GPTQ model commit

Files changed (1) hide show

config.json +37 -48

config.json CHANGED Viewed

@@ -1,49 +1,38 @@
 {
-  "_name_or_path": "/workspace/process/tiiuae_falcon-180b/gptq/main/",
-  "alibi": false,
-  "architectures": [
-    "FalconForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bias": false,
-  "bos_token_id": 11,
-  "eos_token_id": 11,
-  "hidden_dropout": 0.0,
-  "hidden_size": 14848,
-  "initializer_range": 0.02,
-  "layer_norm_epsilon": 1e-05,
-  "max_position_embeddings": 2048,
-  "model_type": "falcon",
-  "multi_query": true,
-  "new_decoder_architecture": true,
-  "num_attention_heads": 232,
-  "num_hidden_layers": 80,
-  "num_kv_heads": 8,
-  "pad_token_id": 0,
-  "parallel_attn": true,
-  "pretraining_tp": 1,
-  "quantization_config": {
-    "batch_size": 1,
-    "bits": 4,
-    "block_name_to_quantize": null,
-    "damp_percent": 0.1,
-    "dataset": null,
-    "desc_act": true,
-    "disable_exllama": true,
-    "group_size": 128,
-    "model_seqlen": null,
-    "module_name_preceding_first_block": null,
-    "pad_token_id": null,
-    "quant_method": "gptq",
-    "sym": true,
-    "tokenizer": null,
-    "true_sequential": true,
-    "use_cuda_fp16": false
-  },
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "torch_dtype": "float16",
-  "transformers_version": "4.33.0",
-  "use_cache": true,
-  "vocab_size": 65024
-}

 {
+    "alibi": false,
+    "architectures": [
+        "FalconForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "bias": false,
+    "bos_token_id": 11,
+    "eos_token_id": 11,
+    "hidden_dropout": 0.0,
+    "hidden_size": 14848,
+    "initializer_range": 0.02,
+    "layer_norm_epsilon": 1e-05,
+    "model_type": "falcon",
+    "multi_query": true,
+    "new_decoder_architecture": true,
+    "num_attention_heads": 232,
+    "num_hidden_layers": 80,
+    "num_kv_heads": 8,
+    "parallel_attn": true,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.32.0",
+    "use_cache": true,
+    "vocab_size": 65024,
+    "pretraining_tp": 1,
+    "pad_token_id": 0,
+    "quantization_config": {
+        "bits": 4,
+        "group_size": 128,
+        "damp_percent": 0.1,
+        "desc_act": true,
+        "sym": true,
+        "true_sequential": true,
+        "model_name_or_path": null,
+        "model_file_base_name": "model",
+        "quant_method": "gptq"
+    }
+}