unsloth/Qwen2.5-7B-Instruct-bnb-4bit

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card · Files and versions · Community

danielhanchen committed 6 days ago

Commit 13426c8 • 1 parent: 688af54

Upload Qwen2ForCausalLM

Files changed (2) hide show

config.json +2 -0
generation_config.json +2 -1

config.json CHANGED Viewed

@@ -16,6 +16,7 @@
   "num_attention_heads": 28,
   "num_hidden_layers": 28,
   "num_key_value_heads": 4,
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
@@ -37,6 +38,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064

   "num_attention_heads": 28,
   "num_hidden_layers": 28,
   "num_key_value_heads": 4,
+  "pad_token_id": 151665,
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
+  "unsloth_fixed": true,
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 152064

generation_config.json CHANGED Viewed

@@ -5,7 +5,8 @@
     151645,
     151643
   ],
-  "pad_token_id": 151643,
   "repetition_penalty": 1.05,
   "temperature": 0.7,
   "top_k": 20,

     151645,
     151643
   ],
+  "max_length": 32768,
+  "pad_token_id": 151665,
   "repetition_penalty": 1.05,
   "temperature": 0.7,
   "top_k": 20,