solidrust
/

Llama-3-Orca-2.0-8B-AWQ

Text Generation

4-bit precision

Inference Endpoints

text-generation-inference

Model card | Files and versions | Community

Suparious committed on May 22

Commit

582edb4

•

1 Parent(s): 049e764

Adding AWQ model

Files changed (3) hide show

config.json +3 -2
generation_config.json +1 -1
tokenizer_config.json +2 -1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/home/ubuntu/.cache/huggingface/hub/models--Locutusque--Llama-3-Orca-2.0-8B/snapshots/b6a4448cf33dd2c7a729a9ac12791a38a0955d33",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -12,6 +12,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 8192,
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
@@ -30,7 +31,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.40.1",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "/home/ubuntu/.cache/huggingface/hub/models--Locutusque--Llama-3-Orca-2.0-8B/snapshots/ec02fa2b6743bc47c4453bcfe92a0233d1467bd6",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 8192,
+  "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

generation_config.json CHANGED Viewed

@@ -5,5 +5,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.40.1"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.0"
 }

tokenizer_config.json CHANGED Viewed

@@ -2050,9 +2050,10 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
-  "max_length": 512,
   "model_input_names": [
     "input_ids",
     "attention_mask"

     }
   },
   "bos_token": "<|begin_of_text|>",
+  "chat_template": "{%- set ns = namespace(found=false) -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{%- set ns.found = true -%}{%- endif -%}{%- endfor -%}{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '<|im_start|>system\n' + message['content'].rstrip() + '<|im_end|>\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'<|im_start|>user\n' + message['content'].rstrip() + '<|im_end|>\n'-}}{%- else -%}{{-'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'<|im_start|>assistant\n'-}}{%- endif -%}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
+  "max_length": 4096,
   "model_input_names": [
     "input_ids",
     "attention_mask"