GGUF
Inference Endpoints
jan-hq committed on
Commit
071a8f6
1 Parent(s): edd106e

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. model.gguf +3 -0
  3. model.yml +20 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.gguf filter=lfs diff=lfs merge=lfs -text
model.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14466f9d658bf4a79f96c3f3f22759707c291cac4e62fea625e80c7d32169991
3
+ size 4368438944
model.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: mistral
2
+ model: mistral:7B
3
+ version: 1
4
+
5
+ files:
6
+ - llama_model_path: model.gguf
7
+
8
+ # Results Preferences
9
+ top_p: 0.95
10
+ temperature: 0.7
11
+ frequency_penalty: 0
12
+ presence_penalty: 0
13
+ max_tokens: 4096 # Infer from base config.json -> max_position_embeddings
14
+ stream: true # true | false
15
+
16
+ # Engine / Model Settings
17
+ ngl: 32 # Number of layers offloaded to GPU; infer from base config.json -> num_hidden_layers
18
+ ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
19
+ engine: cortex.llamacpp
20
+ prompt_template: "{system_message} [INST] {prompt} [/INST]"