daniellefranca96 committed
Commit d899b2a • Parent: 33c2fff

Update main.py
main.py CHANGED
@@ -1,9 +1,20 @@
+from llama_cpp import Llama
 from fastapi import FastAPI
 from pydantic import BaseModel
 import requests
 from ctransformers import AutoModelForCausalLM
 
-
+llms = {
+    "tinnyllama":{"name": "TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF", "file":"tinyllama-1.1b-1t-openorca.Q4_K_M.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
+    "orca2":{"name": "TheBloke/Orca-2-7B-GGUF", "file":"orca-2-7b.Q4_K_M.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant<|im_end|><|im_start|>user "},
+    "zephyr":{"name": "TheBloke/zephyr-7B-beta-GGUF", "file":"zephyr-7b-beta.Q4_K_M.gguf", "suffix":"</s><|assistant|>", "prefix":"<|system|>You are a helpful assistant</s><|user|> "},
+    "mixtral":{"name": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "file":"mistral-7b-instruct-v0.1.Q4_K_M.gguf", "suffix":"[/INST]", "prefix":"<s>[INST] "},
+    "llama2":{"name": "TheBloke/Llama-2-7B-Chat-GGUF", "file":"llama-2-7b-chat.Q4_K_M.gguf", "suffix":"[/INST]", "prefix":"[INST] <<SYS>> You are a helpful assistant <</SYS>>"},
+    "llama2":{"name": "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF", "file":"solar-10.7b-instruct-v1.0.Q4_K_M.gguf", "suffix":"\n### Assistant:\n", "prefix":"### User:\n"}
+}
+
+for k in llms.keys():
+    AutoModelForCausalLM.from_pretrained(llms[k]['name'], model_file=llms[k]['file'])
 
 #Pydantic object
 class validation(BaseModel):
@@ -13,11 +24,15 @@ app = FastAPI()
 
 @app.post("/llm_on_cpu")
 async def stream(item: validation):
-
-
-
+
+
+
+    prefix=llms[item.llm]['prefix']
+    suffix=llms[item.llm]['suffix']
     user="""
 {prompt}"""
+
+    llm = AutoModelForCausalLM.from_pretrained(llms[item.llm]['name'], model_file=llms[item.llm]['file'])
 
     prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
     return llm(prompt)
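
To make the templating concrete: stream() wraps the raw prompt in the selected model's prefix and suffix, and the user template contributes a leading newline. A minimal sketch of that assembly, reusing the "zephyr" values verbatim from the llms dict above (the example prompt string is made up):

# Sketch of the prompt assembly performed in stream(), with the "zephyr"
# prefix/suffix copied from the llms dict above.
prefix = "<|system|>You are a helpful assistant</s><|user|> "
suffix = "</s><|assistant|>"
user = """
{prompt}"""

prompt = f"{prefix}{user.replace('{prompt}', 'What is GGUF?')}{suffix}"
print(repr(prompt))
# -> '<|system|>You are a helpful assistant</s><|user|> \nWhat is GGUF?</s><|assistant|>'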
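
A client call against the new endpoint might then look like the sketch below. The field names follow from the handler reading item.llm and item.prompt (the full validation class body falls outside these hunks), and the base URL is a placeholder for wherever the Space is served. Note also that the dict literal above assigns the key "llama2" twice; later duplicate keys win in Python, so at runtime "llama2" resolves to the SOLAR entry and the Llama-2-7B-Chat entry is shadowed.

import requests

# Placeholder URL; point this at the actual host/port of the Space.
url = "http://localhost:7860/llm_on_cpu"

# "llm" selects a key in the llms dict; "prompt" is the raw user text.
payload = {"llm": "zephyr", "prompt": "What is GGUF?"}

resp = requests.post(url, json=payload)
resp.raise_for_status()
print(resp.json())  # the completion string returned by llm(prompt)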