Spaces:
Sleeping
Sleeping
File size: 617 Bytes
d20d20b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from llama_cpp import Llama
from fastapi import FastAPI
from pydantic import BaseModel
import requests
from ctransformers import AutoModelForCausalLM
# Load the 4-bit-quantized TinyLlama 1.1B chat model through ctransformers.
# NOTE(review): this downloads weights from the Hugging Face Hub at import
# time — startup requires network access; confirm that is acceptable.
llm = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v0.6", model_file="ggml-model-q4_0.gguf")
# Pydantic request-body schema for the /llm_on_cpu endpoint.
class validation(BaseModel):
    """Request body carrying the raw user prompt to complete."""
    # The user-supplied prompt text to send to the model.
    prompt: str
# FastAPI application instance; routes are registered on it below.
app = FastAPI()
@app.post("/llm_on_cpu")
async def stream(item: validation):
prefix="""<|user|>
"""
suffix="""<|endoftext|><|assistant|>"""
user="""
{prompt}"""
prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
return llm(prompt) |