# NOTE(review): the lines below were Hugging Face Spaces page residue from
# scraping ("Spaces: / Sleeping / Sleeping") — preserved here as a comment,
# since they are not part of the program.
# Third-party imports. `Llama` and `requests` are unused in this extract but
# are kept intentionally — they may be used elsewhere in the project.
from llama_cpp import Llama
from fastapi import FastAPI
from pydantic import BaseModel
import requests
from ctransformers import AutoModelForCausalLM

# Load the 4-bit-quantized TinyLlama chat model once at module import time so
# every request reuses the same in-memory model. Downloads the model file from
# the Hugging Face Hub on first run (network + disk I/O).
llm = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
    model_file="ggml-model-q4_0.gguf",
)
# Pydantic object
class validation(BaseModel):
    """Request-body schema for the generation endpoint.

    Attributes:
        prompt: the raw user prompt to be wrapped in the chat template.
    """

    prompt: str
# Fast API
app = FastAPI()


# NOTE(review): the original extract had no route decorator, so this handler
# was never registered with `app`. The path "/llm" is a guess — TODO confirm
# against the actual client before deploying.
@app.post("/llm")
async def stream(item: validation):
    """Wrap the user's prompt in the TinyLlama chat template and return the
    model's completion.

    Args:
        item: validated request body carrying the raw user prompt.

    Returns:
        The text completion produced by the module-level `llm` model.

    NOTE(review): despite its name this does not stream — it returns the full
    completion in one response; also the blocking `llm(...)` call runs inside
    an async handler and will block the event loop for the call's duration.
    """
    prefix = """<|user|>
"""
    suffix = """<|endoftext|><|assistant|>"""
    user = """
{prompt}"""
    # Substitute the user's text into the template literally (not an f-string
    # substitution on `user`, which contains a literal "{prompt}" placeholder).
    prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
    return llm(prompt)