from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

# Download the quantized TowerInstruct-7B GGUF model from the Hugging Face Hub
# and load it with llama.cpp.
llm = Llama.from_pretrained(
    repo_id="TheBloke/TowerInstruct-7B-v0.1-GGUF",
    filename="towerinstruct-7b-v0.1.Q5_K_M.gguf",
    n_ctx=4096,
)

# Pydantic model that validates the JSON body of the /translate endpoint
class Validation(BaseModel):
    prompt: str

# FastAPI app
app = FastAPI()

def translate_zh_to_en(llm, text):
    # TowerInstruct expects ChatML-formatted prompts such as:
    #   <|im_start|>user
    #   Translate the following text from Portuguese into English.
    #   Portuguese: Um grupo de investigadores lançou um novo modelo para tarefas relacionadas com tradução.
    #   English:<|im_end|>
    #   <|im_start|>assistant
    # create_chat_completion applies this chat template automatically.
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": f"Translate the following text from Chinese into English.\nChinese: {text}\nEnglish:",
            }
        ],
        temperature=0.2,
        max_tokens=2048,
    )
    # Extract the generated translation from the completion payload
    content = response["choices"][0]["message"]["content"]
    return content

@app.post("/translate")
async def translate(item: Validation):
    return translate_zh_to_en(llm, item.prompt)
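
# A minimal sketch for launching the server directly; the host, port, and the
# use of uvicorn here are assumptions, since the original script does not show
# how it is started.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is running (hypothetical URL and sentence):
#   curl -X POST http://127.0.0.1:8000/translate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "研究人员发布了一个新的翻译模型。"}'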