# Spaces: Sleeping (page-status text captured along with the file; not part of the program)
from ctransformers import AutoModelForCausalLM | |
from fastapi import FastAPI | |
from pydantic import BaseModel | |
from llama_cpp import Llama | |
# Module-level model handle, created once at import time and shared by the
# request handler. `from_pretrained` resolves the GGUF file from the given
# Hugging Face repository; `n_ctx` sets the context window to 4096 tokens.
llm = Llama.from_pretrained(
    repo_id="TheBloke/TowerInstruct-7B-v0.1-GGUF",
    filename="towerinstruct-7b-v0.1.Q5_K_M.gguf",
    n_ctx=4096,
)
# Pydantic object
class validation(BaseModel):
    """Request body schema: a single string field holding the text to translate."""

    # Source text; the `stream` handler forwards it to `translate_zh_to_en`.
    prompt: str
# FastAPI application instance for this module.
app = FastAPI()
def translate_zh_to_en(llm, text, *, temperature=0.2, max_tokens=2048):
    """Translate Chinese text into English using a chat-completion model.

    Args:
        llm: Object exposing ``create_chat_completion(messages=...,
            temperature=..., max_tokens=...)`` that returns a mapping shaped
            like ``{"choices": [{"message": {"content": ...}}]}``.
        text: The Chinese source text.
        temperature: Sampling temperature forwarded to the model.
        max_tokens: Upper bound on generated tokens forwarded to the model.

    Returns:
        The ``content`` of the first choice's message, or ``""`` when ``text``
        is empty or whitespace only.

    Raises:
        KeyError, IndexError: If the response lacks the expected structure.
    """
    # Nothing to translate: skip the model call instead of letting the model
    # invent output for an empty prompt.
    if not text or not text.strip():
        return ""

    prompt = (
        "Translate the following text from Chinese into English.\n"
        f"Chinese: {text}\n"
        "English:"
    )
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["message"]["content"]
# Sample of the raw chat prompt layout, kept for reference
# (Portuguese-to-English example):
# <|im_start|>user
# Translate the following text from Portuguese into English.
# Portuguese: Um grupo de investigadores lançou um novo modelo para tarefas relacionadas com tradução.
# English:<|im_end|>
# <|im_start|>assistant
async def stream(item: validation):
    """Translate the request's ``prompt`` from Chinese to English.

    Args:
        item: Parsed request body; only ``item.prompt`` is read.

    Returns:
        The translated text returned by ``translate_zh_to_en``.

    NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    this function here, so as written it is not registered on ``app`` —
    confirm whether one was lost and which path it should use.
    """
    # The translation call is not awaited: it runs synchronously inside this
    # coroutine using the module-level ``llm``.
    return translate_zh_to_en(llm, item.prompt)