feat(API): Implemented basic API functionality.
- .dockerignore +0 -0
- .gitattributes +2 -0
- .gitignore +2 -1
- Dockerfile +14 -0
- requirements.txt +4 -0
- src/api_run.py +13 -0
- src/handlers.py +39 -0
- src/modules/data_models.py +9 -0
- src/modules/dialog_system.py +46 -0
.dockerignore
ADDED
File without changes
.gitattributes
ADDED
@@ -0,0 +1,2 @@
+models/zephyr-7b-beta.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+file-path filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -14,6 +14,7 @@ dist/
 downloads/
 eggs/
 .eggs/
+.idea/
 lib/
 lib64/
 parts/
@@ -120,7 +121,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.
+config.yaml
 .venv
 env/
 venv/
Dockerfile
ADDED
@@ -0,0 +1,14 @@
+FROM python:3.9
+
+COPY requirements.txt ./requirements.txt
+
+RUN python -m pip install -U pip && \
+    python -m pip install -r requirements.txt && \
+    python -m pip cache purge
+
+COPY ./src /app/src
+COPY ./models /app/models
+
+WORKDIR /app
+
+CMD ["uvicorn", "src.api_run:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+fastapi==0.85.2
+uvicorn==0.20.0
+llama-cpp-python==0.2.53
+PyYAML==6.0.1
src/api_run.py
ADDED
@@ -0,0 +1,13 @@
+from fastapi import FastAPI
+
+from src.handlers import router
+
+
+def get_application() -> FastAPI:
+    application = FastAPI()
+    application.include_router(router)
+
+    return application
+
+
+app = get_application()
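src/api_run.py exposes the FastAPI instance as app, so the import string for a server is src.api_run:app. A minimal local-run sketch (assuming the config file and model that src/handlers.py loads at import time are already in place; port 7860 mirrors the Dockerfile):

import uvicorn

if __name__ == "__main__":
    # Serve the application object defined in src/api_run.py.
    uvicorn.run("src.api_run:app", host="0.0.0.0", port=7860)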
src/handlers.py
ADDED
@@ -0,0 +1,39 @@
+import yaml
+
+from llama_cpp import Llama
+from fastapi import APIRouter, status
+from fastapi.responses import JSONResponse
+
+from src.modules.dialog_system import ConversationHandler, MessageRole
+from src.modules.data_models import UserMessage, AnswerMessage
+
+router = APIRouter()
+
+with open('config.yml', 'r') as file:
+    router.config = yaml.safe_load(file)
+
+router.llm = Llama(
+    model_path=router.config['model_path'],
+    n_ctx=int(router.config['context_tokens']),
+    max_answer_len=int(router.config['max_answer_tokens'])
+)
+
+router.conversation = ConversationHandler(
+    model=router.llm,
+    message_role=MessageRole
+)
+
+
+@router.get("/v1/service/status", status_code=status.HTTP_200_OK)
+async def health() -> AnswerMessage:
+    return AnswerMessage(message="OK")
+
+
+@router.get("/v1/chat/completions", response_model=AnswerMessage)
+async def chat_completions(user_message: UserMessage) -> AnswerMessage:
+    try:
+        router.conversation.send_message(user_message.prompt)
+        response = router.conversation.generate_reply()
+        return AnswerMessage(message=response)
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"message": str(e)})
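src/handlers.py opens config.yml from the working directory at import time and forwards model_path, context_tokens and max_answer_tokens to Llama; the Dockerfile copies no config file (and .gitignore ignores config.yaml), so one has to be supplied alongside the code. A hypothetical sketch that writes such a file (the model path is the LFS-tracked file from .gitattributes; both token counts are placeholder guesses):

import yaml

config = {
    "model_path": "models/zephyr-7b-beta.Q4_K_S.gguf",  # LFS-tracked weights from .gitattributes
    "context_tokens": 2048,       # placeholder; handlers.py passes this to Llama as n_ctx
    "max_answer_tokens": 128,     # placeholder; handlers.py passes this as max_answer_len
}

with open("config.yml", "w") as file:
    yaml.safe_dump(config, file)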
src/modules/data_models.py
ADDED
@@ -0,0 +1,9 @@
+from pydantic import BaseModel
+
+
+class UserMessage(BaseModel):
+    prompt: str
+
+
+class AnswerMessage(BaseModel):
+    message: str
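The two models define the JSON bodies of the chat endpoint: UserMessage is the request, AnswerMessage the response. A short sketch of the shapes they imply (the example strings are made up):

from src.modules.data_models import UserMessage, AnswerMessage

request = UserMessage(prompt="What does WORKDIR do in a Dockerfile?")    # body sent to /v1/chat/completions
reply = AnswerMessage(message="It sets the working directory for later instructions.")
print(request.json())   # {"prompt": "..."}
print(reply.json())     # {"message": "..."}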
src/modules/dialog_system.py
ADDED
@@ -0,0 +1,46 @@
+from dataclasses import dataclass
+
+from llama_cpp import Llama
+from llama_cpp import ChatCompletionRequestMessage as Message
+from llama_cpp import ChatCompletionRequestSystemMessage as SystemMessage
+from llama_cpp import ChatCompletionRequestAssistantMessage as AssistantMessage
+from llama_cpp import ChatCompletionRequestUserMessage as UserMessage
+
+
+@dataclass
+class MessageRole:
+    ASSISTANT: str = "assistant"
+    SYSTEM: str = "system"
+    USER: str = "user"
+    EXIT: str = "exit"
+
+
+class ConversationHandler:
+    def __init__(self, model: Llama, message_role: MessageRole) -> None:
+        self.model: Llama = model
+        self.message_role = message_role
+        self.messages: list[Message] = [
+            SystemMessage(
+                role=self.message_role.SYSTEM,
+                content='You are a helpful developer assistant, answer all the questions correctly and concisely.'
+            ),
+            AssistantMessage(role=self.message_role.ASSISTANT, content='Hello, do you have any question?'),
+        ]
+
+    def send_message(self, content: str):
+        new_message = UserMessage(role=self.message_role.USER, content=content)
+        self.messages.append(new_message)
+
+    def generate_reply(self) -> str:
+        response = self.model.create_chat_completion(
+            messages=self.messages,
+            temperature=0.7,
+            top_p=0.9,
+            top_k=20,
+            max_tokens=128
+        )
+
+        response_content = response['choices'][0]['message']['content']
+        self.messages.append(AssistantMessage(role=self.message_role.ASSISTANT, content=response_content))
+
+        return response_content
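A standalone usage sketch of ConversationHandler outside the API (assumes the zephyr GGUF file from .gitattributes has been pulled locally; n_ctx=2048 is an arbitrary choice):

from llama_cpp import Llama

from src.modules.dialog_system import ConversationHandler, MessageRole

llm = Llama(
    model_path="models/zephyr-7b-beta.Q4_K_S.gguf",  # LFS-tracked weights from .gitattributes
    n_ctx=2048,                                      # placeholder context window
)
conversation = ConversationHandler(model=llm, message_role=MessageRole)

conversation.send_message("Name one reason to track model weights with Git LFS.")
print(conversation.generate_reply())  # the reply is also appended to the running history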