lpetrl committed
Commit 7626706
Parent: 37fd751

feat(API): Implemented basic API functionality.

.dockerignore ADDED
File without changes
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ models/zephyr-7b-beta.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+ file-path filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -14,6 +14,7 @@ dist/
  downloads/
  eggs/
  .eggs/
+ .idea/
  lib/
  lib64/
  parts/
@@ -120,7 +121,7 @@ celerybeat.pid
  *.sage.py

  # Environments
- .env
+ config.yaml
  .venv
  env/
  venv/
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.9
+
+ COPY requirements.txt ./requirements.txt
+
+ RUN python -m pip install -U pip && \
+     python -m pip install -r requirements.txt && \
+     python -m pip cache purge
+
+ COPY ./src /app/src
+ COPY ./models /app/models
+
+ WORKDIR /app
+
+ CMD ["uvicorn", "src.api_run:app", "--host", "0.0.0.0", "--port", "7860"]
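A minimal sketch of building and running this image locally (the image name is an assumption; config.yaml is gitignored and not copied into the image, so it is bind-mounted into /app, where src/handlers.py opens it by relative path):

    docker build -t zephyr-api .
    docker run -p 7860:7860 -v "$(pwd)/config.yaml:/app/config.yaml" zephyr-api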
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi==0.85.2
+ uvicorn==0.20.0
+ llama-cpp-python==0.2.53
+ PyYAML==6.0.1
src/api_run.py ADDED
@@ -0,0 +1,13 @@
+ from fastapi import FastAPI
+
+ from src.handlers import router
+
+
+ def get_application() -> FastAPI:
+     application = FastAPI()
+     application.include_router(router)
+
+     return application
+
+
+ app = get_application()
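For local development outside Docker, the same app factory can be served from the repository root (assuming config.yaml sits there, since the handlers open it by relative path):

    uvicorn src.api_run:app --host 0.0.0.0 --port 7860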
src/handlers.py ADDED
@@ -0,0 +1,44 @@
+ import yaml
+
+ from llama_cpp import Llama
+ from fastapi import APIRouter, status
+ from fastapi.responses import JSONResponse
+
+ from src.modules.dialog_system import ConversationHandler, MessageRole
+ from src.modules.data_models import UserMessage, AnswerMessage
+
+ router = APIRouter()
+
+ # The gitignored config file is named config.yaml (see .gitignore above).
+ with open('config.yaml', 'r') as file:
+     router.config = yaml.safe_load(file)
+
+ router.llm = Llama(
+     model_path=router.config['model_path'],
+     n_ctx=int(router.config['context_tokens'])
+ )
+
+ # The answer-length limit is applied at generation time, so it belongs to
+ # the conversation handler rather than to the Llama constructor.
+ router.conversation = ConversationHandler(
+     model=router.llm,
+     message_role=MessageRole,
+     max_answer_tokens=int(router.config['max_answer_tokens'])
+ )
+
+
+ @router.get("/v1/service/status", status_code=status.HTTP_200_OK)
+ async def health() -> AnswerMessage:
+     return AnswerMessage(message="OK")
+
+
+ # GET request bodies are dropped by most HTTP clients, so this endpoint
+ # accepts the prompt via POST.
+ @router.post("/v1/chat/completions", response_model=AnswerMessage)
+ async def chat_completions(user_message: UserMessage) -> AnswerMessage:
+     try:
+         router.conversation.send_message(user_message.prompt)
+         response = router.conversation.generate_reply()
+         return AnswerMessage(message=response)
+     except Exception as e:
+         return JSONResponse(status_code=500, content={"message": str(e)})
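config.yaml itself is gitignored, so it is absent from this commit. A hypothetical example covering the three keys the handlers read (the values are assumptions; model_path reuses the LFS-tracked file from .gitattributes):

    model_path: models/zephyr-7b-beta.Q4_K_S.gguf
    context_tokens: 2048
    max_answer_tokens: 128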
src/modules/data_models.py ADDED
@@ -0,0 +1,9 @@
+ from pydantic import BaseModel
+
+
+ class UserMessage(BaseModel):
+     prompt: str
+
+
+ class AnswerMessage(BaseModel):
+     message: str
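A client-side sketch of the request/response shapes these models define, using the requests library (not among this project's dependencies; host and port match the Dockerfile's CMD):

    import requests

    # Health check: GET /v1/service/status returns an AnswerMessage.
    print(requests.get("http://localhost:7860/v1/service/status").json())

    # Chat: POST a UserMessage body, receive an AnswerMessage body.
    payload = {"prompt": "What does include_router do in FastAPI?"}
    reply = requests.post("http://localhost:7860/v1/chat/completions", json=payload)
    print(reply.json()["message"])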
src/modules/dialog_system.py ADDED
@@ -0,0 +1,49 @@
+ from dataclasses import dataclass
+
+ from llama_cpp import Llama
+ from llama_cpp import ChatCompletionRequestMessage as Message
+ from llama_cpp import ChatCompletionRequestSystemMessage as SystemMessage
+ from llama_cpp import ChatCompletionRequestAssistantMessage as AssistantMessage
+ from llama_cpp import ChatCompletionRequestUserMessage as UserMessage
+
+
+ @dataclass
+ class MessageRole:
+     ASSISTANT: str = "assistant"
+     SYSTEM: str = "system"
+     USER: str = "user"
+     EXIT: str = "exit"
+
+
+ class ConversationHandler:
+     def __init__(self, model: Llama, message_role: MessageRole, max_answer_tokens: int = 128) -> None:
+         self.model: Llama = model
+         self.message_role = message_role
+         self.max_answer_tokens = max_answer_tokens
+         self.messages: list[Message] = [
+             SystemMessage(
+                 role=self.message_role.SYSTEM,
+                 content='You are a helpful developer assistant; answer all questions correctly and concisely.'
+             ),
+             AssistantMessage(role=self.message_role.ASSISTANT, content='Hello, do you have any questions?'),
+         ]
+
+     def send_message(self, content: str) -> None:
+         new_message = UserMessage(role=self.message_role.USER, content=content)
+         self.messages.append(new_message)
+
+     def generate_reply(self) -> str:
+         response = self.model.create_chat_completion(
+             messages=self.messages,
+             temperature=0.7,
+             top_p=0.9,
+             top_k=20,
+             max_tokens=self.max_answer_tokens
+         )
+
+         # The reply text sits at ['choices'][0]['message']['content']; the
+         # ['message'] entry itself is a dict, not a string.
+         response_content = response['choices'][0]['message']['content']
+         self.messages.append(AssistantMessage(role=self.message_role.ASSISTANT, content=response_content))
+
+         return response_content
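A short sketch of driving ConversationHandler directly, without the API layer (the model path is an assumption, reusing the LFS-tracked file from .gitattributes):

    from llama_cpp import Llama

    from src.modules.dialog_system import ConversationHandler, MessageRole

    # n_ctx mirrors the 'context_tokens' config key read in src/handlers.py.
    llm = Llama(model_path="models/zephyr-7b-beta.Q4_K_S.gguf", n_ctx=2048)

    conversation = ConversationHandler(model=llm, message_role=MessageRole, max_answer_tokens=128)
    conversation.send_message("Explain what a GGUF file is in one sentence.")
    print(conversation.generate_reply())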