mzwing committed
Commit d425760 • 1 Parent(s): c05d4db

feat: try to use llama.cpp server to load gguf model

Files changed (4)
  1. Dockerfile +13 -28
  2. README.md +5 -3
  3. loadmodel.json +0 -4
  4. nginx.conf +0 -41
Dockerfile CHANGED
@@ -1,42 +1,27 @@
-FROM debian:bookworm-slim
+FROM intel/oneapi-runtime:latest
+
+ARG LLAMA_CPP_VERSION
 
 WORKDIR /app
 
-RUN apt-get update && apt-get install -y apt-transport-https ca-certificates wget gpg curl \
-    && wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null \
-    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \
-    && apt-get update && apt-get install -y --no-install-recommends \
+RUN apt-get update && apt-get install -y --no-install-recommends \
     aria2 \
-    nginx \
-    curl \
     unzip \
-    intel-oneapi-mkl \
+    clinfo \
     && rm -rf /var/lib/apt/lists/*
 
-RUN aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/nitro-v0.3.21/nitro-0.3.21-linux-amd64-intel-mkl-avx2.zip \
-    && unzip nitro-0.3.21-linux-amd64-intel-mkl-avx2.zip -d /app \
-    && chmod +x /app/nitro \
-    && rm -rf nitro-0.3.21-linux-amd64-intel-mkl-avx2.zip
-
-RUN mkdir -p /etc/nginx/
-
-COPY nginx.conf /etc/nginx/nginx.conf
-
-RUN echo '#!/bin/sh\n\
-(source /opt/intel/oneapi/setvars.sh && /app/nitro) &\n\
-nginx\n\
-sleep 5\n\
-curl -X POST http://127.0.0.1:3928/inferences/llamacpp/loadmodel \\\n\
-    -H "Content-Type: application/json" \\\n\
-    -d @/app/model/loadmodel.json\n\
-wait' > /start.sh && chmod +x /start.sh
-
-COPY loadmodel.json /app/model/loadmodel.json
+RUN aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/llama_cpp-${LLAMA_CPP_VERSION}/llama-${LLAMA_CPP_VERSION}-bin-linux-avx2-intel-mkl-x64.zip \
+    && unzip llama-${LLAMA_CPP_VERSION}-bin-linux-avx2-intel-mkl-x64.zip -d /app/llama.cpp \
+    && mv /app/llama.cpp/server /app/ \
+    && chmod +x /app/server \
+    && rm -rf llama-${LLAMA_CPP_VERSION}-bin-linux-avx2-intel-mkl-x64.zip llama.cpp
+
+RUN clinfo -l
 
 ENV PATH="/app:${PATH}"
 
 ENV HF_HOME="/data/.huggingface"
 
-EXPOSE 80
+EXPOSE 1145
 
-CMD ["/start.sh"]
+CMD server -m ~/.cache/huggingface/hub/mzwing/AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.Q8_0.gguf -t $(nproc) -c 16384 -a AquilaChat2-7B-16K --port 1145 --api-key sk-1145141919810
README.md CHANGED
@@ -1,19 +1,21 @@
 ---
-title: Nitro AquilaChat2-7B-16K-GGUF
+title: AquilaChat2-7B-16K-GGUF
 emoji: 👀
 colorFrom: blue
 colorTo: blue
 sdk: docker
 pinned: false
 license: mit
-short_description: AquilaChat2-7B-16K-GGUF, loaded by nitro.
+short_description: AquilaChat2-7B-16K-GGUF, loaded by llama.cpp server.
 app_port: 1145
 custom_headers:
   cross-origin-embedder-policy: credentialless
   cross-origin-opener-policy: same-origin
   cross-origin-resource-policy: cross-origin
+models:
+  - mzwing/AquilaChat2-7B-16K-GGUF
 preload_from_hub:
   - mzwing/AquilaChat2-7B-16K-GGUF AquilaChat2-7B-16K.Q8_0.gguf
 ---
 
-AquilaChat2-7B-16K-GGUF, loaded by nitro.
+AquilaChat2-7B-16K-GGUF, loaded by llama.cpp server.
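With the Space running, the llama.cpp server exposes an OpenAI-compatible API on app_port 1145, guarded by the --api-key set in the Dockerfile's CMD. A sketch of a chat request, where <space-url> is a placeholder for the deployed endpoint:

    # <space-url> is a placeholder; the model alias and key come from the CMD
    curl <space-url>/v1/chat/completions \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer sk-1145141919810" \
        -d '{"model": "AquilaChat2-7B-16K", "messages": [{"role": "user", "content": "Hello!"}]}'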
loadmodel.json DELETED
@@ -1,4 +0,0 @@
-{
-    "llama_model_path": "~/.cache/huggingface/hub/mzwing/AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.Q8_0.gguf",
-    "ctx_len": 16384
-}
nginx.conf DELETED
@@ -1,41 +0,0 @@
-worker_processes auto;
-
-events {
-    worker_connections 1024;
-}
-
-http {
-    include mime.types;
-    default_type application/octet-stream;
-
-    sendfile on;
-    keepalive_timeout 65;
-
-    proxy_connect_timeout 600;
-    proxy_send_timeout 600;
-    proxy_read_timeout 600;
-    send_timeout 600;
-
-    map $http_upgrade $connection_upgrade {
-        default upgrade;
-        '' close;
-    }
-
-    server {
-        listen 1145;
-
-        location / {
-            proxy_pass http://127.0.0.1:3928;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header Upgrade $http_upgrade;
-            proxy_set_header Connection $connection_upgrade;
-        }
-
-        location ~ ^/(inferences/llamacpp/loadmodel/|inferences/llamacpp/unloadmodel/) {
-            deny all;
-            return 403;
-        }
-    }
-}