feat: try to use llama.cpp server to load gguf model
- Dockerfile +13 -28
- README.md +5 -3
- loadmodel.json +0 -4
- nginx.conf +0 -41
Dockerfile
CHANGED
@@ -1,42 +1,27 @@
-
+ARG LLAMA_CPP_VERSION
+
+FROM intel/oneapi-runtime:latest
 
 WORKDIR /app
 
-RUN apt-get update && apt-get install -y
-    && wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null \
-    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \
-    && apt-get update && apt-get install -y --no-install-recommends \
+RUN apt-get update && apt-get install -y --no-install-recommends \
     aria2 \
-    nginx \
-    curl \
     unzip \
-
+    clinfo \
     && rm -rf /var/lib/apt/lists/*
 
-RUN aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/
-    && unzip
-    &&
-    &&
-
-RUN mkdir -p /etc/nginx/
-
-COPY nginx.conf /etc/nginx/nginx.conf
-
-RUN echo '#!/bin/sh\n\
-(source /opt/intel/oneapi/setvars.sh && /app/nitro) &\n\
-nginx\n\
-sleep 5\n\
-curl -X POST http://127.0.0.1:3928/inferences/llamacpp/loadmodel \\\n\
--H "Content-Type: application/json" \\\n\
--d @/app/model/loadmodel.json\n\
-wait' > /start.sh && chmod +x /start.sh
+RUN aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/llama_cpp-$(LLAMA_CPP_VERSION)/llama-$(LLAMA_CPP_VERSION)-bin-linux-avx2-intel-mkl-x64.zip \
+    && unzip llama-$(LLAMA_CPP_VERSION)-bin-linux-avx2-intel-mkl-x64.zip -d /app/llama.cpp \
+    && mv /app/llama.cpp/server /app/ \
+    && chmod +x /app/server \
+    && rm -rf llama-$(LLAMA_CPP_VERSION)-bin-linux-avx2-intel-mkl-x64.zip llama.cpp
 
-
+RUN clinfo -l
 
 ENV PATH="/app:${PATH}"
 
 ENV HF_HOME="/data/.huggingface"
 
-EXPOSE
+EXPOSE 1145
 
-CMD ["/
+CMD ["server", "-m ~/.cache/huggingface/hub/mzwing/AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.Q8_0.gguf -t $(nproc) -c 16384 -a AquilaChat2-7B-16K --port 1145 --api-key sk-1145141919810"]
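A note on the new Dockerfile: exec-form CMD (a JSON array) is not run through a shell, so `~`, `$(nproc)`, and the whole flag string above reach `server` as one literal argument; likewise, `$(LLAMA_CPP_VERSION)` in the RUN lines is shell command substitution rather than the `${LLAMA_CPP_VERSION}` build argument, and an ARG declared before FROM goes out of scope after it unless re-declared. A minimal shell-form sketch under those assumptions (not the commit's actual instructions):

# Re-declare the build argument after FROM so ${LLAMA_CPP_VERSION} expands in RUN.
ARG LLAMA_CPP_VERSION
RUN aria2c -c -x16 "https://github.com/MZWNET/actions/releases/download/llama_cpp-${LLAMA_CPP_VERSION}/llama-${LLAMA_CPP_VERSION}-bin-linux-avx2-intel-mkl-x64.zip"

# Shell form runs via /bin/sh -c, so $HOME and $(nproc) expand at container start
# and each flag is split into its own argument.
CMD server -m "$HOME/.cache/huggingface/hub/mzwing/AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.Q8_0.gguf" \
    -t $(nproc) -c 16384 -a AquilaChat2-7B-16K --port 1145 --api-key sk-1145141919810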
README.md
CHANGED
@@ -1,19 +1,21 @@
 ---
-title:
+title: AquilaChat2-7B-16K-GGUF
 emoji: π
 colorFrom: blue
 colorTo: blue
 sdk: docker
 pinned: false
 license: mit
-short_description: AquilaChat2-7B-16K-GGUF, loaded by
+short_description: AquilaChat2-7B-16K-GGUF, loaded by llama.cpp server.
 app_port: 1145
 custom_headers:
   cross-origin-embedder-policy: credentialless
   cross-origin-opener-policy: same-origin
   cross-origin-resource-policy: cross-origin
+models:
+  - mzwing/AquilaChat2-7B-16K-GGUF
 preload_from_hub:
   - mzwing/AquilaChat2-7B-16K-GGUF AquilaChat2-7B-16K.Q8_0.gguf
 ---
 
-AquilaChat2-7B-16K-GGUF, loaded by
+AquilaChat2-7B-16K-GGUF, loaded by llama.cpp server.
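For context, `preload_from_hub` downloads through huggingface_hub, which stores files in a hashed snapshot layout under the hub cache rather than at the flat path the Dockerfile CMD uses. A hedged sketch for resolving the preloaded GGUF at start-up (the `find` fallback is illustrative and not part of this Space):

# Locate the preloaded GGUF inside the hub cache (assumes the standard
# huggingface_hub snapshot layout under $HF_HOME; the snapshot hash is not
# knowable at build time).
MODEL_PATH=$(find "${HF_HOME:-$HOME/.cache/huggingface}/hub" \
    -name 'AquilaChat2-7B-16K.Q8_0.gguf' -print -quit 2>/dev/null)
echo "model resolved to: ${MODEL_PATH}"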
loadmodel.json
DELETED
@@ -1,4 +0,0 @@
-{
-    "llama_model_path": "~/.cache/huggingface/hub/mzwing/AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.Q8_0.gguf",
-    "ctx_len": 16384
-}
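This file was the request body the removed start script POSTed to Nitro's load-model endpoint; the call, exactly as it appeared in the old start.sh:

curl -X POST http://127.0.0.1:3928/inferences/llamacpp/loadmodel \
    -H "Content-Type: application/json" \
    -d @/app/model/loadmodel.json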
nginx.conf
DELETED
@@ -1,41 +0,0 @@
-worker_processes auto;
-
-events {
-    worker_connections 1024;
-}
-
-http {
-    include mime.types;
-    default_type application/octet-stream;
-
-    sendfile on;
-    keepalive_timeout 65;
-
-    proxy_connect_timeout 600;
-    proxy_send_timeout 600;
-    proxy_read_timeout 600;
-    send_timeout 600;
-
-    map $http_upgrade $connection_upgrade {
-        default upgrade;
-        '' close;
-    }
-
-    server {
-        listen 1145;
-
-        location / {
-            proxy_pass http://127.0.0.1:3928;
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header Upgrade $http_upgrade;
-            proxy_set_header Connection $connection_upgrade;
-        }
-
-        location ~ ^/(inferences/llamacpp/loadmodel/|inferences/llamacpp/unloadmodel/) {
-            deny all;
-            return 403;
-        }
-    }
-}
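With the nginx proxy removed, clients reach the llama.cpp server directly on port 1145. Assuming this build exposes the server's OpenAI-compatible endpoint, a request would look like the sketch below (the model alias and API key are taken from the new CMD):

curl http://127.0.0.1:1145/v1/chat/completions \
    -H "Authorization: Bearer sk-1145141919810" \
    -H "Content-Type: application/json" \
    -d '{"model": "AquilaChat2-7B-16K", "messages": [{"role": "user", "content": "Hello"}]}'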