File size: 1,729 Bytes
2b7e75a
d119bf0
 
b7c945f
 
 
 
 
 
 
e5222d3
718364e
d119bf0
0c5837d
b7c945f
d119bf0
90dab9b
b7c945f
d119bf0
90dab9b
4b045d6
b7c945f
d119bf0
186058d
d119bf0
 
90dab9b
b7c945f
4b045d6
 
e5222d3
4b045d6
 
c3049dc
de2174e
4411e6f
718364e
de2174e
4411e6f
de2174e
 
e3ac9c1
718364e
de2174e
31ec84e
1fe6da2
d0da2f1
 
4b045d6
e3028b3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Install the OS-level packages used by the later build steps.
# - apt-get is used rather than apt, whose command-line interface is not
#   intended for use in scripts.
# - DEBIAN_FRONTEND is set inline so the install never waits on an interactive
#   prompt, without leaking the variable into the runtime environment.
# - The apt package lists are removed in the same layer that created them so
#   they do not add to the image size.
# NOTE(review): --no-install-recommends is deliberately not added here;
# dropping recommended packages could remove tools that later steps rely on
# implicitly. Verify the build before tightening this.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Add an unprivileged account named "user" (UID 1000) with its own home directory
RUN useradd --create-home --uid 1000 user

# Every instruction from here on runs as that account instead of root
USER user

# Point HOME at the account's home directory ...
ENV HOME=/home/user
# ... and put the account's ~/.local/bin directory first on PATH
ENV PATH=${HOME}/.local/bin:${PATH}

# Use the app directory under HOME as the working directory
WORKDIR ${HOME}/app

# The steps below are ordered from least to most frequently changing so that
# editing the application files does not invalidate the expensive dependency,
# clone and model-download layers.

# Install aphrodite-engine from PyPI to handle dependencies.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir aphrodite-engine

# Install the additional Python packages in a single layer:
# huggingface-hub / hf-transfer for the model download, plus aioprometheus.
RUN python3 -m pip install --no-cache-dir \
        aioprometheus \
        hf-transfer \
        huggingface-hub

# Clone the specific branch of aphrodite-engine for the latest features.
# Only the tip of that branch is fetched; the history is not used by the build.
RUN git clone --depth 1 --single-branch --branch feat/exllamav2-support \
        https://github.com/PygmalionAI/aphrodite-engine.git \
        "$HOME/aphrodite-engine"

# Set environment variable to enable hf-transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Download the model using huggingface-cli into $HOME/goliath-gptq
RUN huggingface-cli download LoneStriker/TinyLlama-1.1B-32k-Instruct-8.0bpw-h8-exl2 \
        --local-dir "$HOME/goliath-gptq" \
        --local-dir-use-symlinks False \
        --cache-dir "$HOME/cache"

# Make the cloned aphrodite-engine checkout importable by Python
ENV PYTHONPATH=$HOME/aphrodite-engine

# Copy the current directory contents into the container at /home/user/app.
# This comes after the heavy layers above because the build context changes
# far more often than the dependencies or the model.
COPY --chown=user . $HOME/app

# Expose the port the API server will listen on
EXPOSE 7860

# Command to run the API server from the cloned directory.
# A bash wrapper is used because the command needs `cd` plus $HOME and ~
# expansion, which exec-form CMD does not perform on its own. `exec` makes the
# Python process replace that shell, so container stop signals are delivered
# to the server itself instead of stopping at the wrapper shell.
# NOTE(review): --served-model-name and -tp 4 are kept exactly as they were;
# confirm they match the downloaded model and the number of GPUs available.
CMD ["/bin/bash", "-c", "cd \"$HOME/aphrodite-engine/aphrodite/endpoints/kobold\" && exec /bin/python3 api_server.py -q exl2 --dtype auto -gmu 0.95 --kv-cache-dtype fp8_e5m2 --max-num-seqs 15 --served-model-name \"BagelMIsteryTour-v2-8x7B-AWQ\" --enforce-eager -tp 4 --port 7860 --host 0.0.0.0 --model ~/goliath-gptq"]