# CUDA 11.8 development image with the toolchain needed to build the engine from source
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

# Install system build dependencies, Python, and Node.js
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    libopenblas-dev \
    wget \
    python3-pip \
    nodejs \
    npm

# Create a non-root user and run everything below as that user
RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Copy the application sources into the user's home directory
WORKDIR $HOME/app
COPY --chown=user . $HOME/app

# Clone the aphrodite-engine from the specific branch
RUN git clone --branch feat/exllamav2-support https://github.com/PygmalionAI/aphrodite-engine.git $HOME/aphrodite-engine

# Install dependencies from the cloned repository
RUN cd $HOME/aphrodite-engine && pip install -e .

# Install additional dependencies
RUN pip install huggingface-hub hf-transfer

# Set environment variable to enable hf-transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Download the model weights with huggingface-cli (note: the target directory is
# named goliath-gptq, and the CMD below points at that same path)
RUN huggingface-cli download LoneStriker/TinyLlama-1.1B-32k-Instruct-8.0bpw-h8-exl2 --local-dir $HOME/goliath-gptq --local-dir-use-symlinks False --cache-dir $HOME/cache

# Expose the port the API server will listen on
EXPOSE 7860

# Start the OpenAI-compatible API server; extra engine flags can be supplied at
# runtime through the ENGINE_ARGS environment variable
CMD ["/bin/bash", "-c", "python3 -m aphrodite.endpoints.openai.api_server $ENGINE_ARGS --port 7860 --host 0.0.0.0 --model ~/goliath-gptq"]