Spaces:
Runtime error
Runtime error
File size: 1,219 Bytes
4ac8f37 b7c945f e5222d3 718364e 0c5837d b7c945f 90dab9b b7c945f 90dab9b b7c945f 186058d 90dab9b b7c945f de2174e dc6f869 e5222d3 de2174e c3049dc de2174e 4411e6f 718364e de2174e 4411e6f de2174e e3ac9c1 718364e de2174e 31ec84e de2174e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# Base image: NVIDIA CUDA 11.8.0 "devel" image on Ubuntu 22.04 (explicit tag).
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

# OS-level packages needed by the later build and runtime steps.
# - `apt-get` is used instead of `apt`, whose CLI is not meant for scripts.
# - DEBIAN_FRONTEND is set inline so the install can never block on an
#   interactive prompt, without persisting the variable into the runtime env.
# - The package index is removed in the same layer so it does not bloat the image.
# - `--no-install-recommends` is deliberately NOT used: later build steps may
#   rely on recommended packages (TODO confirm before slimming the image).
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Create an unprivileged account (UID 1000) with a home directory, then run
# every subsequent instruction as that account.
RUN useradd --create-home --uid 1000 user
USER user

# Point HOME at the account's home directory and put its ~/.local/bin first
# on PATH.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Use $HOME/app as the working directory and copy the build context into it,
# owned by `user`.
WORKDIR ${HOME}/app
COPY --chown=user . ${HOME}/app
# Clone the aphrodite-engine from the specific branch.
# NOTE(review): the branch is a moving target, so rebuilds may pick up different
# code; consider checking out a fixed commit once a known-good one is identified.
RUN git clone --branch feat/exllamav2-support https://github.com/PygmalionAI/aphrodite-engine.git $HOME/aphrodite-engine

# Install the cloned repository in editable mode. The path is handed to pip
# directly instead of `cd`-ing inside the RUN, and --no-cache-dir keeps pip's
# download/wheel cache out of the image layer.
RUN pip install --no-cache-dir -e $HOME/aphrodite-engine

# Install the Hugging Face Hub client and the hf-transfer package.
RUN pip install --no-cache-dir huggingface-hub hf-transfer
# Turn on hf-transfer for Hugging Face Hub downloads.
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Fetch the model repository at build time with huggingface-cli. Files go to
# $HOME/goliath-gptq with local-dir symlinks disabled; $HOME/cache is passed
# as the cache directory.
RUN huggingface-cli download \
        LoneStriker/TinyLlama-1.1B-32k-Instruct-8.0bpw-h8-exl2 \
        --local-dir $HOME/goliath-gptq \
        --local-dir-use-symlinks False \
        --cache-dir $HOME/cache
# Expose the port the API server will listen on
EXPOSE 7860

# Command to run the API server.
# - The JSON array must be the only thing on the line; any trailing text makes
#   Docker fall back to shell form and the command fails to parse.
# - A bash wrapper is used because exec form performs no expansion itself:
#   $ENGINE_ARGS is left unquoted on purpose so it can carry several
#   space-separated arguments, and ~ expands to $HOME.
# - `exec` replaces the bash process with the server so it is the container's
#   main process and receives stop signals directly.
CMD ["/bin/bash", "-c", "exec /bin/python3 -m aphrodite.endpoints.openai.api_server $ENGINE_ARGS --port 7860 --host 0.0.0.0 --model ~/goliath-gptq"]