Spaces:
Runtime error
Runtime error
File size: 1,729 Bytes
2b7e75a d119bf0 b7c945f e5222d3 718364e d119bf0 0c5837d b7c945f d119bf0 90dab9b b7c945f d119bf0 90dab9b 4b045d6 b7c945f d119bf0 186058d d119bf0 90dab9b b7c945f 4b045d6 e5222d3 4b045d6 c3049dc de2174e 4411e6f 718364e de2174e 4411e6f de2174e e3ac9c1 718364e de2174e 31ec84e 1fe6da2 d0da2f1 4b045d6 e3028b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# Base image: CUDA 12.1.1 "devel" flavour on Ubuntu 22.04.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
# Install the OS-level packages used by the later build steps.
# - apt-get (not apt) is the stable interface for scripted use.
# - --no-install-recommends keeps optional extras out of the image;
#   python3-dev is listed explicitly because it was previously only pulled in
#   as a "Recommends" of python3-pip.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# - The apt package index is removed in the same layer that created it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        python3-dev \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Add an unprivileged account (UID 1000, with a home directory) so the
# remaining steps and the running container do not execute as root
RUN useradd --create-home --uid 1000 user
# Every instruction below runs as that account
USER user
# HOME points at the account's home directory
ENV HOME=/home/user
# Put the account's ~/.local/bin ahead of the existing PATH entries
ENV PATH=/home/user/.local/bin:$PATH
# Application directory; also the default directory for later instructions
WORKDIR $HOME/app
# Copy the build context into the working directory (/home/user/app), owned by "user"
COPY --chown=user . .
# Install aphrodite-engine from PyPI so pip resolves and installs its dependencies.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir aphrodite-engine
# Fetch the feat/exllamav2-support branch of the engine source.
# --depth 1 makes a shallow clone of just that branch tip, so the repository's
# full history is not stored in the layer.
RUN git clone --depth 1 --branch feat/exllamav2-support \
    https://github.com/PygmalionAI/aphrodite-engine.git \
    $HOME/aphrodite-engine
# Install the Hugging Face Hub client (provides huggingface-cli) and hf-transfer
RUN python3 -m pip install --no-cache-dir huggingface-hub hf-transfer
# Enable hf-transfer for Hub downloads
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Download the model files into $HOME/goliath-gptq at build time.
# NOTE(review): the directory is named "goliath-gptq" although the repository
# fetched is a TinyLlama exl2 quantization; the name is kept because the
# container's start command passes this path as --model.
RUN huggingface-cli download LoneStriker/TinyLlama-1.1B-32k-Instruct-8.0bpw-h8-exl2 \
    --local-dir $HOME/goliath-gptq \
    --local-dir-use-symlinks False \
    --cache-dir $HOME/cache
# Install aioprometheus separately from the engine's own dependency set.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir aioprometheus
# Put the cloned source tree on Python's module search path.
# NOTE(review): PYTHONPATH entries are searched before site-packages, so the
# clone takes precedence over the pip-installed aphrodite package. If the
# clone relies on compiled extensions that were never built here, imports may
# fail at startup — confirm this is intended.
ENV PYTHONPATH=$HOME/aphrodite-engine
# Port the API server listens on (matches --port below)
EXPOSE 7860
# Start in the directory that contains api_server.py (replaces `cd` in the command)
WORKDIR $HOME/aphrodite-engine/aphrodite/endpoints/kobold
# Launch the API server in exec form so the Python process is PID 1 and
# receives stop signals directly instead of sitting behind a bash wrapper.
# Exec form performs no shell expansion, so the model path is written out in
# full (/home/user is the HOME set earlier in this file).
# NOTE(review): -tp 4 requests tensor parallelism across 4 GPUs — confirm the
# target hardware provides them.
# NOTE(review): the served model name does not match the model stored in
# /home/user/goliath-gptq; it is kept unchanged here.
CMD ["python3", "api_server.py", \
     "-q", "exl2", \
     "--dtype", "auto", \
     "-gmu", "0.95", \
     "--kv-cache-dtype", "fp8_e5m2", \
     "--max-num-seqs", "15", \
     "--served-model-name", "BagelMIsteryTour-v2-8x7B-AWQ", \
     "--enforce-eager", \
     "-tp", "4", \
     "--port", "7860", \
     "--host", "0.0.0.0", \
     "--model", "/home/user/goliath-gptq"]
|