# Image that serves TheBloke/goliath-120b-gptq through the aphrodite-engine
# Kobold API server on port 7860, running as an unprivileged user (UID 1000).
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

# Install OS packages (as root, before switching users).
# - apt-get is used instead of apt, whose CLI is meant for interactive use.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# - Package lists are removed in the same layer so they do not bloat the image.
# NOTE(review): --no-install-recommends is intentionally NOT used; the pip
# installs below may rely on recommended packages (headers, setuptools, CA
# certificates) - confirm before trimming.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        git \
        libopenblas-dev \
        nodejs \
        npm \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged user named "user" with UID 1000 and a home directory.
RUN useradd -m -u 1000 user

# Everything below runs as that user.
USER user

# Point HOME at the user's home directory and put user-level pip scripts
# (~/.local/bin, e.g. huggingface-cli) on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Working directory for the application files.
WORKDIR $HOME/app

# Install aphrodite-engine from PyPI.
# NOTE(review): versions are unpinned; pin them for reproducible builds.
RUN python3 -m pip install --no-cache-dir aphrodite-engine

# Install the Hugging Face download tooling.
RUN python3 -m pip install --no-cache-dir huggingface-hub hf-transfer

# Enable hf-transfer for Hugging Face Hub downloads.
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Download the goliath model into $HOME/goliath-gptq at build time.
# This step is slow and produces a very large layer because of the model size.
RUN huggingface-cli download TheBloke/goliath-120b-gptq --local-dir $HOME/goliath-gptq --local-dir-use-symlinks False --cache-dir $HOME/cache

# Copy the build context into $HOME/app, owned by "user". This is done after
# the dependency install and model download so that editing application files
# does not invalidate those expensive cached layers.
COPY --chown=user . $HOME/app

# Address of a bypass server.
# NOTE(review): nothing in this Dockerfile reads BYPASS_SERVER - confirm the
# application actually uses it.
ENV BYPASS_SERVER=http://localhost:8080

# Port the API server listens on (documentation only; does not publish it).
EXPOSE 7860

# Start the aphrodite Kobold API server.
# - Exec form plus `exec` makes the Python process replace the shell, so it
#   receives signals (e.g. SIGTERM from `docker stop`) directly.
# - bash is still needed to expand $ENGINE_ARGS and ~ at container start.
# - `eval` re-parses the command after $ENGINE_ARGS is substituted, so quoted
#   arguments inside ENGINE_ARGS are honoured exactly as they were with the
#   previous `sh -c 'bash -c "..."'` invocation.
CMD ["/bin/bash", "-c", "eval \"exec /bin/python3 -m aphrodite.endpoints.kobold.api_server $ENGINE_ARGS --port 7860 --model ~/goliath-gptq\""]