#!/usr/bin/env bash
#
# Download the DeepSeek-V2-Chat Q2_K GGUF shards and a prebuilt llama.cpp
# release (b2961) matching the current OS, then start the llama.cpp server
# on the downloaded model.
#
# Requires: bash, wget, unzip, and network access.
# Side effects: creates ./DeepSeek-V2-Chat.Q2_K.gguf/ in the current
# directory and downloads/extracts everything into it.

set -euo pipefail

readonly MODEL_DIR='DeepSeek-V2-Chat.Q2_K.gguf'
readonly MODEL_URL='https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf'
readonly LLAMA_URL='https://github.com/ggerganov/llama.cpp/releases/download/b2961'
readonly SHARD_COUNT=5

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

for tool in wget unzip; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

# Choose the llama.cpp archive up front so an unsupported platform fails
# before any model shard is downloaded.
case "$(uname -s)" in
  Linux)
    archive='llama-b2961-bin-ubuntu-x64.zip'
    ;;
  Darwin)
    if [[ "$(uname -m)" == 'arm64' ]]; then
      archive='llama-b2961-bin-macos-arm64.zip'
    else
      archive='llama-b2961-bin-macos-x64.zip'
    fi
    ;;
  *)
    die "unsupported OS: $(uname -s)"
    ;;
esac

# Create and change to the directory
mkdir -p -- "$MODEL_DIR"
cd -- "$MODEL_DIR" || die "cannot cd to $MODEL_DIR"

# Download the GGUF files (shards 00001..00005)
for ((i = 1; i <= SHARD_COUNT; i++)); do
  printf -v shard 'DeepSeek-V2-Chat.Q2_K-%05d-of-%05d.gguf' "$i" "$SHARD_COUNT"
  wget "${MODEL_URL}/${shard}?download=true" -O "$shard"
done

# Download and unpack the llama.cpp binaries selected above
wget "${LLAMA_URL}/${archive}"
unzip "$archive" -d .

# Locate the server binary. The original command assumed ./server; the
# archive may instead unpack into build/bin/ -- NOTE(review): confirm the
# layout of the b2961 release zips.
server_bin=''
for candidate in ./server ./build/bin/server; do
  if [[ -x "$candidate" ]]; then
    server_bin=$candidate
    break
  fi
done
[[ -n "$server_bin" ]] || die "server binary not found after extracting $archive"

# The model is split into several files; point -m at the first shard.
# llama.cpp is expected to load the remaining shards from the same
# directory -- TODO confirm against the gguf-split documentation.
printf -v first_shard 'DeepSeek-V2-Chat.Q2_K-%05d-of-%05d.gguf' 1 "$SHARD_COUNT"

# All options live in one array so every --override-kv is actually passed
# to the server (a missing line continuation previously dropped them).
# NOTE(review): -i is kept from the original command; verify that the
# server binary accepts it.
server_args=(
  -m "$first_shard"
  -c 4096
  -i
  --mlock
  --override-kv deepseek2.attention.q_lora_rank=int:1536
  --override-kv deepseek2.attention.kv_lora_rank=int:512
  --override-kv deepseek2.expert_shared_count=int:2
  --override-kv deepseek2.expert_feed_forward_length=int:1536
  --override-kv deepseek2.leading_dense_block_count=int:1
)

# Execute the server command
"$server_bin" "${server_args[@]}"