#!/bin/bash
MODEL_DIR="maykeye_tinyllama"
METADATA_FILE="maykeye_tinyllama-metadata.json"
###############################################################################
# Pull the model folder, llamafile (for the engine), and llama.cpp (for the conversion script)
echo "== Prep Environment =="
git submodule update --init
###############################################################################
echo "== Build and prep the llamafile engine executable =="
pushd llamafile
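# Parallel build; adjust -j to match your CPU core count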
make -j8
# For reference, this is where each executable gets installed as of 2024-04-05,
# as determined by running `sudo make install PREFIX=/usr/local`:
# ./o/llamafile/zipalign --> /usr/local/bin/zipalign
# ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
# ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
# ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
# ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
# ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
popd
###############################################################################
echo "== What is our llamafile name going to be? =="
OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --dry-run 2>/dev/null)
echo "${OUTFILE_PATH}"
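# Derive the base name by stripping the directory and the .gguf suffix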
OUTFILE_FILE=$(basename "${OUTFILE_PATH}")
OUTFILE="${OUTFILE_FILE%.gguf}"
echo "We will be aiming to generate ${OUTFILE}.llamafile"
###############################################################################
echo "== Convert from safetensors to GGUF =="
./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --verbose
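# The conversion script writes the GGUF into the model directory, so move it to the working directory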
mv "${MODEL_DIR}/${OUTFILE}.gguf" "${OUTFILE}.gguf"
###############################################################################
echo "== Generating Llamafile =="
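# Use the llamafile engine binary (a Cosmopolitan "actually portable executable") as the base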
cp ./llamafile/o/llama.cpp/main/main "${OUTFILE}.llamafile"
# Create a .args file holding the default arguments (one token per line)
cat >.args <<EOF
-m
${OUTFILE}.gguf
EOF
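# Additional default flags could be appended to .args, one token per line,
# e.g. a "--host" / "0.0.0.0" pair to serve on all interfaces (illustrative only)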
# Zipalign the engine, GGUF weights, and default args into a single llamafile
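# (zipalign stores the GGUF uncompressed and aligned so the weights can be memory-mapped at run time)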
./llamafile/o/llamafile/zipalign -j0 "${OUTFILE}.llamafile" "${OUTFILE}.gguf" .args
###############################################################################
echo "== Test Output ./${OUTFILE}.llamafile =="
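# --cli forces command-line completion mode instead of launching the local web server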
./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
###############################################################################
echo "== Useful GGUF Technical Dump =="
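# Dump the GGUF metadata and tensor layout to a markdown file for later reference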
./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown "${OUTFILE}.gguf" > "${OUTFILE}.md"