File size: 2,584 Bytes

5d5e38b
7585b58
15be350
 
 
5d5e38b
5a4673f
5d5e38b
19c0773
7585b58
5d5e38b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7585b58
15be350
 
aec30a9
 
 
 
15be350
 
5d5e38b
 
aec30a9
15be350
7585b58
19c0773
 
 
d223d66
5d5e38b
15be350
7585b58
707f408
 
 
15be350
707f408
 
5d5e38b
15be350
7585b58
5d5e38b
aec30a9
15be350

#!/bin/bash

# Model directory handed to ./llama.cpp/convert_hf_to_gguf.py.
# May be overridden from the environment; the default is the original value.
MODEL_DIR="${MODEL_DIR:-maykeye_tinyllama}"
# Metadata file handed to the converter through --metadata.
# May be overridden from the environment; the default is the original value.
METADATA_FILE="${METADATA_FILE:-maykeye_tinyllama-metadata.json}"

###############################################################################
# Fetch the model folder, llamafile (for the engine) and llama.cpp (for the
# conversion script). The submodule update below is currently commented out,
# so this step only prints its banner.
printf '%s\n' '== Prep Enviroment =='
#git submodule update --init

###############################################################################
echo == Build and prep the llamafile engine execuable ==
# Enter the llamafile checkout. Abort if it is missing so that `make` never
# runs in the wrong directory and `popd` never fails on an empty stack.
pushd llamafile || { echo "error: cannot enter ./llamafile" >&2; exit 1; }
# Parallel build first, then a serial pass.
# NOTE(review): the serial pass is kept from the original script, presumably to
# finish anything the parallel run missed -- confirm. Only the result of the
# final pass decides whether the build succeeded.
make -j8 || echo "warning: 'make -j8' failed; retrying with serial make" >&2
make || { echo "error: building llamafile failed" >&2; exit 1; }
# For reference only (as of 2024-04-05): where each executable is located in
# the build tree and where `sudo make install PREFIX=/usr/local` installs it.
# ./o/llamafile/zipalign --> /usr/local/bin/zipalign
# ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
# ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
# ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
# ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
# ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
popd || { echo "error: cannot return from ./llamafile" >&2; exit 1; }

###############################################################################
echo == What is our llamafile name going to be? ==
# Run the converter once with --dry-run and capture its stdout, which the rest
# of the script treats as the path of the .gguf file that will be produced.
# stderr is discarded on purpose so that only stdout ends up in the variable.
OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --dry-run 2>/dev/null)
dry_run_status=$?
# Without a name every later step would operate on ".gguf" / ".llamafile",
# so stop here instead of continuing with an empty value.
if [[ ${dry_run_status} -ne 0 || -z "${OUTFILE_PATH}" ]]; then
  echo "error: convert_hf_to_gguf.py --dry-run did not report an output file" >&2
  echo "       (re-run it without '2>/dev/null' to see its diagnostics)" >&2
  exit 1
fi
# Show the reported path, as the original script did with a separate dry run.
printf '%s\n' "${OUTFILE_PATH}"
# Strip the directory part, then the .gguf extension, to get the base name
# shared by the .gguf, .dump.md and .llamafile artifacts.
OUTFILE_FILE=$(basename -- "${OUTFILE_PATH}")
OUTFILE="${OUTFILE_FILE%.gguf}"
echo "We will be aiming to generate ${OUTFILE}.llamafile"

###############################################################################
echo == Convert from safetensor to gguf ==
# OUTFILE is computed by the naming step; refuse to continue without it so we
# never move or dump a file literally called ".gguf".
: "${OUTFILE:?OUTFILE must be set before converting}"
./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --verbose \
  || { echo "error: conversion to gguf failed" >&2; exit 1; }
# Move the generated file out of the model directory into the working directory.
mv -- "${MODEL_DIR}/${OUTFILE}.gguf" "${OUTFILE}.gguf" \
  || { echo "error: cannot move ${MODEL_DIR}/${OUTFILE}.gguf" >&2; exit 1; }

# Generate Diagnostics Dumpfile
# The dump is diagnostic only, so a failure is reported but not fatal.
./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown "${OUTFILE}.gguf" > "${OUTFILE}.dump.md" \
  || echo "warning: could not generate ${OUTFILE}.dump.md" >&2

###############################################################################
echo == Generating Llamafile ==
# OUTFILE is computed by the naming step; refuse to continue without it so we
# never create a file literally called ".llamafile".
: "${OUTFILE:?OUTFILE must be set before packaging}"
# Start from a copy of the freshly built engine binary.
cp -- ./llamafile/o/llama.cpp/main/main "${OUTFILE}.llamafile" \
  || { echo "error: cannot copy the llamafile engine binary" >&2; exit 1; }

# Create an .args file with settings defaults.
# One argument per line. The trailing `...` line is the placeholder that the
# llamafile documentation describes for inserting arguments the user passes on
# the command line, so the embedded `-m` default still applies when the
# llamafile is run with extra arguments.
cat >.args <<EOF
-m
${OUTFILE}.gguf
...
EOF

# zip align engine, gguf and default args
./llamafile/o/llamafile/zipalign -j0 "${OUTFILE}.llamafile" "${OUTFILE}.gguf" .args \
  || { echo "error: zipalign failed for ${OUTFILE}.llamafile" >&2; exit 1; }

###############################################################################
# Smoke test: run the packaged llamafile once in CLI mode with a fixed prompt.
# Expansions are quoted so a name containing spaces or glob characters is
# passed through as a single word.
echo "== Test Output ./${OUTFILE}.llamafile =="
"./${OUTFILE}.llamafile" --cli -p "hello world the gruff man said"