#!/bin/bash

###############################################################################
echo == Prep Environment ==

# Pull both the model folder and llama.cpp (for the conversion script)
#git submodule update --init

###############################################################################
echo == Build and prep the llamafile engine executable ==

pushd llamafile
make -j8

# For reference, this is where each executable is installed as of 2024-04-05,
# as determined by running `sudo make install PREFIX=/usr/local`:
#   ./o/llamafile/zipalign              --> /usr/local/bin/zipalign
#   ./o/llama.cpp/main/main             --> /usr/local/bin/llamafile
#   ./o/llama.cpp/imatrix/imatrix       --> /usr/local/bin/llamafile-imatrix
#   ./o/llama.cpp/quantize/quantize     --> /usr/local/bin/llamafile-quantize
#   ./build/llamafile-convert           --> /usr/local/bin/llamafile-convert
#   ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
#   ./o/llama.cpp/llava/llava-quantize  --> /usr/local/bin/llava-quantize
popd

###############################################################################
echo == Convert from safetensors to gguf ==

./llama.cpp/convert.py maykeye_tinyllama --outtype f16 --metadata maykeye_tinyllama-metadata.json
mv maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf

###############################################################################
echo == Generating Llamafile ==

cp ./llamafile/o/llama.cpp/main/main TinyLLama-v0-5M-F16.llamafile

# Create an .args file with default settings.
# NOTE: the heredoc body was truncated in the original script; the arguments
# below are an assumed minimal default, following llamafile's
# one-argument-per-line .args convention (-m names the model to load).
cat >.args <<EOF
-m
TinyLLama-v0-5M-F16.gguf
EOF
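
###############################################################################
# NOTE: assumed continuation (the original script cut off at the .args
# heredoc). Per the llamafile README, the usual final step is to embed the
# GGUF weights and the .args defaults into the executable's zip payload with
# zipalign; -j0 stores the files without compression, which llamafile needs
# to mmap the weights directly. This is a hedged sketch reusing the zipalign
# binary built above; verify the paths before running.
./llamafile/o/llamafile/zipalign -j0 \
  TinyLLama-v0-5M-F16.llamafile \
  TinyLLama-v0-5M-F16.gguf \
  .args

# Optional smoke test (assumed invocation, standard llama.cpp main flags):
# run the finished llamafile with a short prompt to confirm the embedded
# model and default arguments load correctly.
#./TinyLLama-v0-5M-F16.llamafile -e -p "hello world" -n 16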