update script approach to use convert.py --get-outfile flag
- TinyLLama-v0-5M-F16.gguf +2 -2
- TinyLLama-v0-5M-F16.llamafile +2 -2
- llama.cpp +1 -1
- llamafile +1 -1
- llamafile-creation.sh +14 -9
TinyLLama-v0-5M-F16.gguf CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f4dccfdb2707a0747e58dec0e1e21d48ccefafd0e9d74cafbc45803a615074c2
+size 10008160
TinyLLama-v0-5M-F16.llamafile CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a6fee85a6906722ef26d7f8925485654220ac3870d56ec1b8e260919e2dc33a4
+size 19468831
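Both model artifacts above are Git LFS pointer files rather than the binaries themselves: the oid line records the SHA-256 of the real blob and size its byte count. As a quick sanity check after pulling the objects (a sketch, assuming git lfs pull has already materialized the files):

    # The printed digests should equal the oids recorded in the pointers above
    sha256sum TinyLLama-v0-5M-F16.gguf
    sha256sum TinyLLama-v0-5M-F16.llamafile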
llama.cpp CHANGED
@@ -1 +1 @@
-Subproject commit
+Subproject commit 74fe2ea7a8d713da7378812215213ba74050cda6
llamafile CHANGED
@@ -1 +1 @@
-Subproject commit
+Subproject commit 790029485fdd3a81284efdcae1c0483a4d39a6a6
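The two engines are vendored as git submodules, and this commit pins them to the revisions shown above. To reproduce the build at exactly this state, check the submodules out at the recorded commits; a minimal sketch:

    # Fetch and check out both submodules at the revisions recorded in this commit
    git submodule update --init llama.cpp llamafile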
llamafile-creation.sh CHANGED
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+MODEL_DIR="maykeye_tinyllama"
+METADATA_FILE="maykeye_tinyllama-metadata.json"
+
 ###############################################################################
 # Pull both model folder, llamafile (for the engine) and llama.cpp (for the conversion script)
 echo == Prep Enviroment ==
@@ -21,27 +24,29 @@ make
 # ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
 popd
 
+###############################################################################
+echo == What is our llamafile name going to be? ==
+OUTFILE=$(./llama.cpp/convert.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
+echo We will be aiming to generate $OUTFILE.llamafile
+
 ###############################################################################
 echo == Convert from safetensor to gguf ==
-./llama.cpp/convert.py
-mv
+./llama.cpp/convert.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
+mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
 
 ###############################################################################
 echo == Generating Llamafile ==
-cp ./llamafile/o/llama.cpp/main/main
+cp ./llamafile/o/llama.cpp/main/main ${OUTFILE}.llamafile
 
 # Create an .args file with settings defaults
 cat >.args <<EOF
 -m
-
+${OUTFILE}.gguf
 EOF
 
 # zip align engine, gguf and default args
-./llamafile/o/llamafile/zipalign -j0 \
-  TinyLLama-v0-5M-F16.llamafile \
-  TinyLLama-v0-5M-F16.gguf \
-  .args
+./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args
 
 ###############################################################################
 echo == Test Output ==
-
+./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"