mofosyne committed on
Commit
5d5e38b
1 Parent(s): 4ff23e8

include llamafile repo as a submodule and make build process more self contained

Browse files
Files changed (6) hide show
  1. .args +0 -1
  2. .gitmodules +3 -0
  3. TinyLLama-v0-5M-F16.llamafile +2 -2
  4. llama.cpp +1 -1
  5. llamafile +1 -0
  6. llamafile-creation.sh +28 -12
.args CHANGED
@@ -1,3 +1,2 @@
1
  -m
2
  TinyLLama-v0-5M-F16.gguf
3
- ...
 
1
  -m
2
  TinyLLama-v0-5M-F16.gguf
 
.gitmodules CHANGED
@@ -4,3 +4,6 @@
4
  [submodule "llama.cpp"]
5
  path = llama.cpp
6
  url = git@github.com:mofosyne/llama.cpp.git
 
 
 
 
4
  [submodule "llama.cpp"]
5
  path = llama.cpp
6
  url = git@github.com:mofosyne/llama.cpp.git
7
+ [submodule "llamafile"]
8
+ path = llamafile
9
+ url = git@github.com:Mozilla-Ocho/llamafile.git
TinyLLama-v0-5M-F16.llamafile CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9847179c1f3a04f49deb90efa62173d02e10f272fe6cfe6325c6277cebd1b054
3
- size 17633471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f098bd53e7e689289be7322d19304eeea6f3b305ddbcfda4a15a452a1be35d1
3
+ size 17633572
llama.cpp CHANGED
@@ -1 +1 @@
1
- Subproject commit 8f4412980b41ccdc164ff220bfcd564f2a4a86cb
 
1
+ Subproject commit da064a809badd5086d61fd82accbb16ad93cde94
llamafile ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit cb92b32a6dfae4dff06d7333afe51f2b7224f709
llamafile-creation.sh CHANGED
@@ -1,30 +1,46 @@
1
- #!/bin/sh
2
 
 
 
3
  # Pull both the model folder and llama.cpp (for the conversion script)
4
- git submodule update --init
5
 
6
- # Convert from safetensor to gguf
7
- # (Assuming llama.cpp is in the next folder)
8
- ./llama.cpp/convert.py maykeye_tinyllama --metadata maykeye_tinyllama-metadata.json
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Copy the generated gguf to this folder
 
 
11
  mv maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf
12
 
13
- # Get the llamafile engine
14
- cp /usr/local/bin/llamafile TinyLLama-v0-5M-F16.llamafile
15
 
16
  # Create an .args file with settings defaults
17
  cat >.args <<EOF
18
  -m
19
  TinyLLama-v0-5M-F16.gguf
20
- ...
21
  EOF
22
 
23
- # Combine
24
  zipalign -j0 \
25
  TinyLLama-v0-5M-F16.llamafile \
26
  TinyLLama-v0-5M-F16.gguf \
27
  .args
28
 
29
- # Test
30
- ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said"
 
 
1
+ #!/bin/bash
2
 
3
+ ###############################################################################
4
+ echo == Prep Enviroment ==
5
  # Pull both the model folder and llama.cpp (for the conversion script)
6
+ #git submodule update --init
7
 
8
+ ###############################################################################
9
+ echo == Build and prep the llamafile engine execuable ==
10
+ pushd llamafile
11
+ make -j8
12
+ make
13
+ # For reference, this is where each executable is located as of 2024-04-05,
14
+ # and was determined by running `sudo make install PREFIX=/usr/local`
15
+ # ./o/llamafile/zipalign --> /usr/local/bin/zipalign
16
+ # ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
17
+ # ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
18
+ # ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
19
+ # ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
20
+ # ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
21
+ # ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
22
+ popd
23
 
24
+ ###############################################################################
25
+ echo == Convert from safetensor to gguf ==
26
+ ./llama.cpp/convert.py maykeye_tinyllama --outtype f16 --metadata maykeye_tinyllama-metadata.json
27
  mv maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf
28
 
29
+ echo == Generating Llamafile ==
30
+ cp ./llamafile/o/llama.cpp/main/main TinyLLama-v0-5M-F16.llamafile
31
 
32
  # Create an .args file with settings defaults
33
  cat >.args <<EOF
34
  -m
35
  TinyLLama-v0-5M-F16.gguf
 
36
  EOF
37
 
38
+ # zip align engine, gguf and default args
39
  zipalign -j0 \
40
  TinyLLama-v0-5M-F16.llamafile \
41
  TinyLLama-v0-5M-F16.gguf \
42
  .args
43
 
44
+ ###############################################################################
45
+ echo == Test Output ==
46
+ ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said"