More fixes.
#16, opened by Lewdiculous
- gguf-imat-llama-3.py +10 -32
gguf-imat-llama-3.py (CHANGED)
@@ -5,7 +5,6 @@ import subprocess
|
|
5 |
import shutil
|
6 |
from huggingface_hub import snapshot_download
|
7 |
|
8 |
-
# Clone or update the llama.cpp repository with --depth 1
|
9 |
def clone_or_update_llama_cpp():
|
10 |
print("Preparing...")
|
11 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
@@ -18,7 +17,6 @@ def clone_or_update_llama_cpp():
|
|
18 |
os.chdir(base_dir)
|
19 |
print("The 'llama.cpp' repository is ready.")
|
20 |
|
21 |
-
# Download and extract the latest release of llama.cpp Windows binaries
|
22 |
def download_llama_release():
|
23 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
24 |
dl_dir = os.path.join(base_dir, "bin", "dl")
|
@@ -45,7 +43,6 @@ def download_llama_release():
|
|
45 |
else:
|
46 |
print("Failed to fetch the latest release information.")
|
47 |
|
48 |
-
# Download and extract the Cuda .dll resources if they aren't present in the bin folder
|
49 |
def download_cudart_if_necessary(latest_release_tag):
|
50 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
51 |
cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
|
@@ -55,7 +52,6 @@ def download_cudart_if_necessary(latest_release_tag):
|
|
55 |
cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
|
56 |
cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]
|
57 |
|
58 |
-
# Check if all required files exist
|
59 |
if all(os.path.exists(os.path.join(base_dir, "bin", file)) for file in cudart_extracted_files):
|
60 |
print("Cuda resources already exist. Skipping download.")
|
61 |
else:
|
@@ -71,7 +67,6 @@ def download_cudart_if_necessary(latest_release_tag):
|
|
71 |
else:
|
72 |
print("Failed to download the cudart release file.")
|
73 |
|
74 |
-
# Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
|
75 |
def download_model_repo():
|
76 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
77 |
models_dir = os.path.join(base_dir, "models")
|
@@ -82,36 +77,28 @@ def download_model_repo():
|
|
82 |
model_name = model_id.split("/")[-1]
|
83 |
model_dir = os.path.join(models_dir, model_name)
|
84 |
|
85 |
-
# Check if the model repository already exists
|
86 |
if os.path.exists(model_dir):
|
87 |
print("Model repository already exists. Using existing repository.")
|
88 |
|
89 |
-
# If the model already exists, prompt the user if they want to delete the model directory
|
90 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
91 |
|
92 |
-
# Ask for the name of the imatrix.txt file
|
93 |
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
94 |
|
95 |
-
# Convert the existing model to GGUF F16 format and generate imatrix.dat
|
96 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
97 |
|
98 |
else:
|
99 |
revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
|
100 |
|
101 |
-
# Ask the user if they want to remove the HF model folder after conversion
|
102 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
103 |
|
104 |
print("Downloading model repository...")
|
105 |
snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
|
106 |
print("Model repository downloaded successfully.")
|
107 |
|
108 |
-
# Ask for the name of the imatrix.txt file
|
109 |
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
110 |
|
111 |
-
# Convert the downloaded model to GGUF F16 format and generate imatrix.dat
|
112 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
113 |
|
114 |
-
# Convert the downloaded model to GGUF F16 format
|
115 |
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
|
116 |
convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
|
117 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
@@ -120,38 +107,30 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
|
|
120 |
if not os.path.exists(gguf_dir):
|
121 |
os.makedirs(gguf_dir)
|
122 |
|
123 |
-
# Check if F16 file already exists
|
124 |
if not os.path.exists(gguf_model_path):
|
125 |
-
# Execute the conversion command
|
126 |
subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])
|
127 |
|
128 |
-
# Delete the original model directory under conditions
|
129 |
if delete_model_dir == 'yes' or delete_model_dir == 'y':
|
130 |
shutil.rmtree(model_dir)
|
131 |
print(f"Original model directory '{model_dir}' deleted.")
|
132 |
else:
|
133 |
print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
|
134 |
|
135 |
-
# Generate imatrix.dat if it doesn't exist
|
136 |
imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
|
137 |
-
|
138 |
-
|
139 |
-
if not os.path.exists(
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
print("imatrix.dat
|
146 |
-
else:
|
147 |
-
print("Failed to generate imatrix.dat file.")
|
148 |
else:
|
149 |
-
print("
|
150 |
|
151 |
-
# Quantize the models
|
152 |
quantize_models(base_dir, model_name)
|
153 |
|
154 |
-
# Quantize models with different options
|
155 |
def quantize_models(base_dir, model_name):
|
156 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
157 |
f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
@@ -174,7 +153,6 @@ def quantize_models(base_dir, model_name):
|
|
174 |
f16_gguf_path, quantized_gguf_path, quant_option], cwd=gguf_dir)
|
175 |
print(f"Model quantized with {quant_option} option.")
|
176 |
|
177 |
-
# Main function - Steps
|
178 |
def main():
|
179 |
clone_or_update_llama_cpp()
|
180 |
latest_release_tag = download_llama_release()
|
|
|
5 |
import shutil
|
6 |
from huggingface_hub import snapshot_download
|
7 |
|
|
|
8 |
def clone_or_update_llama_cpp():
|
9 |
print("Preparing...")
|
10 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
|
|
17 |
os.chdir(base_dir)
|
18 |
print("The 'llama.cpp' repository is ready.")
|
19 |
|
|
|
20 |
def download_llama_release():
|
21 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
22 |
dl_dir = os.path.join(base_dir, "bin", "dl")
|
|
|
43 |
else:
|
44 |
print("Failed to fetch the latest release information.")
|
45 |
|
|
|
46 |
def download_cudart_if_necessary(latest_release_tag):
|
47 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
48 |
cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
|
|
|
52 |
cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
|
53 |
cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]
|
54 |
|
|
|
55 |
if all(os.path.exists(os.path.join(base_dir, "bin", file)) for file in cudart_extracted_files):
|
56 |
print("Cuda resources already exist. Skipping download.")
|
57 |
else:
|
|
|
67 |
else:
|
68 |
print("Failed to download the cudart release file.")
|
69 |
|
|
|
70 |
def download_model_repo():
|
71 |
base_dir = os.path.dirname(os.path.abspath(__file__))
|
72 |
models_dir = os.path.join(base_dir, "models")
|
|
|
77 |
model_name = model_id.split("/")[-1]
|
78 |
model_dir = os.path.join(models_dir, model_name)
|
79 |
|
|
|
80 |
if os.path.exists(model_dir):
|
81 |
print("Model repository already exists. Using existing repository.")
|
82 |
|
|
|
83 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
84 |
|
|
|
85 |
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
86 |
|
|
|
87 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
88 |
|
89 |
else:
|
90 |
revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
|
91 |
|
|
|
92 |
delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
|
93 |
|
94 |
print("Downloading model repository...")
|
95 |
snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
|
96 |
print("Model repository downloaded successfully.")
|
97 |
|
|
|
98 |
imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
|
99 |
|
|
|
100 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
101 |
|
|
|
102 |
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
|
103 |
convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
|
104 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
|
|
107 |
if not os.path.exists(gguf_dir):
|
108 |
os.makedirs(gguf_dir)
|
109 |
|
|
|
110 |
if not os.path.exists(gguf_model_path):
|
|
|
111 |
subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])
|
112 |
|
|
|
113 |
if delete_model_dir == 'yes' or delete_model_dir == 'y':
|
114 |
shutil.rmtree(model_dir)
|
115 |
print(f"Original model directory '{model_dir}' deleted.")
|
116 |
else:
|
117 |
print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
|
118 |
|
|
|
119 |
imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
|
120 |
+
imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
|
121 |
+
imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
|
122 |
+
if not os.path.exists(imatrix_output_dst):
|
123 |
+
try:
|
124 |
+
subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", os.path.join(base_dir, "imatrix", imatrix_file_name), "-ngl", "8"], cwd=gguf_dir)
|
125 |
+
shutil.move(imatrix_output_src, imatrix_output_dst)
|
126 |
+
print("imatrix.dat moved successfully.")
|
127 |
+
except Exception as e:
|
128 |
+
print("Error occurred while moving imatrix.dat:", e)
|
|
|
|
|
129 |
else:
|
130 |
+
print("imatrix.dat already exists in the GGUF folder.")
|
131 |
|
|
|
132 |
quantize_models(base_dir, model_name)
|
133 |
|
|
|
134 |
def quantize_models(base_dir, model_name):
|
135 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
136 |
f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
|
|
153 |
f16_gguf_path, quantized_gguf_path, quant_option], cwd=gguf_dir)
|
154 |
print(f"Model quantized with {quant_option} option.")
|
155 |
|
|
|
156 |
def main():
|
157 |
clone_or_update_llama_cpp()
|
158 |
latest_release_tag = download_llama_release()
|