Update app.py
app.py CHANGED
@@ -29,7 +29,7 @@ def script_to_use(model_id, api):
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
 def generate_importance_matrix(model_path, train_data_path):
-    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0"
+    imatrix_command = f"./imatrix -m ../{model_path} -f {train_data_path} -ngl 0" #No GPU on the basic spaces unlike main, it works regardless but takes >2 hours
 
     os.chdir("llama.cpp")
 
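
The patched helper shells out to llama.cpp's imatrix tool with -ngl 0, i.e. entirely on CPU. A minimal sketch of the same call done without the os.chdir() side effect (illustrative only, not the Space's actual code; it assumes the imatrix binary has been built inside ./llama.cpp):

    import os
    import subprocess

    def generate_importance_matrix(model_path: str, train_data_path: str) -> None:
        # Run llama.cpp's imatrix tool on CPU (-ngl 0 offloads zero layers to GPU).
        # cwd= keeps the caller's working directory intact, unlike os.chdir().
        # train_data_path must be resolvable from inside llama.cpp/ (an
        # absolute path is safest).
        command = [
            "./imatrix",
            "-m", os.path.join("..", model_path),
            "-f", train_data_path,
            "-ngl", "0",
        ]
        result = subprocess.run(command, cwd="llama.cpp", capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(f"Failed to generate importance matrix: {result.stderr}")
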
@@ -134,32 +134,19 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     imatrix_path = "llama.cpp/imatrix.dat"
     use_imatrix = q_method.startswith("IQ")
 
-    if use_imatrix:
-
-
-
-
-
-
-
-
-        if not os.path.isfile(train_data_path):
-            raise Exception(f"Training data file not found: {train_data_path}")
-        else:
-            # for now it's a decent fallback/default
-            train_data_path = "imatrix_calibration.txt"
-
-
-            print(f"Using fallback training data file: {train_data_path}")
-
-
-            if not os.path.isfile(train_data_path):
-                raise Exception(f"Fallback training data file not found: {train_data_path}")
+    if train_data_file and use_imatrix:
+
+        train_data_path = train_data_file.name
+
+
+        print(f"Training data file path: {train_data_path}")
+
+        if not os.path.isfile(train_data_path):
+            raise Exception(f"Training data file not found: {train_data_path}")
 
         generate_importance_matrix(fp16, train_data_path)
     else:
-        print("
-
+        print("No training data file provided or not using imatrix quantization.")
 
     username = whoami(oauth_token.token)["name"]
     quantized_gguf_name = f"{model_name.lower()}-{q_method.lower()}-imat.gguf"
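
The rewritten guard only dereferences the upload when both conditions hold; with Gradio file inputs, the uploaded temp file's path is exposed via .name. A hypothetical helper capturing the same logic (resolve_train_data is an invented name, not part of app.py):

    import os

    def resolve_train_data(train_data_file, use_imatrix: bool):
        # Only touch the upload when an IQ quant was requested AND a file
        # was actually provided; Gradio uploads expose a temp path via .name.
        if train_data_file and use_imatrix:
            train_data_path = train_data_file.name
            print(f"Training data file path: {train_data_path}")
            if not os.path.isfile(train_data_path):
                raise Exception(f"Training data file not found: {train_data_path}")
            return train_data_path
        print("No training data file provided or not using imatrix quantization.")
        return None
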
@@ -169,12 +156,10 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     else:
         quantise_ggml = f"./llama.cpp/quantize {fp16} {quantized_gguf_path} {q_method}"
 
-
     print(f"Quantization command: {quantise_ggml}")
 
     result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
 
-
     print(f"Quantization command stdout: {result.stdout}")
     print(f"Quantization command stderr: {result.stderr}")
 
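
subprocess.run(..., shell=True, capture_output=True, text=True) returns a CompletedProcess whose stdout/stderr are printed above; a sketch of the follow-up check one would typically pair with it (paths here are placeholders, not the Space's variables):

    import subprocess

    # Placeholder paths; the real command is built from fp16 / quantized_gguf_path.
    quantise_ggml = "./llama.cpp/quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M"
    result = subprocess.run(quantise_ggml, shell=True, capture_output=True, text=True)
    print(f"Quantization command stdout: {result.stdout}")
    print(f"Quantization command stderr: {result.stderr}")
    if result.returncode != 0:
        raise Exception(f"Error quantizing: {result.stderr}")
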
@@ -183,7 +168,6 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     print(f"Quantized successfully with {q_method} option!")
     print(f"Quantized model path: {quantized_gguf_path}")
 
-    # Create empty repo
     new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-imat.gguf", exist_ok=True, private=private_repo)
     new_repo_id = new_repo_url.repo_id
     print("Repo created successfully!", new_repo_url)
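
create_repo with exist_ok=True is idempotent and returns a RepoUrl object, which is where new_repo_id comes from. A minimal standalone sketch with huggingface_hub (token and repo id are placeholders):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_...")  # placeholder token
    # exist_ok=True makes the call idempotent; the returned RepoUrl carries
    # the canonical repo_id that later uploads should target.
    new_repo_url = api.create_repo(
        repo_id="your-username/model-q4_k_m-imat.gguf",  # placeholder repo id
        exist_ok=True,
        private=False,
    )
    new_repo_id = new_repo_url.repo_id
    print("Repo created successfully!", new_repo_url)
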
@@ -239,7 +223,7 @@ def process_model(model_id, q_method, private_repo, train_data_file, split_model
     except Exception as e:
         raise Exception(f"Error uploading quantized model: {e}")
 
-
+    # Upload imatrix.dat if it exists
     imatrix_path = "llama.cpp/imatrix.dat"
     if os.path.isfile(imatrix_path):
         try:
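
The block this hunk annotates uploads imatrix.dat when it exists. A self-contained sketch of that upload with huggingface_hub (token and repo id are placeholders):

    import os
    from huggingface_hub import HfApi

    api = HfApi(token="hf_...")  # placeholder token
    imatrix_path = "llama.cpp/imatrix.dat"
    if os.path.isfile(imatrix_path):
        try:
            # Ship the importance matrix next to the quantized model so others
            # can reproduce or re-quantize from the same calibration statistics.
            api.upload_file(
                path_or_fileobj=imatrix_path,
                path_in_repo="imatrix.dat",
                repo_id="your-username/model-q4_k_m-imat.gguf",  # placeholder
            )
        except Exception as e:
            raise Exception(f"Error uploading imatrix.dat: {e}")
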