Files changed (1)
  1. gguf-imat-llama-3.py +10 -32
gguf-imat-llama-3.py CHANGED
@@ -5,7 +5,6 @@ import subprocess
 import shutil
 from huggingface_hub import snapshot_download
 
-# Clone or update the llama.cpp repository with --depth 1
 def clone_or_update_llama_cpp():
     print("Preparing...")
     base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -18,7 +17,6 @@ def clone_or_update_llama_cpp():
     os.chdir(base_dir)
     print("The 'llama.cpp' repository is ready.")
 
-# Download and extract the latest release of llama.cpp Windows binaries
 def download_llama_release():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -45,7 +43,6 @@ def download_llama_release():
     else:
         print("Failed to fetch the latest release information.")
 
-# Download and extract the Cuda .dll resources if they aren't present in the bin folder
 def download_cudart_if_necessary(latest_release_tag):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     cudart_dl_dir = os.path.join(base_dir, "bin", "dl")
@@ -55,7 +52,6 @@ def download_cudart_if_necessary(latest_release_tag):
     cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
     cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]
 
-    # Check if all required files exist
     if all(os.path.exists(os.path.join(base_dir, "bin", file)) for file in cudart_extracted_files):
         print("Cuda resources already exist. Skipping download.")
     else:
@@ -71,7 +67,6 @@ def download_cudart_if_necessary(latest_release_tag):
     else:
         print("Failed to download the cudart release file.")
 
-# Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
 def download_model_repo():
     base_dir = os.path.dirname(os.path.abspath(__file__))
     models_dir = os.path.join(base_dir, "models")
@@ -82,36 +77,28 @@ def download_model_repo():
     model_name = model_id.split("/")[-1]
     model_dir = os.path.join(models_dir, model_name)
 
-    # Check if the model repository already exists
     if os.path.exists(model_dir):
         print("Model repository already exists. Using existing repository.")
 
-        # If the model already exists, prompt the user if they want to delete the model directory
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
 
-        # Ask for the name of the imatrix.txt file
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
 
-        # Convert the existing model to GGUF F16 format and generate imatrix.dat
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
 
     else:
         revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ") or "main"
 
-        # Ask the user if they want to remove the HF model folder after conversion
         delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()
 
         print("Downloading model repository...")
         snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
         print("Model repository downloaded successfully.")
 
-        # Ask for the name of the imatrix.txt file
         imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"
 
-        # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
         convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
 
-# Convert the downloaded model to GGUF F16 format
 def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
     convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
@@ -120,38 +107,30 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
     if not os.path.exists(gguf_dir):
         os.makedirs(gguf_dir)
 
-    # Check if F16 file already exists
     if not os.path.exists(gguf_model_path):
-        # Execute the conversion command
         subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])
 
-        # Delete the original model directory under conditions
         if delete_model_dir == 'yes' or delete_model_dir == 'y':
             shutil.rmtree(model_dir)
             print(f"Original model directory '{model_dir}' deleted.")
         else:
             print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")
 
-    # Generate imatrix.dat if it doesn't exist
     imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
-    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
-    imatrix_txt = os.path.join(base_dir, "imatrix", imatrix_file_name)
-    if not os.path.exists(imatrix_output):
-        # Execute the imatrix command
-        subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "8"], cwd=gguf_dir)
-        # Move the imatrix.dat file to the GGUF folder
-        if os.path.exists(os.path.join(gguf_dir, "imatrix.dat")):
-            shutil.move(os.path.join(gguf_dir, "imatrix.dat"), gguf_dir)
-            print("imatrix.dat generated successfully.")
-        else:
-            print("Failed to generate imatrix.dat file.")
+    imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
+    imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
+    if not os.path.exists(imatrix_output_dst):
+        try:
+            subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", os.path.join(base_dir, "imatrix", imatrix_file_name), "-ngl", "8"], cwd=gguf_dir)
+            shutil.move(imatrix_output_src, imatrix_output_dst)
+            print("imatrix.dat moved successfully.")
+        except Exception as e:
+            print("Error occurred while moving imatrix.dat:", e)
     else:
-        print("Skipping imatrix generation as imatrix.dat already exists.")
+        print("imatrix.dat already exists in the GGUF folder.")
 
-    # Quantize the models
     quantize_models(base_dir, model_name)
 
-# Quantize models with different options
 def quantize_models(base_dir, model_name):
     gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
     f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
@@ -174,7 +153,6 @@ def quantize_models(base_dir, model_name):
             f16_gguf_path, quantized_gguf_path, quant_option], cwd=gguf_dir)
         print(f"Model quantized with {quant_option} option.")
 
-# Main function - Steps
 def main():
     clone_or_update_llama_cpp()
     latest_release_tag = download_llama_release()
 