|
|
|
|
|
import glob
import os
import shutil
import subprocess
import sys

from huggingface_hub import login, get_token, whoami, repo_exists, file_exists, upload_folder, create_repo, upload_file, create_branch
|
|
|
|
|
# Platform-specific shell commands and paths used by the rest of the script.
#   osclear - command that clears the terminal
#   osmv    - command that moves a file or directory
#   osrmd   - command that recursively deletes a directory
#   oscp    - command that copies a file
#   pyt     - path of the virtual-environment Python interpreter
#   slsh    - path separator used when building shell command strings
oname = os.name
if oname == 'nt':
    osclear = 'cls'
    osmv = 'move'
    osrmd = 'rmdir /s /q'
    oscp = 'copy'
    pyt = 'venv\\scripts\\python.exe'
    slsh = '\\'
elif oname == 'posix':
    osclear = 'clear'
    osmv = 'mv'
    osrmd = 'rm -r'
    oscp = 'cp'
    pyt = './venv/bin/python'
    slsh = '/'
else:
    # Any other os.name has no command set defined here, so stop early.
    sys.exit('This script is not compatible with your machine.')
|
def clear_screen():
    """Clear the terminal by running the platform command stored in ``osclear``."""
    clear_command = osclear
    os.system(clear_command)
|
|
|
|
|
# When running as a Kaggle kernel, log in with the HF_TOKEN notebook secret.
if 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
    from kaggle_secrets import UserSecretsClient
    from kaggle_web_client import BackendError

    try:
        login(UserSecretsClient().get_secret("HF_TOKEN"))
    except BackendError:
        # The secret lookup failed: explain how to set it up, then carry on
        # so the regular token checks below can still prompt the user.
        print(
            "\n"
            "When using Kaggle, make sure to use the secret key HF_TOKEN with a 'WRITE' token.\n"
            "This will prevent the need to login every time you run the script.\n"
            "Set your secrets with the secrets add-on on the top of the screen.\n"
        )
|
# Log in with an already-available token when there is one; otherwise ask
# the user to type a token. ``tfound`` records which path was taken.
existing_token = get_token()
if existing_token is None:
    login(input("API token not detected. Enter your HuggingFace (WRITE) token: "))
    tfound = "false"
else:
    login(existing_token)
    tfound = "Where are my doritos?"
|
|
|
|
|
# Keep asking for a token until the logged-in token reports the 'write' role.
# NOTE(review): only the literal role 'write' passes this check - confirm how
# other token types (e.g. fine-grained tokens) report their role.
while whoami().get('auth', {}).get('accessToken', {}).get('role') != 'write':
    clear_screen()

    # With HF_TOKEN set in the environment the script gives up instead of
    # prompting (see the message: logging in is not possible in that case).
    if 'HF_TOKEN' in os.environ:
        print(
            "\n"
            "You have the environment variable HF_TOKEN set.\n"
            "You cannot log in.\n"
            "Either set the environment variable to a 'WRITE' token or remove it.\n"
        )
        input("Press enter to continue.")
        sys.exit("Exiting...")

    if 'COLAB_BACKEND_VERSION' in os.environ:
        # On Colab the script exits rather than looping on the same secret.
        print(
            "\n"
            "Your Colab secret key is read-only\n"
            "Please switch your key to 'write' or disable notebook access on the left.\n"
        )
        sys.exit("Stuck in loop, exiting...")
    elif 'KAGGLE_KERNEL_RUN_TYPE' in os.environ:
        # On Kaggle the user is warned but may still type a token below.
        print(
            "\n"
            "Your Kaggle secret key is read-only\n"
            "Please switch your key to 'write' or unattach from notebook in add-ons at the top.\n"
            "Having a read-only key attched will require login every time.\n"
        )

    print("You do not have write access to this repository. Please use a valid token with (WRITE) access.")
    login(input("Enter your HuggingFace (WRITE) token: "))

clear_screen()
|
|
|
|
|
# Ask which model to quantize. Surrounding whitespace is dropped so a stray
# space from copy/paste does not end up in paths or shell commands.
repo_url = input("Enter unquantized model repository (User/Repo): ").strip()

# The id must be exactly "User/Repo": the repo part is taken from the second
# segment below, so anything else (no slash, a full URL, empty segments) is
# rejected here with a clear message instead of failing later.
repo_parts = repo_url.split("/")
if len(repo_parts) != 2 or not all(repo_parts):
    print(f"Invalid repository '{repo_url}'. Expected the form User/Repo.")
    sys.exit("Exiting...")

if not repo_exists(repo_url):
    print(f"Model repo doesn't exist at https://huggingface.co/{repo_url}")
    sys.exit("Exiting...")

# ``model`` is the filesystem-safe folder name ("User_Repo");
# ``modelname`` is just the repository part ("Repo").
model = repo_url.replace("/", "_")
modelname = repo_parts[1]
clear_screen()
|
|
|
|
|
# Ask how many quants to create, re-prompting until the answer is a whole
# number of at least 1 (a non-numeric answer used to crash the script and
# 0 or a negative number was silently accepted).
quant_count_prompt = "Enter the number of quants you want to create: "
while True:
    try:
        qmount = int(input(quant_count_prompt))
    except ValueError:
        quant_count_prompt = "Please enter a whole number: "
        continue
    if qmount >= 1:
        break
    quant_count_prompt = "Please enter a number of at least 1: "

# qmount is stored as count + 1 because it is used as the exclusive upper
# bound of range(1, qmount) when the BPW values are collected.
qmount += 1
clear_screen()
|
|
|
|
|
# Collect one BPW (bits per weight) value per requested quant into ``qnum``,
# keyed "bpw1", "bpw2", ...
print(f"Type the BPW for the following {qmount - 1} quants. Recommend staying over 2.4 BPW. Use the vram calculator to find the best BPW values: https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator")
qnum = {}
for i in range(1, qmount):
    bpw_prompt = f"Enter BPW for quant {i} (2.00-8.00): "
    while True:
        try:
            entered_bpw = float(input(bpw_prompt))
        except ValueError:
            entered_bpw = None
        # The range test also rejects nan and inf, which float() accepts.
        if entered_bpw is not None and 2.0 <= entered_bpw <= 8.0:
            break
        bpw_prompt = f"Invalid value. Enter BPW for quant {i} as a number from 2.00 to 8.00: "
    qnum[f"bpw{i}"] = entered_bpw
clear_screen()
|
|
|
|
|
# BPW values to process, in ascending order. Duplicates are dropped so the
# same BPW is not quantized twice or listed twice in the README.
bpwvalue = sorted(set(qnum.values()))
|
|
|
# Download the model unless its folder already holds the "converted-st"
# marker file.
converted_marker = os.path.join("models", model, "converted-st")
if not os.path.exists(converted_marker):
    download = subprocess.run(f"{pyt} download-model.py {repo_url}", shell=True)
    if download.returncode:
        print("Download failed.")
        sys.exit("Exiting...")
    clear_screen()
|
|
|
# If the downloaded model has no .safetensors weights, offer to convert it.
if not glob.glob(f"models/{model}/*.safetensors"):
    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").strip().lower()
    while convertst not in ('y', 'n'):
        convertst = input("Please enter 'y' or 'n': ").strip().lower()

    if convertst == 'n':
        sys.exit("Can't quantize a non-safetensors model. Exiting...")

    print("Converting weights to safetensors, please wait...")
    result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st", shell=True)
    if result.returncode != 0:
        print("Converting failed. Please look for a safetensors model or convert model manually.")
        sys.exit("Exiting...")

    # Replace the original folder with the converted one. shutil raises if
    # the delete fails; the previous unchecked shell delete could fail
    # silently and the following move would then nest the converted folder
    # inside the old one.
    original_dir = os.path.join("models", model)
    converted_dir = f"{original_dir}-st"
    shutil.rmtree(original_dir)
    shutil.move(converted_dir, original_dir)

    # Empty marker file: its presence makes later runs skip the download.
    with open(os.path.join(original_dir, "converted-st"), 'w'):
        pass
    print("Finished converting")
clear_screen()
|
|
|
|
|
# Create the private "<user>/<modelname>-exl2" repository with a placeholder
# README on first run; on later runs just confirm the user wants to resume.
# The account name is looked up once instead of calling whoami() for every
# URL and every README line.
hf_user = whoami().get('name', None)
quant_repo = f"{hf_user}/{modelname}-exl2"

if not repo_exists(quant_repo):
    print("Creating model repository...")
    create_repo(quant_repo, private=True)
    print(f"Created repo at https://huggingface.co/{quant_repo}")

    print("Writing model card...")
    # Explicit encoding so the card is written the same way on every platform.
    with open('./README.md', 'w', encoding='utf-8') as readme:
        readme.write(f"# Exl2 quants for [{modelname}](https://huggingface.co/{repo_url})\n\n")
        readme.write("## Automatically quantized using the auto quant from [hf-scripts](https://huggingface.co/anthonyg5005/hf-scripts)\n\n")
        readme.write(f"Would recommend {hf_user} to change up this README to include more info.\n\n")
        readme.write("### BPW:\n\n")
        # One link per quant, pointing at the branch that will hold it.
        for bpw in bpwvalue:
            readme.write(f"[{bpw}](https://huggingface.co/{quant_repo}/tree/{bpw}bpw)\n\n")
    print("Created README.md")

    upload_file(path_or_fileobj="README.md", path_in_repo="README.md", repo_id=quant_repo, commit_message="Add temp README")
    print("Uploaded README.md to main")
else:
    input("repo already exists, are you resuming a previous process? (Press enter to continue, ctrl+c to exit)")
|
|
|
|
|
# Quantize the model once per requested BPW and upload each result to its
# own "<bpw>bpw" branch of "<user>/<modelname>-exl2".
hf_user = whoami().get('name', None)  # looked up once, not per iteration
quant_repo = f"{hf_user}/{modelname}-exl2"
measure_dir = f"{model}-measure"
measurement_file = f"{measure_dir}{slsh}measurement.json"

for bpw in bpwvalue:
    work_dir = f"{model}-exl2-{bpw}bpw-WD"
    out_dir = f"{model}-exl2-{bpw}bpw"

    # Reuse a saved measurement.json when there is one (passed with -m);
    # otherwise remember to save the one this run produces.
    cmdir = not os.path.exists(measurement_file)
    mskip = "" if cmdir else f" -m {measurement_file}"

    print(f"Starting quantization for BPW {bpw}")
    os.makedirs(work_dir, exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)
    shutil.copy(os.path.join("models", model, "config.json"), work_dir)

    result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {work_dir} -cf {out_dir} -b {bpw}{mskip}", shell=True)
    if result.returncode != 0:
        print("Quantization failed.")
        sys.exit("Exiting...")

    if cmdir:
        # Keep the fresh measurement so the remaining quants can reuse it.
        os.makedirs(measure_dir, exist_ok=True)
        shutil.copy(os.path.join(work_dir, "measurement.json"), measure_dir)
        open(f"{measure_dir}/Delete folder when no more quants are needed from this model", 'w').close()

    # exist_ok=True tolerates a branch left over from an earlier run while
    # letting real failures surface; the old bare ``except:`` reported every
    # error (and Ctrl+C) as "branch already exists".
    create_branch(quant_repo, branch=f"{bpw}bpw", exist_ok=True)
    upload_folder(folder_path=out_dir, repo_id=quant_repo, commit_message=f"Add quant for BPW {bpw}", revision=f"{bpw}bpw")

    # Best-effort cleanup: a leftover folder must not abort the remaining quants.
    shutil.rmtree(work_dir, ignore_errors=True)
    shutil.rmtree(out_dir, ignore_errors=True)
|
|
|
# Upload measurement.json to the quant repo unless the repo already has one.
measurement_repo = f"{whoami().get('name', None)}/{modelname}-exl2"
measurement_missing = not file_exists(measurement_repo, "measurement.json")
if measurement_missing:
    upload_file(
        path_or_fileobj=f"{model}-measure{slsh}measurement.json",
        path_in_repo="measurement.json",
        repo_id=measurement_repo,
        commit_message="Add measurement.json",
    )
|
|
|
# Tell the user where the quants ended up and how to make the repo public.
finished_repo_url = f"https://huggingface.co/{whoami().get('name', None)}/{modelname}-exl2"
print(
    f"Quants available at {finished_repo_url}\n"
    "\n"
    f"Repo is private, go to {finished_repo_url}/settings to make public if you'd like."
)
|
|