Anthonyg5005 committed
Commit bdfff0c
1 Parent(s): da00d36

add bf16 to convert
README.md CHANGED
@@ -21,7 +21,7 @@ Feel free to send in PRs or use this code however you'd like.\
  
  - [Manage branches (create/delete)](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/manage%20branches.py)
  
- - [EXL2 Single Quant V3](https://colab.research.google.com/drive/1Vc7d6JU3Z35OVHmtuMuhT830THJnzNfS?usp=sharing) **(COLAB)**
+ - [EXL2 Single Quant V3](https://colab.research.google.com/#fileId=https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/ipynb/EXL2_Private_Quant_V3.ipynb) **(COLAB)**
  
  ## work in progress/not tested (ordered by priority)
  
auto-exl2-upload/auto-exl2-upload.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b6f43d73fb8f630a2323c707b8d374eb4f8492bb1f9278e0f01dcc24a04b81fc
- size 8603
+ oid sha256:8957446b8346fe63db8344806062d6476f5e0ed438ac97ca3a37d06636141337
+ size 8725
auto-exl2-upload/exl2-quant.py CHANGED
@@ -118,17 +118,17 @@ bpwvalue = list(qnum.values())
  bpwvalue.sort()
  
  #ask to change repo visibility to public on hf hub
- priv2pub = input("Do you want to make the repo public after successful quants? (y/n): ")
+ priv2pub = input("Do you want to make the repo public after successful quants? (y/n): ").lower()
  while priv2pub != 'y' and priv2pub != 'n':
-     priv2pub = input("Please enter 'y' or 'n': ")
+     priv2pub = input("Please enter 'y' or 'n': ").lower()
  clear_screen()
  
  #ask to delete original fp16 weights
- delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ")
+ delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ").lower()
  if delmodel == '':
      delmodel = 'n'
  while delmodel != 'y' and delmodel != 'n':
-     delmodel = input("Please enter 'y' or 'n': ")
+     delmodel = input("Please enter 'y' or 'n': ").lower()
      if delmodel == '':
          delmodel = 'n'
  clear_screen()
@@ -143,12 +143,19 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
  
  #convert to safetensors if bin
  if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
-     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
+     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").lower()
      while convertst != 'y' and convertst != 'n':
-         convertst = input("Please enter 'y' or 'n': ")
+         convertst = input("Please enter 'y' or 'n': ").lower()
+     convusebf16 = input("Would you like to use bf16 loading? Will reduce ram usage (y/n): ").lower()
+     while convusebf16 != 'y' and convusebf16 != 'n':
+         convusebf16 = input("Please enter 'y' or 'n': ").lower()
+     if convusebf16 == 'y':
+         usingbf16 = "--bf16"
+     else:
+         usingbf16 = ""
      if convertst == 'y':
          print("Converting weights to safetensors, please wait...")
-         result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
+         result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st {usingbf16}", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
          if result.returncode != 0:
              print("Converting failed. Please look for a safetensors model or convert model manually.")
              sys.exit("Exiting...")
@@ -171,7 +178,6 @@ if repo_exists(f"{whoami().get('name', None)}/{modelname}-exl2") == False:
      with open('./README.md', 'w') as file:
          file.write(f"# Exl2 quants for [{modelname}](https://huggingface.co/{repo_url})\n\n")
          file.write("## Automatically quantized using the auto quant script from [hf-scripts](https://huggingface.co/anthonyg5005/hf-scripts)\n\n")
-         file.write(f"Would recommend {whoami().get('name', None)} to change up this README to include more info.\n\n")
          file.write("### BPW:\n\n")
          for bpw in bpwvalue:
              file.write(f"[{bpw}](https://huggingface.co/{whoami().get('name', None)}/{modelname}-exl2/tree/{bpw}bpw)\\\n")
@@ -208,6 +214,11 @@ for bpw in bpwvalue:
          create_branch(f"{whoami().get('name', None)}/{modelname}-exl2", branch=f"{bpw}bpw") #create branch
      except:
          print(f"Branch {bpw} already exists, trying upload...")
+     try:
+         os.remove(f"{model}-exl2-{bpw}bpw/README.md") #bypasses encode issue when uploading some models
+         print("Deleting model README.")
+     except:
+         print("Skipping README delete.")
      upload_folder(folder_path=f"{model}-exl2-{bpw}bpw", repo_id=f"{whoami().get('name', None)}/{modelname}-exl2", commit_message=f"Add quant for BPW {bpw}", revision=f"{bpw}bpw") #upload quantized model
      subprocess.run(f"{osrmd} {model}-exl2-{bpw}bpw-WD", shell=True) #remove working directory
      subprocess.run(f"{osrmd} {model}-exl2-{bpw}bpw", shell=True) #remove compile directory
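The new prompt only decides whether the string `--bf16` is appended to the convert-to-safetensors.py call; the conversion itself lives in oobabooga's script, which is not part of this commit. As a rough sketch of what such a flag typically controls in a transformers load-then-save conversion (the argument names mirror the diff; everything else here is an assumption, not the repo's actual script):

```python
# Hypothetical sketch of a bf16-aware .bin -> .safetensors conversion.
# NOT the actual convert-to-safetensors.py; shown only to illustrate the flag.
import argparse

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

parser = argparse.ArgumentParser()
parser.add_argument("model_dir", help="e.g. models/<model>")
parser.add_argument("--output", required=True, help="e.g. models/<model>-st")
parser.add_argument("--bf16", action="store_true",
                    help="load weights as bfloat16 instead of the checkpoint default, cutting RAM use during conversion")
parser.add_argument("--max-shard-size", default="2GB")
args = parser.parse_args()

# torch_dtype=None keeps the default load dtype (often fp32 for older .bin checkpoints),
# which is why opting into bf16 loading noticeably reduces RAM usage.
dtype = torch.bfloat16 if args.bf16 else None
model = AutoModelForCausalLM.from_pretrained(args.model_dir, torch_dtype=dtype, low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(args.model_dir)

# safe_serialization=True writes .safetensors shards instead of pytorch .bin files
model.save_pretrained(args.output, safe_serialization=True, max_shard_size=args.max_shard_size)
tokenizer.save_pretrained(args.output)
```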
exl2-multi-quant-local/exl2-multi-quant-local.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35482a8018761c89f6c1824910541983876f33b636db008c09d4825b499f5704
- size 7424
+ oid sha256:80ce84370ae0ee56854c6f5c6bee00ece8d3fd133e05acefde4b5c85de553057
+ size 7520
exl2-multi-quant-local/exl2-quant.py CHANGED
@@ -90,11 +90,11 @@ bpwvalue = list(qnum.values())
  bpwvalue.sort()
  
  #ask to delete fp16 after done
- delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ")
+ delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ").lower()
  if delmodel == '':
      delmodel = 'n'
  while delmodel != 'y' and delmodel != 'n':
-     delmodel = input("Please enter 'y' or 'n': ")
+     delmodel = input("Please enter 'y' or 'n': ").lower()
      if delmodel == '':
          delmodel = 'n'
  if delmodel == 'y':
@@ -112,12 +112,19 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
  
  #convert to safetensors if bin
  if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
-     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
+     convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").lower()
      while convertst != 'y' and convertst != 'n':
-         convertst = input("Please enter 'y' or 'n': ")
+         convertst = input("Please enter 'y' or 'n': ").lower()
+     convusebf16 = input("Would you like to use bf16 loading? Will reduce ram usage (y/n): ").lower()
+     while convusebf16 != 'y' and convusebf16 != 'n':
+         convusebf16 = input("Please enter 'y' or 'n': ").lower()
+     if convusebf16 == 'y':
+         usingbf16 = "--bf16"
+     else:
+         usingbf16 = ""
      if convertst == 'y':
          print("Converting weights to safetensors, please wait...")
-         result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
+         result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st {usingbf16}", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
          if result.returncode != 0:
              print("Converting failed. Please look for a safetensors model or convert model manually.")
              sys.exit("Exiting...")
ipynb/EXL2_Private_Quant_V3.ipynb CHANGED
@@ -1,23 +1,10 @@
  {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-   "colab": {
-    "provenance": [],
-    "gpuType": "T4"
-   },
-   "kernelspec": {
-    "name": "python3",
-    "display_name": "Python 3"
-   },
-   "language_info": {
-    "name": "python"
-   },
-   "accelerator": "GPU"
-  },
   "cells": [
    {
     "cell_type": "markdown",
+    "metadata": {
+     "id": "Ku0ezvyD42ng"
+    },
     "source": [
      "#Quantizing huggingface models to exl2\n",
      "This version of my exl2 quantize colab creates a single quantizaion to upload privatly.\\\n",
@@ -27,10 +14,7 @@
      "#Outdated\n",
      "More recent stuff in [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts)\\\n",
      "If you need to quant a model to exl2 for free, check out the bot from the [Exllama Discord server](https://discord.gg/NSFwVuCjRq)"
-    ],
-    "metadata": {
-     "id": "Ku0ezvyD42ng"
-    }
+    ]
    },
    {
     "cell_type": "code",
@@ -57,6 +41,12 @@
    },
    {
     "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "cellView": "form",
+     "id": "8Hl3fQmRLybp"
+    },
+    "outputs": [],
     "source": [
      "#@title Login to HF (Required to upload files)\n",
      "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
@@ -110,16 +100,16 @@
      " login(input(\"Enter your HuggingFace (WRITE) token: \"))\n",
      " continue\n",
      " break"
-    ],
-    "metadata": {
-     "cellView": "form",
-     "id": "8Hl3fQmRLybp"
-    },
-    "execution_count": null,
-    "outputs": []
+    ]
    },
    {
     "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "cellView": "form",
+     "id": "NI1LUMD7H-Zx"
+    },
+    "outputs": [],
     "source": [
      "#@title ##Choose HF model to download\n",
      "#@markdown ###Repo should be formatted as user/repo\n",
@@ -133,22 +123,22 @@
      "#@markdown Convert Pytorch weights to Safetensors\n",
      "convert_safetensors = False # @param {type:\"boolean\"}\n",
      "if convert_safetensors == True:\n",
-     " !python convert-to-safetensors.py models/{model} --output models/{model}-st\n",
+     " !python convert-to-safetensors.py models/{model} --output models/{model}-st --bf16 --max-shard-size 1GB\n",
      " !rm -r models/{model}\n",
      " !mv models/{model}-st models/{model}\n",
      " print(\"Finished converting\")\n",
      "#@markdown If model files are stored in a pytorch .bin extention then enable convert_safetensors above.\\\n",
      "#@markdown ![Example Image](https://huggingface.co/Anthonyg5005/hf-scripts/resolve/main/ipynb/pytorch-example.jpg \"File extension is .bin\")"
-    ],
-    "metadata": {
-     "id": "NI1LUMD7H-Zx",
-     "cellView": "form"
-    },
-    "execution_count": null,
-    "outputs": []
+    ]
    },
    {
     "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "cellView": "form",
+     "id": "8anbEbGyNmBI"
+    },
+    "outputs": [],
     "source": [
      "#@title Quantize the model\n",
      "#@markdown ###Quantization time will last based on model size\n",
@@ -205,16 +195,16 @@
      "else:\n",
      " quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
      "!python {quant}"
-    ],
-    "metadata": {
-     "id": "8anbEbGyNmBI",
-     "cellView": "form"
-    },
-    "execution_count": null,
-    "outputs": []
+    ]
    },
    {
     "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "cellView": "form",
+     "id": "XORLS2uPrbma"
+    },
+    "outputs": [],
     "source": [
      "#@title Upload to huggingface privately\n",
      "#@markdown You may also set it to public but I'd recommend waiting for my next ipynb that will create mutliple quants and place them all into individual branches.\n",
@@ -225,13 +215,23 @@
      "create_repo(f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", private=True)\n",
      "HfApi().upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", repo_type=\"model\", commit_message=\"Upload from Colab automation\")\n",
      "print(f\"uploaded to https://huggingface.co/{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\")"
-    ],
-    "metadata": {
-     "cellView": "form",
-     "id": "XORLS2uPrbma"
-    },
-    "execution_count": null,
-    "outputs": []
+    ]
    }
-  ]
- }
+  ],
+  "metadata": {
+   "accelerator": "GPU",
+   "colab": {
+    "gpuType": "T4",
+    "provenance": []
+   },
+   "kernelspec": {
+    "display_name": "Python 3",
+    "name": "python3"
+   },
+   "language_info": {
+    "name": "python"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+ }
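Most of the churn in this notebook is key reordering (cell `metadata`, `execution_count`, and `outputs` now appear before `source`, and the top-level `metadata`/`nbformat` keys move to the bottom) rather than content changes; the only functional edit is the `--bf16 --max-shard-size 1GB` addition to the conversion cell. The new layout matches what a round-trip through the nbformat library produces, so the file was likely re-saved by a tool rather than hand-edited; a sketch of such a normalization step (an assumption, not something stated by the commit):

```python
import nbformat

# Reading and rewriting a notebook normalizes key order:
# "cells" first, then "metadata", "nbformat", "nbformat_minor",
# with per-cell fields sorted ahead of "source".
path = "ipynb/EXL2_Private_Quant_V3.ipynb"
nb = nbformat.read(path, as_version=4)
nbformat.write(nb, path)
```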
ipynb/Multi_Quant_exl2.ipynb CHANGED
@@ -159,7 +159,7 @@
      "\n",
      "if not glob.glob(f\"models/{model}/*.safetensors\"): #check if safetensors model exists, if not try converting\n",
      " print(\"Converting weights to safetensors, please wait...\")\n",
-     " result = subprocess.run(f\"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st\", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)\n",
+     " result = subprocess.run(f\"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st --max-shard-size 1GB --bf16\", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)\n",
      " if result.returncode != 0:\n",
      " print(\"Converting failed. Please look for a safetensors/bin model.\")\n",
      " sys.exit(\"Exiting...\")\n",