build exllamav2 at beginning

Browse files

Files changed (3) hide show

ipynb/EXL2_Private_Quant_V1.ipynb +52 -51
ipynb/EXL2_Private_Quant_V2.ipynb +2 -1
ipynb/EXL2_Private_Quant_V3.ipynb +2 -1

ipynb/EXL2_Private_Quant_V1.ipynb CHANGED Viewed

@@ -1,33 +1,17 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "gpuType": "T4"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
   "cells": [
     {
       "cell_type": "markdown",
       "source": [
         "#Quantizing huggingface models to exl2\n",
         "This version of my exl2 quantize colab creates a single quantizaion to download privatly.\\\n",
         "To calculate an estimate for VRAM size use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
         "Not all models and architectures are compatible with exl2.\\\n",
         "Will upload to private hf repo in future."
-      ],
-      "metadata": {
-        "id": "Ku0ezvyD42ng"
-      }
     },
     {
       "cell_type": "code",
@@ -44,12 +28,19 @@
         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "modeldw = \"none\""
       ]
     },
     {
       "cell_type": "code",
       "source": [
         "#@title Login to HF (Required only for gated models)\n",
         "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
@@ -75,16 +66,16 @@
         "else:\n",
         "    #if the token is not found then prompt user to provide it:\n",
         "    login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "8Hl3fQmRLybp"
-      },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
         "#@title ##Choose HF model to download\n",
         "#@markdown Weights must be stored in safetensors\n",
@@ -96,19 +87,19 @@
         "modeldw = f\"{User}/{Repo}\"\n",
         "model = f\"{User}_{Repo}\"\n",
         "!python download-model.py {modeldw}"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "NI1LUMD7H-Zx"
-      },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
         "#@title Quantize the model\n",
-        "#@markdown ###Takes ~13 minutes to start quantizing first time, then quantization will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",
@@ -123,16 +114,16 @@
         "else:\n",
         "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
         "!python {quant}"
-      ],
-      "metadata": {
-        "id": "8anbEbGyNmBI",
-        "cellView": "form"
-      },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
         "#@title Zip and download the model\n",
         "!rm -r {model}-exl2-{BPW}bpw-WD\n",
@@ -142,13 +133,23 @@
         "from google.colab import files\n",
         "files.download(f\"{model}-{BPW}bpw.zip\")\n",
         "print(\"Colab download speeds very slow so download will take a while\")"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "XORLS2uPrbma"
-      },
-      "execution_count": null,
-      "outputs": []
     }
-  ]
-}

 {
   "cells": [
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "Ku0ezvyD42ng"
+      },
       "source": [
         "#Quantizing huggingface models to exl2\n",
         "This version of my exl2 quantize colab creates a single quantizaion to download privatly.\\\n",
         "To calculate an estimate for VRAM size use: [NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator)\\\n",
         "Not all models and architectures are compatible with exl2.\\\n",
         "Will upload to private hf repo in future."
+      ]
     },
     {
       "cell_type": "code",
         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm\n",
+        "!pip install . -q\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "modeldw = \"none\""
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "8Hl3fQmRLybp"
+      },
+      "outputs": [],
       "source": [
         "#@title Login to HF (Required only for gated models)\n",
         "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
         "else:\n",
         "    #if the token is not found then prompt user to provide it:\n",
         "    login(input(\"API token not detected. Enter your HuggingFace (WRITE) token: \"))"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "NI1LUMD7H-Zx"
+      },
+      "outputs": [],
       "source": [
         "#@title ##Choose HF model to download\n",
         "#@markdown Weights must be stored in safetensors\n",
         "modeldw = f\"{User}/{Repo}\"\n",
         "model = f\"{User}_{Repo}\"\n",
         "!python download-model.py {modeldw}"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "8anbEbGyNmBI"
+      },
+      "outputs": [],
       "source": [
         "#@title Quantize the model\n",
+        "#@markdown ###Quantization time will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",
         "else:\n",
         "    quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
         "!python {quant}"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "XORLS2uPrbma"
+      },
+      "outputs": [],
       "source": [
         "#@title Zip and download the model\n",
         "!rm -r {model}-exl2-{BPW}bpw-WD\n",
         "from google.colab import files\n",
         "files.download(f\"{model}-{BPW}bpw.zip\")\n",
         "print(\"Colab download speeds very slow so download will take a while\")"
+      ]
     }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

ipynb/EXL2_Private_Quant_V2.ipynb CHANGED Viewed

@@ -27,6 +27,7 @@
         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm\n",
         "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) by [oobabooga](https://github.com/oobabooga)\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "model = \"none\"\n",
@@ -126,7 +127,7 @@
       "outputs": [],
       "source": [
         "#@title Quantize the model\n",
-        "#@markdown ###Takes ~13 minutes to start quantizing first time, then quantization will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",

         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm\n",
+        "!pip install . -q\n",
         "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) by [oobabooga](https://github.com/oobabooga)\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "model = \"none\"\n",
       "outputs": [],
       "source": [
         "#@title Quantize the model\n",
+        "#@markdown ###Quantization time will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",

ipynb/EXL2_Private_Quant_V3.ipynb CHANGED Viewed

@@ -29,6 +29,7 @@
         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm accelerate transformers\n",
         "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) and [convert-to-safetensors.py](https://github.com/oobabooga/text-generation-webui/blob/main/convert-to-safetensors.py) by [oobabooga](https://github.com/oobabooga)\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/convert-to-safetensors.py\n",
@@ -138,7 +139,7 @@
       "outputs": [],
       "source": [
         "#@title Quantize the model\n",
-        "#@markdown ###Takes ~13 minutes to start quantizing first time, then quantization will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",

         "print(\"Installing pip dependencies\")\n",
         "!pip install -q -r requirements.txt\n",
         "!pip install -q huggingface_hub requests tqdm accelerate transformers\n",
+        "!pip install . -q\n",
         "#@markdown Uses [download-model.py](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) and [convert-to-safetensors.py](https://github.com/oobabooga/text-generation-webui/blob/main/convert-to-safetensors.py) by [oobabooga](https://github.com/oobabooga)\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/download-model.py\n",
         "!wget https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/convert-to-safetensors.py\n",
       "outputs": [],
       "source": [
         "#@title Quantize the model\n",
+        "#@markdown ###Quantization time will last based on model size\n",
         "#@markdown Target bits per weight:\n",
         "BPW = \"4.125\" # @param {type:\"string\"}\n",
         "!mkdir {model}-exl2-{BPW}bpw-WD\n",