Spaces:

NyxKrage
/

LLM-Model-VRAM-Calculator

Running

File size: 25,935 Bytes

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <script>
      /**
       * Parse a string of markup into a document.
       *
       * @param {string} str - Markup to parse.
       * @returns {Document} Whatever DOMParser#parseFromString produces for the
       *   "text/html" MIME type.
       */
      function strToHtml(str) {
        return new DOMParser().parseFromString(str, "text/html");
      }

      // Short, jQuery-independent function to read an HTML table and write its rows into an Array.
      // Kudos to RobG at StackOverflow
      /**
       * Convert a table into an array of plain objects, one per data row.
       *
       * The first row supplies the property names; every following row becomes an
       * object mapping those names to the trimmed text of the cell in the same column.
       *
       * @param {HTMLTableElement|null} table - Table to read (anything exposing
       *   `rows[i].cells[j].textContent`/`innerText`). May be null/undefined.
       * @returns {Object[]} One object per data row; an empty array when there is
       *   no table, no rows, or only a header row.
       */
      function tableToObj(table) {
        // A missing table (e.g. querySelector found nothing) or an empty one has no data.
        if (!table || !table.rows || table.rows.length === 0) {
          return [];
        }

        // Rows shorter than the header yield "" for the missing columns instead of throwing.
        const cellText = (cell) =>
          cell ? (cell.textContent || cell.innerText || "").trim() : "";

        const rows = Array.from(table.rows);

        // Use the first row for the property names
        const propNames = Array.from(rows[0].cells, (cell) => cellText(cell));

        // Use the remaining rows for data
        return rows.slice(1).map((row) => {
          const obj = {};
          propNames.forEach((name, column) => {
            obj[name] = cellText(row.cells[column]);
          });
          return obj;
        });
      }

      /**
       * Build display labels of the form "<Product Name> - <first Memory field>".
       *
       * @param {Object[]} gpus - Rows carrying "Product Name" and a comma-separated
       *   "Memory" string.
       * @returns {string[]} One label per row, in the same order.
       */
      function formatGpu(gpus) {
        const labels = [];
        for (const gpu of gpus) {
          // Only the part of "Memory" before the first comma is shown.
          const [memory] = gpu["Memory"].split(",");
          labels.push(`${gpu["Product Name"]} - ${memory}`);
        }
        return labels;
      }

      // Lookup table from GGUF quantization name to its bits-per-weight value.
      // calculateSizes reads the name shown on the "quantsize" button, looks it up
      // here and passes the number on as `bpw`.
      // NOTE(review): the figures are presumably measured average bits per weight
      // for each llama.cpp quant type — confirm against the source they came from.
      const gguf_quants = {
        "IQ1_S": 1.56,
        "IQ2_XXS": 2.06,
        "IQ2_XS": 2.31,
        "IQ2_S": 2.5,
        "IQ2_M": 2.7,
        "IQ3_XXS": 3.06,
        "IQ3_XS": 3.3,
        "Q2_K": 3.35,
        "Q3_K_S": 3.5,
        "IQ3_S": 3.5,
        "IQ3_M": 3.7,
        "Q3_K_M": 3.91,
        "Q3_K_L": 4.27,
        "IQ4_XS": 4.25,
        "IQ4_NL": 4.5,
        "Q4_0": 4.55,
        "Q4_K_S": 4.58,
        "Q4_K_M": 4.85,
        "Q5_0": 5.54,
        "Q5_K_S": 5.54,
        "Q5_K_M": 5.69,
        "Q6_K": 6.59,
        "Q8_0": 8.5,
      }
  
      /**
       * Fetch a model's `config.json` from the Hugging Face Hub and attach the
       * model's parameter count to it as `config.parameters`.
       *
       * The parameter count comes from the first source that yields a number:
       *   1. `metadata.total_size` of `model.safetensors.index.json`, halved;
       *   2. `metadata.total_size` of `pytorch_model.bin.index.json`, halved;
       *   3. the `safetensors.total` value embedded in a `data-props` attribute of
       *      the model's web page, fetched through corsproxy.io.
       * NOTE(review): halving `total_size` presumably assumes 2-byte (fp16/bf16)
       * weights — confirm.
       *
       * @param {string} hf_model - Repository id, e.g. "Nexusflow/Starling-LM-7B-beta".
       * @param {string} hf_token - Access token; empty for anonymous requests.
       * @returns {Promise<Object>} The parsed config with an added `parameters` field.
       * @throws {Error} When config.json cannot be fetched or no parameter count is found.
       */
      async function modelConfig(hf_model, hf_token) {
        // Declared locally: assigning to an undeclared `auth` leaks a global in sloppy
        // mode and throws a ReferenceError in strict mode / ES modules.
        const auth = hf_token ? {
            headers: {
                'Authorization': `Bearer ${hf_token}`
            }
        } : {}

        const config_response = await fetch(
          `https://huggingface.co/${hf_model}/raw/main/config.json`, auth
        )
        if (!config_response.ok) {
          // Fail with a readable message instead of a JSON parse error on an error page.
          throw new Error(`could not fetch config.json for "${hf_model}" (HTTP ${config_response.status})`)
        }
        const config = await config_response.json()

        let model_size = NaN
        const index_files = ["model.safetensors.index.json", "pytorch_model.bin.index.json"]
        for (const index_file of index_files) {
          try {
            const index_response = await fetch(`https://huggingface.co/${hf_model}/resolve/main/${index_file}`, auth)
            const index = await index_response.json()
            model_size = index["metadata"]["total_size"] / 2
          } catch (e) {
            // Missing file, non-JSON body or missing metadata: fall through to the next source.
            model_size = NaN
          }
          if (!Number.isNaN(model_size)) {
            break
          }
        }

        if (Number.isNaN(model_size)) {
          // Last resort: scrape the model page. The token is not attached to this
          // request, which goes through the third-party corsproxy.io service.
          const page_response = await fetch(
              "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
          )
          const model_page = await page_response.text()
          // DOMParser yields an inert document, so markup from the fetched page cannot
          // load resources or run event handlers in this page while it is being queried.
          const page = new DOMParser().parseFromString(model_page, "text/html")

          let scraped
          try {
            const params_el = page.querySelector('div[data-target="ModelSafetensorsParams"]')
            if (params_el !== null) {
              scraped = JSON.parse(params_el.getAttribute("data-props"))["safetensors"]["total"]
            } else {
              const header_el = page.querySelector('div[data-target="ModelHeader"]')
              scraped = JSON.parse(header_el.getAttribute("data-props"))["model"]["safetensors"]["total"]
            }
          } catch (e) {
            scraped = undefined
          }
          if (typeof scraped !== "number" || Number.isNaN(scraped)) {
            throw new Error(`could not determine the parameter count of "${hf_model}"`)
          }
          model_size = scraped
        }

        config.parameters = model_size
        return config
      }

      /**
       * Sum the element counts of the llama.cpp input tensors for a context length
       * and batch size.
       *
       * Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248, which
       * allocates:
       *   inp_tokens   I32  [n_batch]
       *   inp_embd     F32  [n_embd, n_batch]
       *   inp_pos      I32  [n_batch]
       *   inp_KQ_mask  F32  [n_ctx, n_batch]
       *   inp_K_shift  I32  [n_ctx]
       *   inp_sum      F32  [1, n_batch]
       * n_embd is the hidden size (github:ggerganov/llama.cpp/convert.py:248).
       *
       * NOTE(review): the result is a number of elements, not bytes, although the
       * tensors above are 32-bit types and contextSize adds this value to the
       * kvCache result, which is scaled by cache_bit / 8 — confirm whether a
       * factor of 4 is intended here.
       *
       * @param {number} [context=8192] - Context length (n_ctx).
       * @param {Object} model_config - Model config; `hidden_size` is read.
       * @param {number} [bsz=512] - Batch size (n_batch).
       * @returns {number} Total element count across the six tensors.
       */
      function inputBuffer(context=8192, model_config, bsz=512) {
        const n_embd = model_config["hidden_size"]
        const element_counts = [
          bsz,            // inp_tokens
          n_embd * bsz,   // inp_embd
          bsz,            // inp_pos
          context * bsz,  // inp_KQ_mask
          context,        // inp_K_shift
          bsz,            // inp_sum
        ]
        // Left-to-right sum, starting from the first entry.
        return element_counts.reduce((running, count) => running + count)
      }

      /**
       * Estimate the compute buffer size for a context length.
       *
       * The estimate is (context / 1024 * 2 + 0.75) * num_attention_heads MiB and
       * does not depend on the batch size; when `bsz` is not 512 the user is
       * warned through `alert` and the same formula is used.
       * NOTE(review): the formula looks like an empirical fit for batch size 512 —
       * its origin is not visible in this file.
       *
       * @param {number} [context=8192] - Context length.
       * @param {Object} model_config - Model config; `num_attention_heads` is read.
       * @param {number} [bsz=512] - Batch size; only used to decide whether to warn.
       * @returns {number} Estimated size (MiB figure multiplied by 1024 * 1024).
       */
      function computeBuffer(context=8192, model_config, bsz=512) {
        // Loose comparison kept so a numeric string "512" does not trigger the warning.
        if (bsz != 512) {
          alert("batch size other than 512 is currently not supported for the compute buffer, using batch size 512 for compute buffer calculation, end result will be an overestimation")
        }
        return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
      }

      /**
       * Estimate the size in bytes of the key/value cache.
       *
       * size = 2 (K and V) * n_embd_gqa * num_hidden_layers * context * cache_bit / 8,
       * where n_embd_gqa = hidden_size / (num_attention_heads / num_key_value_heads).
       *
       * @param {number} [context=8192] - Context length.
       * @param {Object} model_config - Model config; reads `num_attention_heads`,
       *   `num_key_value_heads` (optional), `hidden_size` and `num_hidden_layers`.
       * @param {number} [cache_bit=16] - Bits stored per cache element.
       * @returns {number} Cache size in bytes.
       */
      function kvCache(context=8192, model_config, cache_bit=16) {
        const n_head = model_config["num_attention_heads"]
        // Configs for models without grouped-query attention may omit
        // num_key_value_heads; treat that as one KV head per attention head
        // instead of letting the division produce NaN (or 0 for null).
        const configured_kv_heads = model_config["num_key_value_heads"]
        const n_head_kv = configured_kv_heads == null ? n_head : configured_kv_heads
        const n_gqa = n_head / n_head_kv
        const n_embd_gqa = model_config["hidden_size"] / n_gqa
        const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
        const size = 2 * n_elements
        return size * (cache_bit / 8)
      }

      /**
       * Combine the input buffer, KV cache and compute buffer estimates into a
       * single figure rounded to two decimals.
       *
       * @param {number} [context=8192] - Context length.
       * @param {Object} model_config - Model config forwarded to the three estimators.
       * @param {number} [bsz=512] - Batch size, forwarded to inputBuffer and computeBuffer.
       * @param {number} [cache_bit=16] - Cache bit width, forwarded to kvCache.
       * @returns {number} inputBuffer + kvCache + computeBuffer, rounded via toFixed(2).
       */
      function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
        const input_part = inputBuffer(context, model_config, bsz)
        const cache_part = kvCache(context, model_config, cache_bit)
        const compute_part = computeBuffer(context, model_config, bsz)
        const combined = input_part + cache_part + compute_part
        return Number.parseFloat(combined.toFixed(2))
      }

      /**
       * Size in bytes of the model weights at a given bits-per-weight.
       *
       * @param {Object} model_config - Model config; `parameters` is read.
       * @param {number} [bpw=4.5] - Bits per weight.
       * @returns {number} parameters * bpw / 8, rounded via toFixed(2).
       */
      function modelSize(model_config, bpw=4.5) {
        const total_bits = model_config["parameters"] * bpw
        const total_bytes = total_bits / 8
        return Number.parseFloat(total_bytes.toFixed(2))
      }

      /**
       * Read the form, estimate model and context memory, and write the results
       * into the page.
       *
       * Writes the model, context and total sizes (divided by 2**30, two decimals)
       * into the "resultmodel", "resultcontext" and "resulttotal" elements. When
       * the GPU field holds a "<name> - <memory>" entry, the total is coloured
       * green (more than 0.5 to spare), yellow (fits) or red (does not fit);
       * otherwise any colour from a previous run is cleared.
       * Any error is reported through `alert`.
       *
       * @param {string} format - "gguf" or "exl2"; other values leave bpw at 0.
       * @returns {Promise<void>}
       */
      async function calculateSizes(format) {
        try {
          const model_config = await modelConfig(document.getElementById("modelsearch").value, document.getElementById("hf_token").value)
          const context = Number.parseInt(document.getElementById("contextsize").value, 10)
          let bsz = 512
          let cache_bit = 16
          let bpw = 0
          if (format === "gguf") {
            bsz = Number.parseInt(document.getElementById("batchsize").value, 10)
            // The button text may carry surrounding whitespace from the markup.
            const quant = document.getElementById("quantsize").innerText.trim()
            bpw = gguf_quants[quant]
            if (typeof bpw !== "number") {
              // Surface the problem instead of displaying NaN sizes.
              throw new Error(`unknown GGUF quantization "${quant}"`)
            }
          } else if (format === "exl2") {
            cache_bit = Number.parseInt(document.getElementById("kvCache").value, 10)
            bpw = Number.parseFloat(document.getElementById("bpw").value)
          }

          const model_size = modelSize(model_config, bpw)
          const context_size = contextSize(context, model_config, bsz, cache_bit)
          const total_size = ((model_size + context_size) / 2**30)
          document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
          document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
          const result_total_el = document.getElementById("resulttotal");
          result_total_el.innerText = total_size.toFixed(2)

          // Start from a clean slate so a verdict from an earlier run does not
          // linger when the GPU field is now empty or cannot be parsed.
          result_total_el.style.backgroundColor = ""

          const gpu = document.getElementById("gpusearch").value
          // Product names can themselves contain "-", so the memory is whatever
          // follows the LAST dash of "<name> - <memory>".
          const separator = gpu.lastIndexOf("-")
          if (separator !== -1) {
            const vram_text = gpu.slice(separator + 1)
            let vram = Number.parseFloat(vram_text)
            if (/MB/i.test(vram_text)) {
              // Memory given in MB is converted so it compares against a GB total.
              vram = vram / 1024
            }
            if (!Number.isNaN(vram)) {
              if (vram - total_size > 0.5) {
                result_total_el.style.backgroundColor = "#bef264"
              } else if (vram - total_size > 0) {
                result_total_el.style.backgroundColor = "#facc15"
              } else {
                result_total_el.style.backgroundColor = "#ef4444"
              }
            }
          }
        } catch(e) {
          alert(e);
        }
      }
    </script>
    <link href="./styles.css" rel="stylesheet">
    <title>Can I run it? - LLM VRAM Calculator</title>
  </head>
  <body class="p-8">
    <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
      <h1 class="text-xl font-semibold leading-6 text-gray-900">
        LLM Model, Can I run it?
      </h1>
      <p>
        To support gated or private repos, you need to <a href="https://huggingface.co/settings/tokens" style="color: #4444ff"><b>create an authentication token</b></a>, check the box <span style="color: #6e1818"><b>"Read access to contents of all public gated repos you can access"</b></span>, and then enter the token in the field below.
      </p>
      
      <div class="flex flex-col gap-10">
        <div class="w-auto flex flex-col gap-4">
          <!-- Huggingface Authentication Token -->
          <div
            class="relative"
            x-data="{
              results: null,
              query: null
            }"
          >
            <!-- Label for the token input below; `for` must match that input's id. -->
            <label
              for="hf_token"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >Huggingface Token (optional)</label
            >
            <input
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              id="hf_token"
            />
          </div>
          <!-- GPU Selector -->
          <div
            class="relative"
            x-data="{
              results: null,
              query: null
            }"
          >
            <label
              for="gpusearch"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >GPU (optional)</label
            >
            <input
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              placeholder="GeForce RTX 3090 - 24 GB"
              id="gpusearch"
              name="gpusearch"
              list="gpulist"
              x-model="query"
              @keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))"
            />
            <datalist id="gpulist">
              <template x-for="item in results">
                <option :value="item" x-text="item"></option>
              </template>
            </datalist>
          </div>
          <!-- Model Selector -->


          <div class="flex flex-row gap-4 relative">
            <!-- Label for the model search input (id "modelsearch"). -->
            <label
              for="modelsearch"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Model (unquantized)
            </label>
            <div
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              x-data="{
                          open: false,
                          value: 'Nexusflow/Starling-LM-7B-beta',
                          results: null,
                          toggle() {
                            if (this.open) {
                              return this.close()
                            }

                            this.$refs.input.focus()
                
                            this.open = true
                          },
                          close(focusAfter) {
                            if (! this.open) return
              
                            this.open = false
              
                            focusAfter && focusAfter.focus()
                          }
                        }"
              x-on:keydown.escape.prevent.stop="close($refs.input)"
              x-id="['model-typeahead']"
              class="relative"
            >
              <!-- Input -->
              <input
                id="modelsearch"
                x-ref="input"
                x-on:click="toggle()"
                @keypress.debounce.150ms="results = (await
                    fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
                    encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
                :aria-expanded="open"
                :aria-controls="$id('model-typeahead')"
                x-model="value"
                class="flex justify-between items-center gap-2 w-full"
              />

              <!-- Panel -->
              <div
                x-ref="panel"
                x-show="open"
                x-transition.origin.top.left
                x-on:click.outside="close($refs.input)"
                :id="$id('model-typeahead')"
                style="display: none"
                class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
              >
                <template x-for="result in results">
                  <a
                    @click="value = result.id; close($refs.input)"
                    x-text="result.id"
                    class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
                  ></a>
                </template>
              </div>
            </div>
          </div>


          <!-- Context Size Selector -->
          <div class="relative">
            <label
              for="contextsize"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Context Size
            </label>
            <input
              value="8192"
              type="number"
              name="contextsize"
              id="contextsize"
              step="1024"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            />
          </div>
          <!-- Quant Format Selector -->
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >Quant Format</label
            >
            <fieldset
              x-model="format"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >
              <legend class="sr-only">Quant format</legend>
              <div
                class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
              >
                <div class="flex items-center">
                  <input
                    id="gguf-format"
                    name="quant-format"
                    type="radio"
                    value="gguf"
                    checked
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="gguf-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >GGUF</label
                  >
                </div>
                <div class="flex items-center">
                  <input
                    id="exl2-format"
                    name="quant-format"
                    type="radio"
                    value="exl2"
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="exl2-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >EXL2</label
                  >
                </div>
                <div class="flex items-center">
                  <input
                    id="gptq-format"
                    name="quant-format"
                    type="radio"
                    disabled
                    value="gptq"
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="gptq-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >GPTQ (coming soon)</label
                  >
                </div>
              </div>
            </fieldset>
          </div>
          <!-- EXL2 Options -->
          <div x-show="format === 'exl2'" class="flex flex-row gap-4">
            <div class="relative flex-grow">
              <label
                for="bpw"
                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >
                BPW
              </label>
              <input
                value="4.5"
                type="number"
                step="0.01"
                id="bpw"
                name="bpw"
                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              />
            </div>
            <div
              class="flex-shrink relative rounded-md"
            >
              <div
                class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              >
                <label
                  for="kvCache"
                  class="inline-block bg-white text-xs font-medium text-gray-900"
                >
                  KV Cache
                </label>
                <select id="kvCache" name="kvCache">
                  <option value="16">16 bit</option>
                  <option value="8">8 bit</option>
                  <option value="4">4 bit</option>
                </select>
              </div>
            </div>
          </div>
          <!-- GGUF Options -->
          <div x-show="format === 'gguf'" class="relative">
            <div class="flex flex-row gap-4">
              <!-- Label for the quantization dropdown button (id "quantsize"). -->
              <label
                for="quantsize"
                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >
                Quantization Size
              </label>
              <div
                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
                x-data="{
                open: false,
                value: '',
                toggle() {
                  if (this.open) {
                    return this.close()
                  }

                  this.$refs.button.focus()
      
                  this.open = true
                },
                close(focusAfter) {
                  if (! this.open) return
    
                  this.open = false
    
                  focusAfter && focusAfter.focus()
                }
              }"
                x-on:keydown.escape.prevent.stop="close($refs.button)"
                x-id="['dropdown-button']"
                class="relative"
              >
                <!-- Button -->
                <button
                  x-ref="button"
                  x-on:click="toggle()"
                  :aria-expanded="open"
                  :aria-controls="$id('dropdown-button')"
                  type="button"
                  id="quantsize"
                  x-text="value.length === 0 ? 'Q4_K_S' : value"
                  class="flex justify-between items-center gap-2 w-full"
                >
                  Q4_K_S

                  <!-- Heroicon: chevron-down -->
                  <svg
                    xmlns="http://www.w3.org/2000/svg"
                    class="h-5 w-5 text-gray-400"
                    viewBox="0 0 20 20"
                    fill="currentColor"
                  >
                    <path
                      fill-rule="evenodd"
                      d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
                      clip-rule="evenodd"
                    />
                  </svg>
                </button>

                <!-- Panel -->
                <div
                  x-data="{ quants: [
                  'IQ1_S',
                  'IQ2_XXS',
                  'IQ2_XS',
                  'IQ2_S',
                  'IQ2_M',
                  'IQ3_XXS',
                  'IQ3_XS',
                  'Q2_K',
                  'Q3_K_S',
                  'IQ3_S',
                  'IQ3_M',
                  'Q3_K_M',
                  'Q3_K_L',
                  'IQ4_XS',
                  'IQ4_NL',
                  'Q4_0',
                  'Q4_K_S',
                  'Q4_K_M',
                  'Q5_0',
                  'Q5_K_S',
                  'Q5_K_M',
                  'Q6_K',
                  'Q8_0'
                ]}"
                  x-ref="panel"
                  x-show="open"
                  x-transition.origin.top.left
                  x-on:click.outside="close($refs.button)"
                  :id="$id('dropdown-button')"
                  style="display: none"
                  class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
                >
                  <template x-for="quant in quants">
                    <a
                      @click="value = quant; close($refs.button)"
                      x-text="quant"
                      class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
                    ></a>
                  </template>
                </div>
              </div>
              <div class="relative">
                <label
                  for="batchsize"
                  class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
                >
                  Batch Size
                </label>
                <input
                  value="512"
                  type="number"
                  step="128"
                  id="batchsize"
                  class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
                />
              </div>
            </div>
          </div>
          <button
            type="button"
            class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
            @click="calculateSizes(format)"
          >
            Submit
          </button>
        </div>
        <div class="w-auto flex flex-col gap-4">
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Model Size (GB)
            </label>
            <div
              id="resultmodel"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >4.20</div>
          </div>
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Context Size (GB)
            </label>
            <div
              id="resultcontext"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >6.90</div>
          </div>
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Total Size (GB)
            </label>
            <div
              id="resulttotal"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >420.69</div>
          </div>
        </div>
      </div>
    </div>
    <script
      src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js"
    ></script>
    <script defer>
      calculateSizes("gguf")
    </script>
  </body>
</html>