Spaces:
Running
Running
Yang Gu
committed on
Commit
•
e4aee93
1
Parent(s):
1322fe0
Add ort-phi2
Browse files
demo/ort-phi2/index.html
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<title>Example</title>
|
6 |
+
</head>
|
7 |
+
|
8 |
+
<body>
|
9 |
+
<!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"> </script> -->
|
10 |
+
<script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"> </script>
|
11 |
+
|
12 |
+
<script type="module">
|
13 |
+
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
14 |
+
|
15 |
+
function log(i) { console.log(i); document.getElementById('status').innerText += `\n${i}`; }
|
16 |
+
|
17 |
+
const MODELS = {
|
18 |
+
"tinyllama": { name: "tinyllama", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-int4" },
|
19 |
+
"tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true },
|
20 |
+
"phi2": { name: "phi2", path: "phi2-int4" },
|
21 |
+
"phi2-mb": { name: "phi2-mb", path: "schmuell/phi2-mb", externaldata: true },
|
22 |
+
"stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4" },
|
23 |
+
}
|
24 |
+
|
25 |
+
function getConfig() {
|
26 |
+
const query = window.location.search.substring(1);
|
27 |
+
var config = {
|
28 |
+
model: "phi2",
|
29 |
+
provider: "webgpu",
|
30 |
+
profiler: 0,
|
31 |
+
verbose: 0,
|
32 |
+
threads: 1,
|
33 |
+
trace: 0,
|
34 |
+
csv: 0,
|
35 |
+
max_tokens: 256,
|
36 |
+
local: 1,
|
37 |
+
}
|
38 |
+
let vars = query.split("&");
|
39 |
+
for (var i = 0; i < vars.length; i++) {
|
40 |
+
let pair = vars[i].split("=");
|
41 |
+
if (pair[0] in config) {
|
42 |
+
const key = pair[0];
|
43 |
+
const value = decodeURIComponent(pair[1]);
|
44 |
+
if (typeof config[key] == "number") {
|
45 |
+
config[key] = parseInt(value);
|
46 |
+
}
|
47 |
+
else {
|
48 |
+
config[key] = value;
|
49 |
+
}
|
50 |
+
} else if (pair[0].length > 0) {
|
51 |
+
throw new Error("unknown argument: " + pair[0]);
|
52 |
+
}
|
53 |
+
}
|
54 |
+
if (MODELS[config.model] !== undefined) {
|
55 |
+
config.model = MODELS[config.model];
|
56 |
+
}
|
57 |
+
return config;
|
58 |
+
}
|
59 |
+
|
60 |
+
class LLM {
|
61 |
+
sess = undefined;
|
62 |
+
profiler = false;
|
63 |
+
trace = false;
|
64 |
+
feed = {};
|
65 |
+
output_tokens = [];
|
66 |
+
eos = 2;
|
67 |
+
need_position_ids = true;
|
68 |
+
stop = false;
|
69 |
+
kv_dims = [];
|
70 |
+
dtype = "float16";
|
71 |
+
|
72 |
+
constructor() {
|
73 |
+
}
|
74 |
+
|
75 |
+
async load(model, options) {
|
76 |
+
const provider = options.provider || "webgpu";
|
77 |
+
const verbose = options.verbose;
|
78 |
+
const local = options.local;
|
79 |
+
this.profiler = options.profiler;
|
80 |
+
this.trace = options.trace;
|
81 |
+
|
82 |
+
const model_path = (local) ? "models/" + model.path : "https://huggingface.co/" + model.path + "/resolve/main";
|
83 |
+
|
84 |
+
log(`loading... ${model.name}, ${provider}`);
|
85 |
+
const json_bytes = await fetchAndCache(model_path + "/config.json");
|
86 |
+
let textDecoder = new TextDecoder();
|
87 |
+
const model_config = JSON.parse(textDecoder.decode(json_bytes));
|
88 |
+
|
89 |
+
const model_bytes = await fetchAndCache(model_path + "/phi2-int4.onnx");
|
90 |
+
const externaldata = (model.externaldata) ? await fetchAndCache(model_path + '/onnx/decoder_model_merged.onnx.data') : false;
|
91 |
+
let modelSize = model_bytes.byteLength;
|
92 |
+
if (externaldata) {
|
93 |
+
modelSize += externaldata.byteLength;
|
94 |
+
}
|
95 |
+
|
96 |
+
log(`model size ${Math.round(modelSize / 1024 / 1024)} MB`);
|
97 |
+
|
98 |
+
const opt = {
|
99 |
+
executionProviders: [provider],
|
100 |
+
preferredOutputLocation: {},
|
101 |
+
};
|
102 |
+
|
103 |
+
switch (provider) {
|
104 |
+
case "webgpu":
|
105 |
+
if (!("gpu" in navigator)) {
|
106 |
+
throw new Error("webgpu is NOT supported");
|
107 |
+
}
|
108 |
+
for (let i = 0; i < model_config.num_hidden_layers; ++i) {
|
109 |
+
opt.preferredOutputLocation[`present.${i}.key`] = 'gpu-buffer';
|
110 |
+
opt.preferredOutputLocation[`present.${i}.value`] = 'gpu-buffer';
|
111 |
+
}
|
112 |
+
break;
|
113 |
+
case "webnn":
|
114 |
+
if (!("ml" in navigator)) {
|
115 |
+
throw new Error("webnn is NOT supported");
|
116 |
+
}
|
117 |
+
break;
|
118 |
+
}
|
119 |
+
|
120 |
+
if (externaldata !== undefined) {
|
121 |
+
opt.externalData = [
|
122 |
+
{
|
123 |
+
data: externaldata,
|
124 |
+
path: 'decoder_model_merged.onnx.data'
|
125 |
+
},
|
126 |
+
]
|
127 |
+
}
|
128 |
+
if (verbose) {
|
129 |
+
opt.logSeverityLevel = 0;
|
130 |
+
opt.logVerbosityLevel = 0;
|
131 |
+
ort.env.logLevel = "verbose";
|
132 |
+
ort.env.debug = true;
|
133 |
+
}
|
134 |
+
|
135 |
+
ort.env.webgpu.profiling = {};
|
136 |
+
if (this.profiler) {
|
137 |
+
opt.enableProfiling = true;
|
138 |
+
ort.env.webgpu.profilingMode = 'default';
|
139 |
+
ort.env.webgpu.profiling.mode = 'default';
|
140 |
+
}
|
141 |
+
|
142 |
+
this.sess = await ort.InferenceSession.create(model_bytes, opt);
|
143 |
+
|
144 |
+
if (this.trace) {
|
145 |
+
ort.env.trace = true;
|
146 |
+
ort.env.webgpu.profiling.ondata = (version, inputsMetadata, outputsMetadata, kernelId, kernelType,
|
147 |
+
kernelName, programName, startTime, endTime) => { };
|
148 |
+
}
|
149 |
+
|
150 |
+
this.eos = model_config.eos_token_id;
|
151 |
+
this.kv_dims = [1, model_config.num_key_value_heads, 0, model_config.hidden_size / model_config.num_attention_heads];
|
152 |
+
this.dtype = config.model.dtype || "float16";
|
153 |
+
this.num_layers = model_config.num_hidden_layers;
|
154 |
+
this.initilize_feed();
|
155 |
+
}
|
156 |
+
|
157 |
+
initilize_feed() {
|
158 |
+
this.feed = {};
|
159 |
+
const empty = (this.dtype === "float16") ? new Uint16Array() : [];
|
160 |
+
for (let i = 0; i < this.num_layers; ++i) {
|
161 |
+
this.feed[`past_key_values.${i}.key`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
|
162 |
+
this.feed[`past_key_values.${i}.value`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
|
163 |
+
}
|
164 |
+
this.output_tokens = [];
|
165 |
+
}
|
166 |
+
|
167 |
+
|
168 |
+
argmax(t) {
|
169 |
+
const arr = t.data;
|
170 |
+
const start = t.dims[2] * (t.dims[1] - 1);
|
171 |
+
let max = arr[start];
|
172 |
+
let maxidx = 0;
|
173 |
+
|
174 |
+
for (let i = 0; i < t.dims[2]; i++) {
|
175 |
+
const val = arr[i + start];
|
176 |
+
if (!isFinite(val)) {
|
177 |
+
throw new Error("found infinitive in logits");
|
178 |
+
}
|
179 |
+
if (val > max) {
|
180 |
+
max = arr[i + start];
|
181 |
+
maxidx = i;
|
182 |
+
}
|
183 |
+
}
|
184 |
+
return maxidx;
|
185 |
+
}
|
186 |
+
|
187 |
+
update_kv_cache(feed, outputs) {
|
188 |
+
for (const name in outputs) {
|
189 |
+
if (name.startsWith('present')) {
|
190 |
+
let newName = name.replace('present', 'past_key_values');
|
191 |
+
// free old gpu buffer
|
192 |
+
const t = feed[newName];
|
193 |
+
if (t.location === 'gpu-buffer') {
|
194 |
+
t.dispose();
|
195 |
+
}
|
196 |
+
feed[newName] = outputs[name];
|
197 |
+
}
|
198 |
+
}
|
199 |
+
}
|
200 |
+
|
201 |
+
abort() {
|
202 |
+
this.stop = true;
|
203 |
+
}
|
204 |
+
|
205 |
+
async generate(tokens, callback, options) {
|
206 |
+
const keep_cache = options.keep_cache;
|
207 |
+
const max_tokens = options.max_tokens || 256;
|
208 |
+
const feed = this.feed;
|
209 |
+
const input_ids = new ort.Tensor('int64', BigInt64Array.from(tokens.map(BigInt)), [1, tokens.length]);
|
210 |
+
feed['input_ids'] = input_ids;
|
211 |
+
this.stop = false;
|
212 |
+
|
213 |
+
if (keep_cache) {
|
214 |
+
this.output_tokens.push(...input_ids)
|
215 |
+
} else {
|
216 |
+
this.initilize_feed();
|
217 |
+
this.output_tokens = Array.from(feed['input_ids'].data);
|
218 |
+
}
|
219 |
+
|
220 |
+
let last_token = 0n;
|
221 |
+
let seqlen = this.output_tokens.length;
|
222 |
+
if (this.need_position_ids) {
|
223 |
+
if (keep_cache) {
|
224 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, input_ids.length]);
|
225 |
+
} else {
|
226 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, seqlen]);
|
227 |
+
}
|
228 |
+
}
|
229 |
+
|
230 |
+
while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
|
231 |
+
seqlen = this.output_tokens.length;
|
232 |
+
feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
|
233 |
+
let outputs;
|
234 |
+
if (this.trace) {
|
235 |
+
console.timeStamp("RUN-BEGIN");
|
236 |
+
outputs = await this.sess.run(feed);
|
237 |
+
console.timeStamp("RUN-END");
|
238 |
+
} else {
|
239 |
+
outputs = await this.sess.run(feed);
|
240 |
+
}
|
241 |
+
last_token = BigInt(this.argmax(outputs.logits));
|
242 |
+
this.output_tokens.push(last_token);
|
243 |
+
if (callback && !this.profiler) {
|
244 |
+
callback(this.output_tokens);
|
245 |
+
}
|
246 |
+
this.update_kv_cache(feed, outputs);
|
247 |
+
feed['input_ids'] = new ort.Tensor('int64', BigInt64Array.from([last_token]), [1, 1]);
|
248 |
+
if (this.need_position_ids) {
|
249 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from([BigInt(seqlen)]), [1, 1]);
|
250 |
+
}
|
251 |
+
}
|
252 |
+
if (this.profiler) {
|
253 |
+
this.sess.endProfiling();
|
254 |
+
}
|
255 |
+
return this.output_tokens;
|
256 |
+
}
|
257 |
+
}
|
258 |
+
|
259 |
+
const config = getConfig();
|
260 |
+
env.localModelPath = 'models';
|
261 |
+
env.allowRemoteModels = config.local == 0;
|
262 |
+
env.allowLocalModels = config.local == 1;
|
263 |
+
ort.env.wasm.numThreads = config.threads;
|
264 |
+
ort.env.wasm.simd = true;
|
265 |
+
|
266 |
+
const cons_log = [];
|
267 |
+
|
268 |
+
if (config.profiler === 2) {
|
269 |
+
console.log = function (message) {
|
270 |
+
if (!message.includes('_fence_')) {
|
271 |
+
cons_log.push(message);
|
272 |
+
}
|
273 |
+
};
|
274 |
+
}
|
275 |
+
|
276 |
+
const tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
|
277 |
+
|
278 |
+
function create_download_link(cons_log) {
|
279 |
+
if (cons_log.length > 0) {
|
280 |
+
let link = document.getElementById('download').childNodes[0];
|
281 |
+
if (link === undefined) {
|
282 |
+
link = document.createElement("a", "download-link");
|
283 |
+
link.download = "profiler.log";
|
284 |
+
link.innerText = "Download";
|
285 |
+
document.getElementById('download').appendChild(link);
|
286 |
+
}
|
287 |
+
const base64 = btoa(cons_log.join('\n'));
|
288 |
+
link.href = `data:application/json;base64,${base64}`;
|
289 |
+
}
|
290 |
+
}
|
291 |
+
|
292 |
+
async function fetchAndCache(url) {
|
293 |
+
try {
|
294 |
+
const cache = await caches.open("onnx");
|
295 |
+
let cachedResponse = await cache.match(url);
|
296 |
+
if (cachedResponse == undefined) {
|
297 |
+
await cache.add(url);
|
298 |
+
cachedResponse = await cache.match(url);
|
299 |
+
log(`${url} (network)`);
|
300 |
+
} else {
|
301 |
+
log(`${url} (cached)`);
|
302 |
+
}
|
303 |
+
const data = await cachedResponse.arrayBuffer();
|
304 |
+
return data;
|
305 |
+
} catch (error) {
|
306 |
+
log(`${url} (network)`);
|
307 |
+
return await fetch(url).then(response => response.arrayBuffer());
|
308 |
+
}
|
309 |
+
}
|
310 |
+
|
311 |
+
function token_to_text(tokenizer, tokens, startidx) {
|
312 |
+
const txt = tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: true, });
|
313 |
+
return txt;
|
314 |
+
}
|
315 |
+
|
316 |
+
const llm = new LLM();
|
317 |
+
|
318 |
+
async function main() {
|
319 |
+
|
320 |
+
const model = config.model;
|
321 |
+
|
322 |
+
await llm.load(model, {
|
323 |
+
provider: config.provider,
|
324 |
+
verbose: config.verbose,
|
325 |
+
profiler: config.profiler,
|
326 |
+
trace: config.trace,
|
327 |
+
local: config.local,
|
328 |
+
});
|
329 |
+
|
330 |
+
|
331 |
+
document.getElementById('status').innerText = "";
|
332 |
+
const query = "Tell me about Constantinople.";
|
333 |
+
let prompt;
|
334 |
+
|
335 |
+
if (model.name.includes('phi2')) {
|
336 |
+
prompt = `User:${query}\nAssistant:`;
|
337 |
+
} else {
|
338 |
+
prompt = `"<|system|>\nYou are a friendly assistant.</s>\n<|user|>\n${query}</s>\n<|assistant|>\n`;
|
339 |
+
}
|
340 |
+
const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
|
341 |
+
|
342 |
+
const start_timer = performance.now();
|
343 |
+
const output_tokens = await llm.generate(input_ids, (output_tokens) => {
|
344 |
+
document.getElementById('result').innerText = token_to_text(tokenizer, output_tokens, input_ids.length);
|
345 |
+
}, {});
|
346 |
+
const took = (performance.now() - start_timer) / 1000;
|
347 |
+
const txt = token_to_text(tokenizer, output_tokens, input_ids.length);
|
348 |
+
const seqlen = output_tokens.length;
|
349 |
+
document.getElementById('result').innerText = txt;
|
350 |
+
const perf = `${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`;
|
351 |
+
console.log(perf + " @@1");
|
352 |
+
document.getElementById('perf').innerText = perf;
|
353 |
+
if (config.csv) {
|
354 |
+
log(`${model.name},${took.toFixed(2)},${(seqlen / took).toFixed(3)},${seqlen},@@2`);
|
355 |
+
}
|
356 |
+
}
|
357 |
+
try {
|
358 |
+
await main();
|
359 |
+
} catch (error) {
|
360 |
+
console.error(error);
|
361 |
+
document.getElementById('result').innerText = error.message;
|
362 |
+
} finally {
|
363 |
+
create_download_link(cons_log);
|
364 |
+
}
|
365 |
+
</script>
|
366 |
+
|
367 |
+
<div id="status"></div>
|
368 |
+
<br />
|
369 |
+
<div id="result"></div>
|
370 |
+
<br />
|
371 |
+
<div id="perf"></div>
|
372 |
+
<br />
|
373 |
+
<div id="download"></div>
|
374 |
+
<br />
|
375 |
+
|
376 |
+
</body>
|
377 |
+
|
378 |
+
</html>
|
demo/ort-phi2/models/phi2-int4/config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/phi-2",
|
3 |
+
"architectures": [
|
4 |
+
"PhiForCausalLM"
|
5 |
+
],
|
6 |
+
"auto_map": {
|
7 |
+
"AutoConfig": "configuration_phi.PhiConfig",
|
8 |
+
"AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
|
9 |
+
},
|
10 |
+
"attention_dropout": 0.0,
|
11 |
+
"bos_token_id": 50256,
|
12 |
+
"embd_pdrop": 0.0,
|
13 |
+
"eos_token_id": 50256,
|
14 |
+
"hidden_act": "gelu_new",
|
15 |
+
"hidden_size": 2560,
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 10240,
|
18 |
+
"layer_norm_eps": 1e-05,
|
19 |
+
"max_position_embeddings": 2048,
|
20 |
+
"model_type": "phi",
|
21 |
+
"num_attention_heads": 32,
|
22 |
+
"num_hidden_layers": 32,
|
23 |
+
"num_key_value_heads": 32,
|
24 |
+
"partial_rotary_factor": 0.4,
|
25 |
+
"qk_layernorm": false,
|
26 |
+
"resid_pdrop": 0.1,
|
27 |
+
"rope_scaling": null,
|
28 |
+
"rope_theta": 10000.0,
|
29 |
+
"tie_word_embeddings": false,
|
30 |
+
"torch_dtype": "float16",
|
31 |
+
"transformers_version": "4.37.0",
|
32 |
+
"use_cache": true,
|
33 |
+
"vocab_size": 51200
|
34 |
+
}
|
demo/ort-phi2/models/phi2-int4/phi2-int4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d4321d1b34279940c9ba43aa984f6090ea5656380f415b7c87e71b6e3cbf977
|
3 |
+
size 1770018731
|
demo/ort-phi2/models/phi2-int4/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
demo/ort-phi2/models/phi2-int4/tokenizer_config.json
ADDED
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"50256": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"50257": {
|
13 |
+
"content": " ",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": false
|
19 |
+
},
|
20 |
+
"50258": {
|
21 |
+
"content": " ",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": false
|
27 |
+
},
|
28 |
+
"50259": {
|
29 |
+
"content": " ",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": false
|
35 |
+
},
|
36 |
+
"50260": {
|
37 |
+
"content": " ",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": false
|
43 |
+
},
|
44 |
+
"50261": {
|
45 |
+
"content": " ",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": false
|
51 |
+
},
|
52 |
+
"50262": {
|
53 |
+
"content": " ",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": true,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": false
|
59 |
+
},
|
60 |
+
"50263": {
|
61 |
+
"content": " ",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": true,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": false
|
67 |
+
},
|
68 |
+
"50264": {
|
69 |
+
"content": " ",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": true,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": false
|
75 |
+
},
|
76 |
+
"50265": {
|
77 |
+
"content": " ",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": true,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": false
|
83 |
+
},
|
84 |
+
"50266": {
|
85 |
+
"content": " ",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": true,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": false
|
91 |
+
},
|
92 |
+
"50267": {
|
93 |
+
"content": " ",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": true,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": false
|
99 |
+
},
|
100 |
+
"50268": {
|
101 |
+
"content": " ",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": true,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": false
|
107 |
+
},
|
108 |
+
"50269": {
|
109 |
+
"content": " ",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": true,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": false
|
115 |
+
},
|
116 |
+
"50270": {
|
117 |
+
"content": " ",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": true,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": false
|
123 |
+
},
|
124 |
+
"50271": {
|
125 |
+
"content": " ",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": true,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": false
|
131 |
+
},
|
132 |
+
"50272": {
|
133 |
+
"content": " ",
|
134 |
+
"lstrip": false,
|
135 |
+
"normalized": true,
|
136 |
+
"rstrip": false,
|
137 |
+
"single_word": false,
|
138 |
+
"special": false
|
139 |
+
},
|
140 |
+
"50273": {
|
141 |
+
"content": " ",
|
142 |
+
"lstrip": false,
|
143 |
+
"normalized": true,
|
144 |
+
"rstrip": false,
|
145 |
+
"single_word": false,
|
146 |
+
"special": false
|
147 |
+
},
|
148 |
+
"50274": {
|
149 |
+
"content": " ",
|
150 |
+
"lstrip": false,
|
151 |
+
"normalized": true,
|
152 |
+
"rstrip": false,
|
153 |
+
"single_word": false,
|
154 |
+
"special": false
|
155 |
+
},
|
156 |
+
"50275": {
|
157 |
+
"content": " ",
|
158 |
+
"lstrip": false,
|
159 |
+
"normalized": true,
|
160 |
+
"rstrip": false,
|
161 |
+
"single_word": false,
|
162 |
+
"special": false
|
163 |
+
},
|
164 |
+
"50276": {
|
165 |
+
"content": " ",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": true,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false,
|
170 |
+
"special": false
|
171 |
+
},
|
172 |
+
"50277": {
|
173 |
+
"content": " ",
|
174 |
+
"lstrip": false,
|
175 |
+
"normalized": true,
|
176 |
+
"rstrip": false,
|
177 |
+
"single_word": false,
|
178 |
+
"special": false
|
179 |
+
},
|
180 |
+
"50278": {
|
181 |
+
"content": " ",
|
182 |
+
"lstrip": false,
|
183 |
+
"normalized": true,
|
184 |
+
"rstrip": false,
|
185 |
+
"single_word": false,
|
186 |
+
"special": false
|
187 |
+
},
|
188 |
+
"50279": {
|
189 |
+
"content": " ",
|
190 |
+
"lstrip": false,
|
191 |
+
"normalized": true,
|
192 |
+
"rstrip": false,
|
193 |
+
"single_word": false,
|
194 |
+
"special": false
|
195 |
+
},
|
196 |
+
"50280": {
|
197 |
+
"content": " ",
|
198 |
+
"lstrip": false,
|
199 |
+
"normalized": true,
|
200 |
+
"rstrip": false,
|
201 |
+
"single_word": false,
|
202 |
+
"special": false
|
203 |
+
},
|
204 |
+
"50281": {
|
205 |
+
"content": " ",
|
206 |
+
"lstrip": false,
|
207 |
+
"normalized": true,
|
208 |
+
"rstrip": false,
|
209 |
+
"single_word": false,
|
210 |
+
"special": false
|
211 |
+
},
|
212 |
+
"50282": {
|
213 |
+
"content": " ",
|
214 |
+
"lstrip": false,
|
215 |
+
"normalized": true,
|
216 |
+
"rstrip": false,
|
217 |
+
"single_word": false,
|
218 |
+
"special": false
|
219 |
+
},
|
220 |
+
"50283": {
|
221 |
+
"content": " ",
|
222 |
+
"lstrip": false,
|
223 |
+
"normalized": true,
|
224 |
+
"rstrip": false,
|
225 |
+
"single_word": false,
|
226 |
+
"special": false
|
227 |
+
},
|
228 |
+
"50284": {
|
229 |
+
"content": " ",
|
230 |
+
"lstrip": false,
|
231 |
+
"normalized": true,
|
232 |
+
"rstrip": false,
|
233 |
+
"single_word": false,
|
234 |
+
"special": false
|
235 |
+
},
|
236 |
+
"50285": {
|
237 |
+
"content": " ",
|
238 |
+
"lstrip": false,
|
239 |
+
"normalized": true,
|
240 |
+
"rstrip": false,
|
241 |
+
"single_word": false,
|
242 |
+
"special": false
|
243 |
+
},
|
244 |
+
"50286": {
|
245 |
+
"content": " ",
|
246 |
+
"lstrip": false,
|
247 |
+
"normalized": true,
|
248 |
+
"rstrip": false,
|
249 |
+
"single_word": false,
|
250 |
+
"special": false
|
251 |
+
},
|
252 |
+
"50287": {
|
253 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
254 |
+
"lstrip": false,
|
255 |
+
"normalized": true,
|
256 |
+
"rstrip": false,
|
257 |
+
"single_word": false,
|
258 |
+
"special": false
|
259 |
+
},
|
260 |
+
"50288": {
|
261 |
+
"content": "\t\t\t\t\t\t\t\t",
|
262 |
+
"lstrip": false,
|
263 |
+
"normalized": true,
|
264 |
+
"rstrip": false,
|
265 |
+
"single_word": false,
|
266 |
+
"special": false
|
267 |
+
},
|
268 |
+
"50289": {
|
269 |
+
"content": "\t\t\t\t\t\t\t",
|
270 |
+
"lstrip": false,
|
271 |
+
"normalized": true,
|
272 |
+
"rstrip": false,
|
273 |
+
"single_word": false,
|
274 |
+
"special": false
|
275 |
+
},
|
276 |
+
"50290": {
|
277 |
+
"content": "\t\t\t\t\t\t",
|
278 |
+
"lstrip": false,
|
279 |
+
"normalized": true,
|
280 |
+
"rstrip": false,
|
281 |
+
"single_word": false,
|
282 |
+
"special": false
|
283 |
+
},
|
284 |
+
"50291": {
|
285 |
+
"content": "\t\t\t\t\t",
|
286 |
+
"lstrip": false,
|
287 |
+
"normalized": true,
|
288 |
+
"rstrip": false,
|
289 |
+
"single_word": false,
|
290 |
+
"special": false
|
291 |
+
},
|
292 |
+
"50292": {
|
293 |
+
"content": "\t\t\t\t",
|
294 |
+
"lstrip": false,
|
295 |
+
"normalized": true,
|
296 |
+
"rstrip": false,
|
297 |
+
"single_word": false,
|
298 |
+
"special": false
|
299 |
+
},
|
300 |
+
"50293": {
|
301 |
+
"content": "\t\t\t",
|
302 |
+
"lstrip": false,
|
303 |
+
"normalized": true,
|
304 |
+
"rstrip": false,
|
305 |
+
"single_word": false,
|
306 |
+
"special": false
|
307 |
+
},
|
308 |
+
"50294": {
|
309 |
+
"content": "\t\t",
|
310 |
+
"lstrip": false,
|
311 |
+
"normalized": true,
|
312 |
+
"rstrip": false,
|
313 |
+
"single_word": false,
|
314 |
+
"special": false
|
315 |
+
}
|
316 |
+
},
|
317 |
+
"bos_token": "<|endoftext|>",
|
318 |
+
"clean_up_tokenization_spaces": true,
|
319 |
+
"eos_token": "<|endoftext|>",
|
320 |
+
"model_max_length": 2048,
|
321 |
+
"tokenizer_class": "CodeGenTokenizer",
|
322 |
+
"unk_token": "<|endoftext|>"
|
323 |
+
}
|
main.js
CHANGED
@@ -53,51 +53,45 @@ function createElem(tag, attrs = {}, children = []) {
|
|
53 |
|
54 |
const pageCategories = [
|
55 |
{
|
56 |
-
title: `
|
57 |
-
description: `
|
58 |
demos: {
|
59 |
sam: {
|
60 |
name: 'Segment Anything',
|
61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
62 |
-
filename: "sam",
|
63 |
},
|
64 |
sdturbo: {
|
65 |
name: 'Stable Diffusion Turbo',
|
66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
67 |
-
filename: "
|
|
|
|
|
|
|
|
|
|
|
68 |
},
|
69 |
yolo: {
|
70 |
name: 'Yolo',
|
71 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
72 |
-
filename: "yolo",
|
73 |
},
|
74 |
},
|
75 |
},
|
76 |
{
|
77 |
-
title: `
|
78 |
-
description: `
|
79 |
demos: {
|
80 |
gemma: {
|
81 |
name: 'Gemma',
|
82 |
-
description: `Gemma with
|
83 |
-
filename: "
|
84 |
-
},
|
85 |
-
},
|
86 |
-
},
|
87 |
-
{
|
88 |
-
title: `Natural Language Processing`,
|
89 |
-
description: `Natural Language Processing`,
|
90 |
-
demos: {
|
91 |
-
tinyllama: {
|
92 |
-
name: 'Tiny Llama',
|
93 |
-
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
94 |
-
filename: "tinyllama",
|
95 |
},
|
96 |
},
|
97 |
},
|
98 |
{
|
99 |
-
title: 'Transformers.js
|
100 |
-
description: 'Transformers.js
|
101 |
demos: {
|
102 |
benchmark: {
|
103 |
name: 'Benchmark',
|
@@ -126,8 +120,8 @@ const pageCategories = [
|
|
126 |
},
|
127 |
},
|
128 |
{
|
129 |
-
title: 'TVM
|
130 |
-
description: 'TVM
|
131 |
demos: {
|
132 |
sd: {
|
133 |
name: 'Web Stable Diffusion',
|
|
|
53 |
|
54 |
const pageCategories = [
|
55 |
{
|
56 |
+
title: `ONNX Runtime`,
|
57 |
+
description: `ONNX Runtime`,
|
58 |
demos: {
|
59 |
sam: {
|
60 |
name: 'Segment Anything',
|
61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
62 |
+
filename: "ort-sam",
|
63 |
},
|
64 |
sdturbo: {
|
65 |
name: 'Stable Diffusion Turbo',
|
66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
67 |
+
filename: "ort-sd-turbo",
|
68 |
+
},
|
69 |
+
tinyllama: {
|
70 |
+
name: 'Tiny Llama',
|
71 |
+
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
72 |
+
filename: "ort-tiny-llama",
|
73 |
},
|
74 |
yolo: {
|
75 |
name: 'Yolo',
|
76 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
77 |
+
filename: "ort-yolo",
|
78 |
},
|
79 |
},
|
80 |
},
|
81 |
{
|
82 |
+
title: `TFLite`,
|
83 |
+
description: `TFLite`,
|
84 |
demos: {
|
85 |
gemma: {
|
86 |
name: 'Gemma',
|
87 |
+
description: `Gemma with TFLite and MediaPipe from https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info.</a>`,
|
88 |
+
filename: "tflite-gemma",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
},
|
90 |
},
|
91 |
},
|
92 |
{
|
93 |
+
title: 'Transformers.js',
|
94 |
+
description: 'Transformers.js',
|
95 |
demos: {
|
96 |
benchmark: {
|
97 |
name: 'Benchmark',
|
|
|
120 |
},
|
121 |
},
|
122 |
{
|
123 |
+
title: 'TVM',
|
124 |
+
description: 'TVM',
|
125 |
demos: {
|
126 |
sd: {
|
127 |
name: 'Web Stable Diffusion',
|
transformers/transformers.js
ADDED
The diff for this file is too large to render.
See raw diff
|
|