Spaces:
Running
Running
Yang Gu
committed on
Commit
•
e4aee93
1
Parent(s):
1322fe0
Add ort-phi2
Browse files
demo/ort-phi2/index.html
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<title>Example</title>
|
6 |
+
</head>
|
7 |
+
|
8 |
+
<body>
|
9 |
+
<!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"> </script> -->
|
10 |
+
<script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"> </script>
|
11 |
+
|
12 |
+
<script type="module">
|
13 |
+
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
14 |
+
|
15 |
+
function log(i) { console.log(i); document.getElementById('status').innerText += `\n${i}`; }
|
16 |
+
|
17 |
+
const MODELS = {
|
18 |
+
"tinyllama": { name: "tinyllama", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-int4" },
|
19 |
+
"tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true },
|
20 |
+
"phi2": { name: "phi2", path: "phi2-int4" },
|
21 |
+
"phi2-mb": { name: "phi2-mb", path: "schmuell/phi2-mb", externaldata: true },
|
22 |
+
"stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4" },
|
23 |
+
}
|
24 |
+
|
25 |
+
function getConfig() {
|
26 |
+
const query = window.location.search.substring(1);
|
27 |
+
var config = {
|
28 |
+
model: "phi2",
|
29 |
+
provider: "webgpu",
|
30 |
+
profiler: 0,
|
31 |
+
verbose: 0,
|
32 |
+
threads: 1,
|
33 |
+
trace: 0,
|
34 |
+
csv: 0,
|
35 |
+
max_tokens: 256,
|
36 |
+
local: 1,
|
37 |
+
}
|
38 |
+
let vars = query.split("&");
|
39 |
+
for (var i = 0; i < vars.length; i++) {
|
40 |
+
let pair = vars[i].split("=");
|
41 |
+
if (pair[0] in config) {
|
42 |
+
const key = pair[0];
|
43 |
+
const value = decodeURIComponent(pair[1]);
|
44 |
+
if (typeof config[key] == "number") {
|
45 |
+
config[key] = parseInt(value);
|
46 |
+
}
|
47 |
+
else {
|
48 |
+
config[key] = value;
|
49 |
+
}
|
50 |
+
} else if (pair[0].length > 0) {
|
51 |
+
throw new Error("unknown argument: " + pair[0]);
|
52 |
+
}
|
53 |
+
}
|
54 |
+
if (MODELS[config.model] !== undefined) {
|
55 |
+
config.model = MODELS[config.model];
|
56 |
+
}
|
57 |
+
return config;
|
58 |
+
}
|
59 |
+
|
60 |
+
class LLM {
|
61 |
+
sess = undefined;
|
62 |
+
profiler = false;
|
63 |
+
trace = false;
|
64 |
+
feed = {};
|
65 |
+
output_tokens = [];
|
66 |
+
eos = 2;
|
67 |
+
need_position_ids = true;
|
68 |
+
stop = false;
|
69 |
+
kv_dims = [];
|
70 |
+
dtype = "float16";
|
71 |
+
|
72 |
+
constructor() {
|
73 |
+
}
|
74 |
+
|
75 |
+
async load(model, options) {
|
76 |
+
const provider = options.provider || "webgpu";
|
77 |
+
const verbose = options.verbose;
|
78 |
+
const local = options.local;
|
79 |
+
this.profiler = options.profiler;
|
80 |
+
this.trace = options.trace;
|
81 |
+
|
82 |
+
const model_path = (local) ? "models/" + model.path : "https://huggingface.co/" + model.path + "/resolve/main";
|
83 |
+
|
84 |
+
log(`loading... ${model.name}, ${provider}`);
|
85 |
+
const json_bytes = await fetchAndCache(model_path + "/config.json");
|
86 |
+
let textDecoder = new TextDecoder();
|
87 |
+
const model_config = JSON.parse(textDecoder.decode(json_bytes));
|
88 |
+
|
89 |
+
const model_bytes = await fetchAndCache(model_path + "/phi2-int4.onnx");
|
90 |
+
const externaldata = (model.externaldata) ? await fetchAndCache(model_path + '/onnx/decoder_model_merged.onnx.data') : false;
|
91 |
+
let modelSize = model_bytes.byteLength;
|
92 |
+
if (externaldata) {
|
93 |
+
modelSize += externaldata.byteLength;
|
94 |
+
}
|
95 |
+
|
96 |
+
log(`model size ${Math.round(modelSize / 1024 / 1024)} MB`);
|
97 |
+
|
98 |
+
const opt = {
|
99 |
+
executionProviders: [provider],
|
100 |
+
preferredOutputLocation: {},
|
101 |
+
};
|
102 |
+
|
103 |
+
switch (provider) {
|
104 |
+
case "webgpu":
|
105 |
+
if (!("gpu" in navigator)) {
|
106 |
+
throw new Error("webgpu is NOT supported");
|
107 |
+
}
|
108 |
+
for (let i = 0; i < model_config.num_hidden_layers; ++i) {
|
109 |
+
opt.preferredOutputLocation[`present.${i}.key`] = 'gpu-buffer';
|
110 |
+
opt.preferredOutputLocation[`present.${i}.value`] = 'gpu-buffer';
|
111 |
+
}
|
112 |
+
break;
|
113 |
+
case "webnn":
|
114 |
+
if (!("ml" in navigator)) {
|
115 |
+
throw new Error("webnn is NOT supported");
|
116 |
+
}
|
117 |
+
break;
|
118 |
+
}
|
119 |
+
|
120 |
+
if (externaldata !== undefined) {
|
121 |
+
opt.externalData = [
|
122 |
+
{
|
123 |
+
data: externaldata,
|
124 |
+
path: 'decoder_model_merged.onnx.data'
|
125 |
+
},
|
126 |
+
]
|
127 |
+
}
|
128 |
+
if (verbose) {
|
129 |
+
opt.logSeverityLevel = 0;
|
130 |
+
opt.logVerbosityLevel = 0;
|
131 |
+
ort.env.logLevel = "verbose";
|
132 |
+
ort.env.debug = true;
|
133 |
+
}
|
134 |
+
|
135 |
+
ort.env.webgpu.profiling = {};
|
136 |
+
if (this.profiler) {
|
137 |
+
opt.enableProfiling = true;
|
138 |
+
ort.env.webgpu.profilingMode = 'default';
|
139 |
+
ort.env.webgpu.profiling.mode = 'default';
|
140 |
+
}
|
141 |
+
|
142 |
+
this.sess = await ort.InferenceSession.create(model_bytes, opt);
|
143 |
+
|
144 |
+
if (this.trace) {
|
145 |
+
ort.env.trace = true;
|
146 |
+
ort.env.webgpu.profiling.ondata = (version, inputsMetadata, outputsMetadata, kernelId, kernelType,
|
147 |
+
kernelName, programName, startTime, endTime) => { };
|
148 |
+
}
|
149 |
+
|
150 |
+
this.eos = model_config.eos_token_id;
|
151 |
+
this.kv_dims = [1, model_config.num_key_value_heads, 0, model_config.hidden_size / model_config.num_attention_heads];
|
152 |
+
this.dtype = config.model.dtype || "float16";
|
153 |
+
this.num_layers = model_config.num_hidden_layers;
|
154 |
+
this.initilize_feed();
|
155 |
+
}
|
156 |
+
|
157 |
+
initilize_feed() {
|
158 |
+
this.feed = {};
|
159 |
+
const empty = (this.dtype === "float16") ? new Uint16Array() : [];
|
160 |
+
for (let i = 0; i < this.num_layers; ++i) {
|
161 |
+
this.feed[`past_key_values.${i}.key`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
|
162 |
+
this.feed[`past_key_values.${i}.value`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
|
163 |
+
}
|
164 |
+
this.output_tokens = [];
|
165 |
+
}
|
166 |
+
|
167 |
+
|
168 |
+
argmax(t) {
|
169 |
+
const arr = t.data;
|
170 |
+
const start = t.dims[2] * (t.dims[1] - 1);
|
171 |
+
let max = arr[start];
|
172 |
+
let maxidx = 0;
|
173 |
+
|
174 |
+
for (let i = 0; i < t.dims[2]; i++) {
|
175 |
+
const val = arr[i + start];
|
176 |
+
if (!isFinite(val)) {
|
177 |
+
throw new Error("found infinitive in logits");
|
178 |
+
}
|
179 |
+
if (val > max) {
|
180 |
+
max = arr[i + start];
|
181 |
+
maxidx = i;
|
182 |
+
}
|
183 |
+
}
|
184 |
+
return maxidx;
|
185 |
+
}
|
186 |
+
|
187 |
+
update_kv_cache(feed, outputs) {
|
188 |
+
for (const name in outputs) {
|
189 |
+
if (name.startsWith('present')) {
|
190 |
+
let newName = name.replace('present', 'past_key_values');
|
191 |
+
// free old gpu buffer
|
192 |
+
const t = feed[newName];
|
193 |
+
if (t.location === 'gpu-buffer') {
|
194 |
+
t.dispose();
|
195 |
+
}
|
196 |
+
feed[newName] = outputs[name];
|
197 |
+
}
|
198 |
+
}
|
199 |
+
}
|
200 |
+
|
201 |
+
abort() {
|
202 |
+
this.stop = true;
|
203 |
+
}
|
204 |
+
|
205 |
+
async generate(tokens, callback, options) {
|
206 |
+
const keep_cache = options.keep_cache;
|
207 |
+
const max_tokens = options.max_tokens || 256;
|
208 |
+
const feed = this.feed;
|
209 |
+
const input_ids = new ort.Tensor('int64', BigInt64Array.from(tokens.map(BigInt)), [1, tokens.length]);
|
210 |
+
feed['input_ids'] = input_ids;
|
211 |
+
this.stop = false;
|
212 |
+
|
213 |
+
if (keep_cache) {
|
214 |
+
this.output_tokens.push(...input_ids)
|
215 |
+
} else {
|
216 |
+
this.initilize_feed();
|
217 |
+
this.output_tokens = Array.from(feed['input_ids'].data);
|
218 |
+
}
|
219 |
+
|
220 |
+
let last_token = 0n;
|
221 |
+
let seqlen = this.output_tokens.length;
|
222 |
+
if (this.need_position_ids) {
|
223 |
+
if (keep_cache) {
|
224 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, input_ids.length]);
|
225 |
+
} else {
|
226 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, seqlen]);
|
227 |
+
}
|
228 |
+
}
|
229 |
+
|
230 |
+
while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
|
231 |
+
seqlen = this.output_tokens.length;
|
232 |
+
feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
|
233 |
+
let outputs;
|
234 |
+
if (this.trace) {
|
235 |
+
console.timeStamp("RUN-BEGIN");
|
236 |
+
outputs = await this.sess.run(feed);
|
237 |
+
console.timeStamp("RUN-END");
|
238 |
+
} else {
|
239 |
+
outputs = await this.sess.run(feed);
|
240 |
+
}
|
241 |
+
last_token = BigInt(this.argmax(outputs.logits));
|
242 |
+
this.output_tokens.push(last_token);
|
243 |
+
if (callback && !this.profiler) {
|
244 |
+
callback(this.output_tokens);
|
245 |
+
}
|
246 |
+
this.update_kv_cache(feed, outputs);
|
247 |
+
feed['input_ids'] = new ort.Tensor('int64', BigInt64Array.from([last_token]), [1, 1]);
|
248 |
+
if (this.need_position_ids) {
|
249 |
+
feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from([BigInt(seqlen)]), [1, 1]);
|
250 |
+
}
|
251 |
+
}
|
252 |
+
if (this.profiler) {
|
253 |
+
this.sess.endProfiling();
|
254 |
+
}
|
255 |
+
return this.output_tokens;
|
256 |
+
}
|
257 |
+
}
|
258 |
+
|
259 |
+
const config = getConfig();
|
260 |
+
env.localModelPath = 'models';
|
261 |
+
env.allowRemoteModels = config.local == 0;
|
262 |
+
env.allowLocalModels = config.local == 1;
|
263 |
+
ort.env.wasm.numThreads = config.threads;
|
264 |
+
ort.env.wasm.simd = true;
|
265 |
+
|
266 |
+
const cons_log = [];
|
267 |
+
|
268 |
+
if (config.profiler === 2) {
|
269 |
+
console.log = function (message) {
|
270 |
+
if (!message.includes('_fence_')) {
|
271 |
+
cons_log.push(message);
|
272 |
+
}
|
273 |
+
};
|
274 |
+
}
|
275 |
+
|
276 |
+
const tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
|
277 |
+
|
278 |
+
function create_download_link(cons_log) {
|
279 |
+
if (cons_log.length > 0) {
|
280 |
+
let link = document.getElementById('download').childNodes[0];
|
281 |
+
if (link === undefined) {
|
282 |
+
link = document.createElement("a", "download-link");
|
283 |
+
link.download = "profiler.log";
|
284 |
+
link.innerText = "Download";
|
285 |
+
document.getElementById('download').appendChild(link);
|
286 |
+
}
|
287 |
+
const base64 = btoa(cons_log.join('\n'));
|
288 |
+
link.href = `data:application/json;base64,${base64}`;
|
289 |
+
}
|
290 |
+
}
|
291 |
+
|
292 |
+
async function fetchAndCache(url) {
|
293 |
+
try {
|
294 |
+
const cache = await caches.open("onnx");
|
295 |
+
let cachedResponse = await cache.match(url);
|
296 |
+
if (cachedResponse == undefined) {
|
297 |
+
await cache.add(url);
|
298 |
+
cachedResponse = await cache.match(url);
|
299 |
+
log(`${url} (network)`);
|
300 |
+
} else {
|
301 |
+
log(`${url} (cached)`);
|
302 |
+
}
|
303 |
+
const data = await cachedResponse.arrayBuffer();
|
304 |
+
return data;
|
305 |
+
} catch (error) {
|
306 |
+
log(`${url} (network)`);
|
307 |
+
return await fetch(url).then(response => response.arrayBuffer());
|
308 |
+
}
|
309 |
+
}
|
310 |
+
|
311 |
+
function token_to_text(tokenizer, tokens, startidx) {
|
312 |
+
const txt = tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: true, });
|
313 |
+
return txt;
|
314 |
+
}
|
315 |
+
|
316 |
+
const llm = new LLM();
|
317 |
+
|
318 |
+
async function main() {
|
319 |
+
|
320 |
+
const model = config.model;
|
321 |
+
|
322 |
+
await llm.load(model, {
|
323 |
+
provider: config.provider,
|
324 |
+
verbose: config.verbose,
|
325 |
+
profiler: config.profiler,
|
326 |
+
trace: config.trace,
|
327 |
+
local: config.local,
|
328 |
+
});
|
329 |
+
|
330 |
+
|
331 |
+
document.getElementById('status').innerText = "";
|
332 |
+
const query = "Tell me about Constantinople.";
|
333 |
+
let prompt;
|
334 |
+
|
335 |
+
if (model.name.includes('phi2')) {
|
336 |
+
prompt = `User:${query}\nAssistant:`;
|
337 |
+
} else {
|
338 |
+
prompt = `"<|system|>\nYou are a friendly assistant.</s>\n<|user|>\n${query}</s>\n<|assistant|>\n`;
|
339 |
+
}
|
340 |
+
const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
|
341 |
+
|
342 |
+
const start_timer = performance.now();
|
343 |
+
const output_tokens = await llm.generate(input_ids, (output_tokens) => {
|
344 |
+
document.getElementById('result').innerText = token_to_text(tokenizer, output_tokens, input_ids.length);
|
345 |
+
}, {});
|
346 |
+
const took = (performance.now() - start_timer) / 1000;
|
347 |
+
const txt = token_to_text(tokenizer, output_tokens, input_ids.length);
|
348 |
+
const seqlen = output_tokens.length;
|
349 |
+
document.getElementById('result').innerText = txt;
|
350 |
+
const perf = `${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`;
|
351 |
+
console.log(perf + " @@1");
|
352 |
+
document.getElementById('perf').innerText = perf;
|
353 |
+
if (config.csv) {
|
354 |
+
log(`${model.name},${took.toFixed(2)},${(seqlen / took).toFixed(3)},${seqlen},@@2`);
|
355 |
+
}
|
356 |
+
}
|
357 |
+
try {
|
358 |
+
await main();
|
359 |
+
} catch (error) {
|
360 |
+
console.error(error);
|
361 |
+
document.getElementById('result').innerText = error.message;
|
362 |
+
} finally {
|
363 |
+
create_download_link(cons_log);
|
364 |
+
}
|
365 |
+
</script>
|
366 |
+
|
367 |
+
<div id="status"></div>
|
368 |
+
<br />
|
369 |
+
<div id="result"></div>
|
370 |
+
<br />
|
371 |
+
<div id="perf"></div>
|
372 |
+
<br />
|
373 |
+
<div id="download"></div>
|
374 |
+
<br />
|
375 |
+
|
376 |
+
</body>
|
377 |
+
|
378 |
+
</html>
|
demo/ort-phi2/models/phi2-int4/config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/phi-2",
|
3 |
+
"architectures": [
|
4 |
+
"PhiForCausalLM"
|
5 |
+
],
|
6 |
+
"auto_map": {
|
7 |
+
"AutoConfig": "configuration_phi.PhiConfig",
|
8 |
+
"AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
|
9 |
+
},
|
10 |
+
"attention_dropout": 0.0,
|
11 |
+
"bos_token_id": 50256,
|
12 |
+
"embd_pdrop": 0.0,
|
13 |
+
"eos_token_id": 50256,
|
14 |
+
"hidden_act": "gelu_new",
|
15 |
+
"hidden_size": 2560,
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 10240,
|
18 |
+
"layer_norm_eps": 1e-05,
|
19 |
+
"max_position_embeddings": 2048,
|
20 |
+
"model_type": "phi",
|
21 |
+
"num_attention_heads": 32,
|
22 |
+
"num_hidden_layers": 32,
|
23 |
+
"num_key_value_heads": 32,
|
24 |
+
"partial_rotary_factor": 0.4,
|
25 |
+
"qk_layernorm": false,
|
26 |
+
"resid_pdrop": 0.1,
|
27 |
+
"rope_scaling": null,
|
28 |
+
"rope_theta": 10000.0,
|
29 |
+
"tie_word_embeddings": false,
|
30 |
+
"torch_dtype": "float16",
|
31 |
+
"transformers_version": "4.37.0",
|
32 |
+
"use_cache": true,
|
33 |
+
"vocab_size": 51200
|
34 |
+
}
|
demo/ort-phi2/models/phi2-int4/phi2-int4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d4321d1b34279940c9ba43aa984f6090ea5656380f415b7c87e71b6e3cbf977
|
3 |
+
size 1770018731
|
demo/ort-phi2/models/phi2-int4/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
demo/ort-phi2/models/phi2-int4/tokenizer_config.json
ADDED
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"50256": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"50257": {
|
13 |
+
"content": " ",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": false
|
19 |
+
},
|
20 |
+
"50258": {
|
21 |
+
"content": " ",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": false
|
27 |
+
},
|
28 |
+
"50259": {
|
29 |
+
"content": " ",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": false
|
35 |
+
},
|
36 |
+
"50260": {
|
37 |
+
"content": " ",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": false
|
43 |
+
},
|
44 |
+
"50261": {
|
45 |
+
"content": " ",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": false
|
51 |
+
},
|
52 |
+
"50262": {
|
53 |
+
"content": " ",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": true,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": false
|
59 |
+
},
|
60 |
+
"50263": {
|
61 |
+
"content": " ",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": true,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": false
|
67 |
+
},
|
68 |
+
"50264": {
|
69 |
+
"content": " ",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": true,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": false
|
75 |
+
},
|
76 |
+
"50265": {
|
77 |
+
"content": " ",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": true,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": false
|
83 |
+
},
|
84 |
+
"50266": {
|
85 |
+
"content": " ",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": true,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": false
|
91 |
+
},
|
92 |
+
"50267": {
|
93 |
+
"content": " ",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": true,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": false
|
99 |
+
},
|
100 |
+
"50268": {
|
101 |
+
"content": " ",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": true,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": false
|
107 |
+
},
|
108 |
+
"50269": {
|
109 |
+
"content": " ",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": true,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": false
|
115 |
+
},
|
116 |
+
"50270": {
|
117 |
+
"content": " ",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": true,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": false
|
123 |
+
},
|
124 |
+
"50271": {
|
125 |
+
"content": " ",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": true,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": false
|
131 |
+
},
|
132 |
+
"50272": {
|
133 |
+
"content": " ",
|
134 |
+
"lstrip": false,
|
135 |
+
"normalized": true,
|
136 |
+
"rstrip": false,
|
137 |
+
"single_word": false,
|
138 |
+
"special": false
|
139 |
+
},
|
140 |
+
"50273": {
|
141 |
+
"content": " ",
|
142 |
+
"lstrip": false,
|
143 |
+
"normalized": true,
|
144 |
+
"rstrip": false,
|
145 |
+
"single_word": false,
|
146 |
+
"special": false
|
147 |
+
},
|
148 |
+
"50274": {
|
149 |
+
"content": " ",
|
150 |
+
"lstrip": false,
|
151 |
+
"normalized": true,
|
152 |
+
"rstrip": false,
|
153 |
+
"single_word": false,
|
154 |
+
"special": false
|
155 |
+
},
|
156 |
+
"50275": {
|
157 |
+
"content": " ",
|
158 |
+
"lstrip": false,
|
159 |
+
"normalized": true,
|
160 |
+
"rstrip": false,
|
161 |
+
"single_word": false,
|
162 |
+
"special": false
|
163 |
+
},
|
164 |
+
"50276": {
|
165 |
+
"content": " ",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": true,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false,
|
170 |
+
"special": false
|
171 |
+
},
|
172 |
+
"50277": {
|
173 |
+
"content": " ",
|
174 |
+
"lstrip": false,
|
175 |
+
"normalized": true,
|
176 |
+
"rstrip": false,
|
177 |
+
"single_word": false,
|
178 |
+
"special": false
|
179 |
+
},
|
180 |
+
"50278": {
|
181 |
+
"content": " ",
|
182 |
+
"lstrip": false,
|
183 |
+
"normalized": true,
|
184 |
+
"rstrip": false,
|
185 |
+
"single_word": false,
|
186 |
+
"special": false
|
187 |
+
},
|
188 |
+
"50279": {
|
189 |
+
"content": " ",
|
190 |
+
"lstrip": false,
|
191 |
+
"normalized": true,
|
192 |
+
"rstrip": false,
|
193 |
+
"single_word": false,
|
194 |
+
"special": false
|
195 |
+
},
|
196 |
+
"50280": {
|
197 |
+
"content": " ",
|
198 |
+
"lstrip": false,
|
199 |
+
"normalized": true,
|
200 |
+
"rstrip": false,
|
201 |
+
"single_word": false,
|
202 |
+
"special": false
|
203 |
+
},
|
204 |
+
"50281": {
|
205 |
+
"content": " ",
|
206 |
+
"lstrip": false,
|
207 |
+
"normalized": true,
|
208 |
+
"rstrip": false,
|
209 |
+
"single_word": false,
|
210 |
+
"special": false
|
211 |
+
},
|
212 |
+
"50282": {
|
213 |
+
"content": " ",
|
214 |
+
"lstrip": false,
|
215 |
+
"normalized": true,
|
216 |
+
"rstrip": false,
|
217 |
+
"single_word": false,
|
218 |
+
"special": false
|
219 |
+
},
|
220 |
+
"50283": {
|
221 |
+
"content": " ",
|
222 |
+
"lstrip": false,
|
223 |
+
"normalized": true,
|
224 |
+
"rstrip": false,
|
225 |
+
"single_word": false,
|
226 |
+
"special": false
|
227 |
+
},
|
228 |
+
"50284": {
|
229 |
+
"content": " ",
|
230 |
+
"lstrip": false,
|
231 |
+
"normalized": true,
|
232 |
+
"rstrip": false,
|
233 |
+
"single_word": false,
|
234 |
+
"special": false
|
235 |
+
},
|
236 |
+
"50285": {
|
237 |
+
"content": " ",
|
238 |
+
"lstrip": false,
|
239 |
+
"normalized": true,
|
240 |
+
"rstrip": false,
|
241 |
+
"single_word": false,
|
242 |
+
"special": false
|
243 |
+
},
|
244 |
+
"50286": {
|
245 |
+
"content": " ",
|
246 |
+
"lstrip": false,
|
247 |
+
"normalized": true,
|
248 |
+
"rstrip": false,
|
249 |
+
"single_word": false,
|
250 |
+
"special": false
|
251 |
+
},
|
252 |
+
"50287": {
|
253 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
254 |
+
"lstrip": false,
|
255 |
+
"normalized": true,
|
256 |
+
"rstrip": false,
|
257 |
+
"single_word": false,
|
258 |
+
"special": false
|
259 |
+
},
|
260 |
+
"50288": {
|
261 |
+
"content": "\t\t\t\t\t\t\t\t",
|
262 |
+
"lstrip": false,
|
263 |
+
"normalized": true,
|
264 |
+
"rstrip": false,
|
265 |
+
"single_word": false,
|
266 |
+
"special": false
|
267 |
+
},
|
268 |
+
"50289": {
|
269 |
+
"content": "\t\t\t\t\t\t\t",
|
270 |
+
"lstrip": false,
|
271 |
+
"normalized": true,
|
272 |
+
"rstrip": false,
|
273 |
+
"single_word": false,
|
274 |
+
"special": false
|
275 |
+
},
|
276 |
+
"50290": {
|
277 |
+
"content": "\t\t\t\t\t\t",
|
278 |
+
"lstrip": false,
|
279 |
+
"normalized": true,
|
280 |
+
"rstrip": false,
|
281 |
+
"single_word": false,
|
282 |
+
"special": false
|
283 |
+
},
|
284 |
+
"50291": {
|
285 |
+
"content": "\t\t\t\t\t",
|
286 |
+
"lstrip": false,
|
287 |
+
"normalized": true,
|
288 |
+
"rstrip": false,
|
289 |
+
"single_word": false,
|
290 |
+
"special": false
|
291 |
+
},
|
292 |
+
"50292": {
|
293 |
+
"content": "\t\t\t\t",
|
294 |
+
"lstrip": false,
|
295 |
+
"normalized": true,
|
296 |
+
"rstrip": false,
|
297 |
+
"single_word": false,
|
298 |
+
"special": false
|
299 |
+
},
|
300 |
+
"50293": {
|
301 |
+
"content": "\t\t\t",
|
302 |
+
"lstrip": false,
|
303 |
+
"normalized": true,
|
304 |
+
"rstrip": false,
|
305 |
+
"single_word": false,
|
306 |
+
"special": false
|
307 |
+
},
|
308 |
+
"50294": {
|
309 |
+
"content": "\t\t",
|
310 |
+
"lstrip": false,
|
311 |
+
"normalized": true,
|
312 |
+
"rstrip": false,
|
313 |
+
"single_word": false,
|
314 |
+
"special": false
|
315 |
+
}
|
316 |
+
},
|
317 |
+
"bos_token": "<|endoftext|>",
|
318 |
+
"clean_up_tokenization_spaces": true,
|
319 |
+
"eos_token": "<|endoftext|>",
|
320 |
+
"model_max_length": 2048,
|
321 |
+
"tokenizer_class": "CodeGenTokenizer",
|
322 |
+
"unk_token": "<|endoftext|>"
|
323 |
+
}
|
main.js
CHANGED
@@ -53,51 +53,45 @@ function createElem(tag, attrs = {}, children = []) {
|
|
53 |
|
54 |
const pageCategories = [
|
55 |
{
|
56 |
-
title: `
|
57 |
-
description: `
|
58 |
demos: {
|
59 |
sam: {
|
60 |
name: 'Segment Anything',
|
61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
62 |
-
filename: "sam",
|
63 |
},
|
64 |
sdturbo: {
|
65 |
name: 'Stable Diffusion Turbo',
|
66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
67 |
-
filename: "
|
|
|
|
|
|
|
|
|
|
|
68 |
},
|
69 |
yolo: {
|
70 |
name: 'Yolo',
|
71 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
72 |
-
filename: "yolo",
|
73 |
},
|
74 |
},
|
75 |
},
|
76 |
{
|
77 |
-
title: `
|
78 |
-
description: `
|
79 |
demos: {
|
80 |
gemma: {
|
81 |
name: 'Gemma',
|
82 |
-
description: `Gemma with
|
83 |
-
filename: "
|
84 |
-
},
|
85 |
-
},
|
86 |
-
},
|
87 |
-
{
|
88 |
-
title: `Natural Language Processing`,
|
89 |
-
description: `Natural Language Processing`,
|
90 |
-
demos: {
|
91 |
-
tinyllama: {
|
92 |
-
name: 'Tiny Llama',
|
93 |
-
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
94 |
-
filename: "tinyllama",
|
95 |
},
|
96 |
},
|
97 |
},
|
98 |
{
|
99 |
-
title: 'Transformers.js
|
100 |
-
description: 'Transformers.js
|
101 |
demos: {
|
102 |
benchmark: {
|
103 |
name: 'Benchmark',
|
@@ -126,8 +120,8 @@ const pageCategories = [
|
|
126 |
},
|
127 |
},
|
128 |
{
|
129 |
-
title: 'TVM
|
130 |
-
description: 'TVM
|
131 |
demos: {
|
132 |
sd: {
|
133 |
name: 'Web Stable Diffusion',
|
|
|
53 |
|
54 |
const pageCategories = [
|
55 |
{
|
56 |
+
title: `ONNX Runtime`,
|
57 |
+
description: `ONNX Runtime`,
|
58 |
demos: {
|
59 |
sam: {
|
60 |
name: 'Segment Anything',
|
61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
62 |
+
filename: "ort-sam",
|
63 |
},
|
64 |
sdturbo: {
|
65 |
name: 'Stable Diffusion Turbo',
|
66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
67 |
+
filename: "ort-sd-turbo",
|
68 |
+
},
|
69 |
+
tinyllama: {
|
70 |
+
name: 'Tiny Llama',
|
71 |
+
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
72 |
+
filename: "ort-tiny-llama",
|
73 |
},
|
74 |
yolo: {
|
75 |
name: 'Yolo',
|
76 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
77 |
+
filename: "ort-yolo",
|
78 |
},
|
79 |
},
|
80 |
},
|
81 |
{
|
82 |
+
title: `TFLite`,
|
83 |
+
description: `TFLite`,
|
84 |
demos: {
|
85 |
gemma: {
|
86 |
name: 'Gemma',
|
87 |
+
description: `Gemma with TFLite and MediaPipe from https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info.</a>`,
|
88 |
+
filename: "tflite-gemma",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
},
|
90 |
},
|
91 |
},
|
92 |
{
|
93 |
+
title: 'Transformers.js',
|
94 |
+
description: 'Transformers.js',
|
95 |
demos: {
|
96 |
benchmark: {
|
97 |
name: 'Benchmark',
|
|
|
120 |
},
|
121 |
},
|
122 |
{
|
123 |
+
title: 'TVM',
|
124 |
+
description: 'TVM',
|
125 |
demos: {
|
126 |
sd: {
|
127 |
name: 'Web Stable Diffusion',
|
transformers/transformers.js
ADDED
The diff for this file is too large to render.
See raw diff
|
|