radames committed on
Commit
31b13da
1 Parent(s): b442f82

Upload 6 files

Browse files
Files changed (6) hide show
  1. build/m.d.ts +65 -0
  2. build/m.js +476 -0
  3. build/m_bg.wasm +3 -0
  4. build/m_bg.wasm.d.ts +14 -0
  5. index.html +363 -16
  6. phiWorker.js +131 -0
build/m.d.ts ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ /**
4
+ */
5
+ export class Model {
6
+ free(): void;
7
+ /**
8
+ * @param {Uint8Array} weights
9
+ * @param {Uint8Array} tokenizer
10
+ * @param {boolean} quantized
11
+ */
12
+ constructor(weights: Uint8Array, tokenizer: Uint8Array, quantized: boolean);
13
+ /**
14
+ * @param {string} prompt
15
+ * @param {number} temp
16
+ * @param {number} top_p
17
+ * @param {number} repeat_penalty
18
+ * @param {number} repeat_last_n
19
+ * @param {bigint} seed
20
+ * @returns {string}
21
+ */
22
+ init_with_prompt(prompt: string, temp: number, top_p: number, repeat_penalty: number, repeat_last_n: number, seed: bigint): string;
23
+ /**
24
+ * @returns {string}
25
+ */
26
+ next_token(): string;
27
+ }
28
+
29
+ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
30
+
31
+ export interface InitOutput {
32
+ readonly memory: WebAssembly.Memory;
33
+ readonly __wbg_model_free: (a: number) => void;
34
+ readonly model_load: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
35
+ readonly model_init_with_prompt: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number) => void;
36
+ readonly model_next_token: (a: number, b: number) => void;
37
+ readonly main: (a: number, b: number) => number;
38
+ readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
39
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
40
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
41
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
42
+ readonly __wbindgen_exn_store: (a: number) => void;
43
+ readonly __wbindgen_start: () => void;
44
+ }
45
+
46
+ export type SyncInitInput = BufferSource | WebAssembly.Module;
47
+ /**
48
+ * Instantiates the given `module`, which can either be bytes or
49
+ * a precompiled `WebAssembly.Module`.
50
+ *
51
+ * @param {SyncInitInput} module
52
+ *
53
+ * @returns {InitOutput}
54
+ */
55
+ export function initSync(module: SyncInitInput): InitOutput;
56
+
57
+ /**
58
+ * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
59
+ * for everything else, calls `WebAssembly.instantiate` directly.
60
+ *
61
+ * @param {InitInput | Promise<InitInput>} module_or_path
62
+ *
63
+ * @returns {Promise<InitOutput>}
64
+ */
65
+ export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>): Promise<InitOutput>;
build/m.js ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ let wasm;
2
+
3
+ const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
4
+
5
+ if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
6
+
7
+ let cachedUint8Memory0 = null;
8
+
9
/**
 * Returns a `Uint8Array` view over `wasm.memory.buffer`.
 * The cached view is reused unless it is missing or reports a zero byteLength,
 * in which case a new view is created and cached.
 */
function getUint8Memory0() {
    const needsRefresh = cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0;
    if (!needsRefresh) {
        return cachedUint8Memory0;
    }
    cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer);
    return cachedUint8Memory0;
}
15
+
16
/**
 * Decodes `len` bytes of wasm memory, starting at `ptr`, with the shared text decoder.
 *
 * @param {number} ptr start offset; coerced to an unsigned 32-bit value
 * @param {number} len number of bytes to decode
 * @returns {string}
 */
function getStringFromWasm0(ptr, len) {
    const start = ptr >>> 0;
    const bytes = getUint8Memory0().subarray(start, start + len);
    return cachedTextDecoder.decode(bytes);
}
20
+
21
+ const heap = new Array(128).fill(undefined);
22
+
23
+ heap.push(undefined, null, true, false);
24
+
25
+ let heap_next = heap.length;
26
+
27
/**
 * Stores `obj` in the next free slot of `heap` and returns that slot's index.
 * A free slot holds the index of the following free slot, so claiming a slot
 * moves `heap_next` to whatever the slot contained.
 */
function addHeapObject(obj) {
    if (heap.length === heap_next) {
        // No free slot left: append one that points just past itself.
        heap.push(heap.length + 1);
    }
    const slot = heap_next;
    heap_next = heap[slot];
    heap[slot] = obj;
    return slot;
}
35
+
36
/** Looks up the value stored in heap slot `idx`. */
function getObject(idx) {
    return heap[idx];
}

/**
 * Returns slot `idx` to the free list by storing the current `heap_next`
 * in it and making it the new head. Indices below 132 are never released.
 */
function dropObject(idx) {
    const keep = idx < 132;
    if (keep) return;
    heap[idx] = heap_next;
    heap_next = idx;
}

/** Reads the value in slot `idx`, releases the slot, and returns the value. */
function takeObject(idx) {
    const value = getObject(idx);
    dropObject(idx);
    return value;
}
49
+
50
+ let WASM_VECTOR_LEN = 0;
51
+
52
/**
 * Copies the byte array `arg` into memory obtained from `malloc` and records
 * its length in `WASM_VECTOR_LEN`.
 *
 * @param {Uint8Array} arg bytes to copy
 * @param {Function} malloc allocator, called as `malloc(size, 1)`
 * @returns {number} destination pointer (unsigned)
 */
function passArray8ToWasm0(arg, malloc) {
    const byteCount = arg.length * 1;
    const dest = malloc(byteCount, 1) >>> 0;
    // The view is fetched after malloc, exactly as in the original statement order.
    const memory = getUint8Memory0();
    memory.set(arg, dest / 1);
    WASM_VECTOR_LEN = arg.length;
    return dest;
}
58
+
59
+ let cachedInt32Memory0 = null;
60
+
61
/**
 * Returns an `Int32Array` view over `wasm.memory.buffer`.
 * The cached view is reused unless it is missing or reports a zero byteLength,
 * in which case a new view is created and cached.
 */
function getInt32Memory0() {
    const needsRefresh = cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0;
    if (!needsRefresh) {
        return cachedInt32Memory0;
    }
    cachedInt32Memory0 = new Int32Array(wasm.memory.buffer);
    return cachedInt32Memory0;
}
67
+
68
+ const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
69
+
70
/**
 * Writes the encoded form of `arg` into `view` and reports `{ read, written }`.
 * Uses `encodeInto` when the shared encoder provides it; otherwise encodes
 * to a temporary buffer and copies that into `view`.
 */
const encodeString = (() => {
    if (typeof cachedTextEncoder.encodeInto === 'function') {
        return function (arg, view) {
            return cachedTextEncoder.encodeInto(arg, view);
        };
    }
    return function (arg, view) {
        const encoded = cachedTextEncoder.encode(arg);
        view.set(encoded);
        return {
            read: arg.length,
            written: encoded.length
        };
    };
})();
82
+
83
/**
 * Copies the string `arg` into wasm memory and stores the number of bytes
 * written in `WASM_VECTOR_LEN`.
 *
 * Without `realloc` the string is encoded up front and copied into a buffer
 * of exactly that size. With `realloc`, char codes up to 0x7F are written
 * directly; at the first larger code the buffer is grown to
 * `written + remaining.length * 3` bytes and the remainder goes through
 * `encodeString`.
 *
 * @param {string} arg text to copy
 * @param {Function} malloc allocator, called as `malloc(size, 1)`
 * @param {Function} [realloc] reallocator, called as `realloc(ptr, oldSize, newSize, 1)`
 * @returns {number} pointer to the copied bytes (unsigned)
 */
function passStringToWasm0(arg, malloc, realloc) {
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const dest = malloc(encoded.length, 1) >>> 0;
        getUint8Memory0().subarray(dest, dest + encoded.length).set(encoded);
        WASM_VECTOR_LEN = encoded.length;
        return dest;
    }

    let capacity = arg.length;
    let ptr = malloc(capacity, 1) >>> 0;

    // Fetched after malloc, as in the original.
    const mem = getUint8Memory0();

    let written = 0;
    while (written < capacity) {
        const code = arg.charCodeAt(written);
        if (code > 0x7F) break;
        mem[ptr + written] = code;
        written++;
    }

    if (written !== capacity) {
        const remaining = written !== 0 ? arg.slice(written) : arg;
        const previousCapacity = capacity;
        capacity = written + remaining.length * 3;
        ptr = realloc(ptr, previousCapacity, capacity, 1) >>> 0;
        // Take a fresh view after realloc before encoding the remainder.
        const view = getUint8Memory0().subarray(ptr + written, ptr + capacity);
        const result = encodeString(remaining, view);

        written += result.written;
    }

    WASM_VECTOR_LEN = written;
    return ptr;
}
120
+
121
/**
 * Calls `f` with the argument list `args` and returns its result.
 * A thrown value is not propagated: it is placed on the object heap and its
 * index is passed to `wasm.__wbindgen_exn_store`, and `undefined` is returned.
 */
function handleError(f, args) {
    try {
        const result = f.apply(this, args);
        return result;
    } catch (thrown) {
        const handle = addHeapObject(thrown);
        wasm.__wbindgen_exn_store(handle);
    }
}
128
/**
 * JavaScript handle for a model object owned by the wasm module.
 * `__wbg_ptr` holds the raw pointer that is passed to the exported wasm functions.
 */
export class Model {

    /** Creates a `Model` around an existing pointer without running the constructor. */
    static __wrap(ptr) {
        const wrapped = Object.create(Model.prototype);
        wrapped.__wbg_ptr = ptr >>> 0;

        return wrapped;
    }

    /** Returns the stored pointer and resets the stored value to 0. */
    __destroy_into_raw() {
        const raw = this.__wbg_ptr;
        this.__wbg_ptr = 0;

        return raw;
    }

    /** Passes the pointer to `wasm.__wbg_model_free` after clearing it on this handle. */
    free() {
        const raw = this.__destroy_into_raw();
        wasm.__wbg_model_free(raw);
    }
    /**
    * Copies both byte arrays into wasm memory and calls `model_load`.
    * Throws the object reported by the wasm side when the error flag is set.
    *
    * @param {Uint8Array} weights
    * @param {Uint8Array} tokenizer
    * @param {boolean} quantized
    */
    constructor(weights, tokenizer, quantized) {
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const weightsPtr = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
            const weightsLen = WASM_VECTOR_LEN;
            const tokenizerPtr = passArray8ToWasm0(tokenizer, wasm.__wbindgen_malloc);
            const tokenizerLen = WASM_VECTOR_LEN;
            wasm.model_load(retptr, weightsPtr, weightsLen, tokenizerPtr, tokenizerLen, quantized);
            // Result words written at retptr: [pointer, error handle, error flag].
            const base = retptr / 4;
            const modelPtr = getInt32Memory0()[base + 0];
            const errorHandle = getInt32Memory0()[base + 1];
            const failed = getInt32Memory0()[base + 2];
            if (failed) {
                throw takeObject(errorHandle);
            }
            return Model.__wrap(modelPtr);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
        }
    }
    /**
    * Calls `model_init_with_prompt` and returns the string it produced.
    * Throws the object reported by the wasm side when the error flag is set.
    *
    * @param {string} prompt
    * @param {number} temp
    * @param {number} top_p
    * @param {number} repeat_penalty
    * @param {number} repeat_last_n
    * @param {bigint} seed
    * @returns {string}
    */
    init_with_prompt(prompt, temp, top_p, repeat_penalty, repeat_last_n, seed) {
        // Stay undefined when the call fails; they are still passed to __wbindgen_free below.
        let textPtrToFree;
        let textLenToFree;
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            const promptPtr = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const promptLen = WASM_VECTOR_LEN;
            wasm.model_init_with_prompt(retptr, this.__wbg_ptr, promptPtr, promptLen, temp, top_p, repeat_penalty, repeat_last_n, seed);
            // Result words written at retptr: [string pointer, string length, error handle, error flag].
            const base = retptr / 4;
            const textPtr = getInt32Memory0()[base + 0];
            const textLen = getInt32Memory0()[base + 1];
            const errorHandle = getInt32Memory0()[base + 2];
            const failed = getInt32Memory0()[base + 3];
            if (failed) {
                throw takeObject(errorHandle);
            }
            textPtrToFree = textPtr;
            textLenToFree = textLen;
            return getStringFromWasm0(textPtr, textLen);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
            wasm.__wbindgen_free(textPtrToFree, textLenToFree, 1);
        }
    }
    /**
    * Calls `model_next_token` and returns the string it produced.
    * Throws the object reported by the wasm side when the error flag is set.
    *
    * @returns {string}
    */
    next_token() {
        // Stay undefined when the call fails; they are still passed to __wbindgen_free below.
        let textPtrToFree;
        let textLenToFree;
        try {
            const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
            wasm.model_next_token(retptr, this.__wbg_ptr);
            // Result words written at retptr: [string pointer, string length, error handle, error flag].
            const base = retptr / 4;
            const textPtr = getInt32Memory0()[base + 0];
            const textLen = getInt32Memory0()[base + 1];
            const errorHandle = getInt32Memory0()[base + 2];
            const failed = getInt32Memory0()[base + 3];
            if (failed) {
                throw takeObject(errorHandle);
            }
            textPtrToFree = textPtr;
            textLenToFree = textLen;
            return getStringFromWasm0(textPtr, textLen);
        } finally {
            wasm.__wbindgen_add_to_stack_pointer(16);
            wasm.__wbindgen_free(textPtrToFree, textLenToFree, 1);
        }
    }
}
238
+
239
/**
 * Instantiates the wasm module from `module` with the given `imports`.
 *
 * A `Response` is instantiated via `WebAssembly.instantiateStreaming` when
 * available. If streaming fails and the response's Content-Type is not
 * `application/wasm`, a warning is logged and the bytes are instantiated
 * instead; otherwise the error is rethrown. Any other input goes straight to
 * `WebAssembly.instantiate`, and a bare `Instance` result is wrapped as
 * `{ instance, module }`.
 */
async function __wbg_load(module, imports) {
    const isResponse = typeof Response === 'function' && module instanceof Response;

    if (!isResponse) {
        const result = await WebAssembly.instantiate(module, imports);
        if (result instanceof WebAssembly.Instance) {
            return { instance: result, module };
        }
        return result;
    }

    if (typeof WebAssembly.instantiateStreaming === 'function') {
        try {
            return await WebAssembly.instantiateStreaming(module, imports);
        } catch (e) {
            if (module.headers.get('Content-Type') == 'application/wasm') {
                throw e;
            }
            console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
        }
    }

    const bytes = await module.arrayBuffer();
    return await WebAssembly.instantiate(bytes, imports);
}
269
+
270
/**
 * Builds the import object handed to the wasm module. Everything lives under
 * the `wbg` namespace; values that are JavaScript objects cross the boundary
 * as indices into the object heap (`addHeapObject` / `getObject` / `takeObject`),
 * and strings cross as (pointer, length) pairs into wasm memory.
 */
function __wbg_get_imports() {
    // Wraps `fn` so that an exception is routed through `handleError`
    // instead of propagating to the caller.
    const guarded = (fn) => function () {
        return handleError(fn, arguments);
    };

    const imports = {};
    imports.wbg = {
        __wbindgen_error_new: function (arg0, arg1) {
            return addHeapObject(new Error(getStringFromWasm0(arg0, arg1)));
        },
        __wbg_new_abda76e883ba8a5f: function () {
            return addHeapObject(new Error());
        },
        __wbg_stack_658279fe44541cf6: function (arg0, arg1) {
            const stack = getObject(arg1).stack;
            const stackPtr = passStringToWasm0(stack, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const stackLen = WASM_VECTOR_LEN;
            // Write (pointer, length) to the two words at arg0, length first as in the original.
            getInt32Memory0()[arg0 / 4 + 1] = stackLen;
            getInt32Memory0()[arg0 / 4 + 0] = stackPtr;
        },
        __wbg_error_f851667af71bcfc6: function (arg0, arg1) {
            try {
                console.error(getStringFromWasm0(arg0, arg1));
            } finally {
                // The message buffer is freed whether or not logging succeeded.
                wasm.__wbindgen_free(arg0, arg1, 1);
            }
        },
        __wbindgen_object_drop_ref: function (arg0) {
            takeObject(arg0);
        },
        __wbg_log_ff7e0b5e6573cdff: function (arg0, arg1) {
            console.log(getStringFromWasm0(arg0, arg1));
        },
        __wbg_crypto_c48a774b022d20ac: function (arg0) {
            return addHeapObject(getObject(arg0).crypto);
        },
        __wbindgen_is_object: function (arg0) {
            const val = getObject(arg0);
            return typeof(val) === 'object' && val !== null;
        },
        __wbg_process_298734cf255a885d: function (arg0) {
            return addHeapObject(getObject(arg0).process);
        },
        __wbg_versions_e2e78e134e3e5d01: function (arg0) {
            return addHeapObject(getObject(arg0).versions);
        },
        __wbg_node_1cd7a5d853dbea79: function (arg0) {
            return addHeapObject(getObject(arg0).node);
        },
        __wbindgen_is_string: function (arg0) {
            return typeof(getObject(arg0)) === 'string';
        },
        __wbg_msCrypto_bcb970640f50a1e8: function (arg0) {
            return addHeapObject(getObject(arg0).msCrypto);
        },
        __wbg_require_8f08ceecec0f4fee: guarded(function () {
            return addHeapObject(module.require);
        }),
        __wbindgen_is_function: function (arg0) {
            return typeof(getObject(arg0)) === 'function';
        },
        __wbindgen_string_new: function (arg0, arg1) {
            return addHeapObject(getStringFromWasm0(arg0, arg1));
        },
        __wbg_getRandomValues_37fa2ca9e4e07fab: guarded(function (arg0, arg1) {
            getObject(arg0).getRandomValues(getObject(arg1));
        }),
        __wbg_randomFillSync_dc1e9a60c158336d: guarded(function (arg0, arg1) {
            getObject(arg0).randomFillSync(takeObject(arg1));
        }),
        __wbg_newnoargs_581967eacc0e2604: function (arg0, arg1) {
            return addHeapObject(new Function(getStringFromWasm0(arg0, arg1)));
        },
        __wbg_call_cb65541d95d71282: guarded(function (arg0, arg1) {
            return addHeapObject(getObject(arg0).call(getObject(arg1)));
        }),
        __wbindgen_object_clone_ref: function (arg0) {
            return addHeapObject(getObject(arg0));
        },
        __wbg_self_1ff1d729e9aae938: guarded(function () {
            return addHeapObject(self.self);
        }),
        __wbg_window_5f4faef6c12b79ec: guarded(function () {
            return addHeapObject(window.window);
        }),
        __wbg_globalThis_1d39714405582d3c: guarded(function () {
            return addHeapObject(globalThis.globalThis);
        }),
        __wbg_global_651f05c6a0944d1c: guarded(function () {
            return addHeapObject(global.global);
        }),
        __wbindgen_is_undefined: function (arg0) {
            return getObject(arg0) === undefined;
        },
        __wbg_call_01734de55d61e11d: guarded(function (arg0, arg1, arg2) {
            return addHeapObject(getObject(arg0).call(getObject(arg1), getObject(arg2)));
        }),
        __wbg_now_9c5990bda04c7e53: function () {
            return Date.now();
        },
        __wbg_buffer_085ec1f694018c4f: function (arg0) {
            return addHeapObject(getObject(arg0).buffer);
        },
        __wbg_newwithbyteoffsetandlength_6da8e527659b86aa: function (arg0, arg1, arg2) {
            return addHeapObject(new Uint8Array(getObject(arg0), arg1 >>> 0, arg2 >>> 0));
        },
        __wbg_new_8125e318e6245eed: function (arg0) {
            return addHeapObject(new Uint8Array(getObject(arg0)));
        },
        __wbg_set_5cf90238115182c3: function (arg0, arg1, arg2) {
            getObject(arg0).set(getObject(arg1), arg2 >>> 0);
        },
        __wbg_newwithlength_e5d69174d6984cd7: function (arg0) {
            return addHeapObject(new Uint8Array(arg0 >>> 0));
        },
        __wbg_subarray_13db269f57aa838d: function (arg0, arg1, arg2) {
            return addHeapObject(getObject(arg0).subarray(arg1 >>> 0, arg2 >>> 0));
        },
        __wbindgen_throw: function (arg0, arg1) {
            throw new Error(getStringFromWasm0(arg0, arg1));
        },
        __wbindgen_memory: function () {
            return addHeapObject(wasm.memory);
        },
    };

    return imports;
}
425
+
426
/** Intentionally empty: this build does nothing to the import object's memory. */
function __wbg_init_memory(imports, maybe_memory) {
}

/**
 * Publishes the instance's exports as the module-level `wasm`, remembers the
 * compiled module on `__wbg_init`, drops both cached memory views, runs
 * `__wbindgen_start`, and returns the exports.
 */
function __wbg_finalize_init(instance, module) {
    const exported = instance.exports;
    wasm = exported;
    __wbg_init.__wbindgen_wasm_module = module;

    // Views created against an earlier memory must not be reused.
    cachedUint8Memory0 = null;
    cachedInt32Memory0 = null;

    exported.__wbindgen_start();
    return exported;
}
439
+
440
/**
 * Synchronously instantiates the wasm module from bytes or from a compiled
 * `WebAssembly.Module`. Returns the existing exports if already initialized.
 */
function initSync(module) {
    if (wasm !== undefined) {
        return wasm;
    }

    const imports = __wbg_get_imports();
    __wbg_init_memory(imports);

    const compiled = module instanceof WebAssembly.Module
        ? module
        : new WebAssembly.Module(module);
    const instance = new WebAssembly.Instance(compiled, imports);

    return __wbg_finalize_init(instance, compiled);
}
455
+
456
/**
 * Asynchronously instantiates the wasm module and returns its exports.
 * With no `input`, `m_bg.wasm` is resolved relative to this module's URL.
 * A string, `Request` or `URL` input is fetched; any other input is awaited
 * and passed to `__wbg_load` as is. Returns the existing exports if already initialized.
 */
async function __wbg_init(input) {
    if (wasm !== undefined) {
        return wasm;
    }

    let source = input;
    if (typeof source === 'undefined') {
        source = new URL('m_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    const needsFetch =
        typeof source === 'string' ||
        (typeof Request === 'function' && source instanceof Request) ||
        (typeof URL === 'function' && source instanceof URL);
    if (needsFetch) {
        source = fetch(source);
    }

    __wbg_init_memory(imports);

    const loaded = await __wbg_load(await source, imports);

    return __wbg_finalize_init(loaded.instance, loaded.module);
}
474
+
475
+ export { initSync }
476
+ export default __wbg_init;
build/m_bg.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a62ccdcce6e333016d5f26d45ceaa3066123a2396f3ea46d419a046f02363ae6
3
+ size 4349308
build/m_bg.wasm.d.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export function __wbg_model_free(a: number): void;
5
+ export function model_load(a: number, b: number, c: number, d: number, e: number, f: number): void;
6
+ export function model_init_with_prompt(a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number): void;
7
+ export function model_next_token(a: number, b: number): void;
8
+ export function main(a: number, b: number): number;
9
+ export function __wbindgen_add_to_stack_pointer(a: number): number;
10
+ export function __wbindgen_malloc(a: number, b: number): number;
11
+ export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
12
+ export function __wbindgen_free(a: number, b: number, c: number): void;
13
+ export function __wbindgen_exn_store(a: number): void;
14
+ export function __wbindgen_start(): void;
index.html CHANGED
@@ -1,19 +1,366 @@
 
 
 
 
 
 
 
 
1
  <!DOCTYPE html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
<!-- The doctype has to be the first thing in the file; the stray extra
     <html> document that used to precede it is folded into this head. -->
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<title>Candle Phi 1.5 Rust/WASM</title>
13
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
14
+ <link
15
+ rel="stylesheet"
16
+ href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css" />
17
+ <style>
18
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
19
+ html,
20
+ body {
21
+ font-family: "Source Sans 3", sans-serif;
22
+ }
23
+ code,
24
+ output,
25
+ select,
26
+ pre {
27
+ font-family: "Source Code Pro", monospace;
28
+ }
29
+ </style>
30
+ <style type="text/tailwindcss">
31
+ .link {
32
+ @apply underline hover:text-blue-500 hover:no-underline;
33
+ }
34
+ </style>
35
+ <script src="https://cdn.tailwindcss.com"></script>
36
+ <script type="module">
37
+ import snarkdown from "https://cdn.skypack.dev/snarkdown";
38
+ import hljs from "https://cdn.skypack.dev/highlight.js";
39
+
40
+ const TOKENIZER_URL =
41
+ "https://huggingface.co/microsoft/phi-1_5/raw/main/tokenizer.json";
42
+ // models base url
43
+ const MODELS = {
44
+ phi_1_5_quantized: {
45
+ base_url:
46
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
47
+ model: "model-q4k.gguf",
48
+ quantized: true,
49
+ seq_len: 2048,
50
+ },
51
+ phi_1_5_quantized_2: {
52
+ base_url:
53
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
54
+ model: "model-q80.gguf",
55
+ quantized: true,
56
+ seq_len: 2048,
57
+ },
58
+ };
59
+
60
+ const phiWorker = new Worker("./phiWorker.js", {
61
+ type: "module",
62
+ });
63
/**
 * Reads the current form settings, sends a "start" command to the worker and
 * mirrors the worker's status messages into the output area.
 *
 * @param {AbortController} controller aborting its signal sends an "abort"
 *   command to the worker while the run is still pending
 * @returns {Promise<object>} resolves with the worker's message whose status
 *   is "complete" or "abort"; rejects with an Error when a message carries `error`
 */
async function generateSequence(controller) {
  const getValue = (id) => document.querySelector(`#${id}`).value;
  const modelID = getValue("model");
  const model = MODELS[modelID];
  const weightsURL = model.base_url + model.model;

  const prompt = getValue("prompt").trim();
  const temperature = getValue("temperature");
  const topP = getValue("top-p");
  const repeatPenalty = getValue("repeat_penalty");
  const seed = getValue("seed");
  const maxSeqLen = getValue("max-seq");

  // Reflects one worker status message in the three output elements.
  function updateStatus(data) {
    const outStatus = document.querySelector("#output-status");
    const outGen = document.querySelector("#output-generation");
    const outCounter = document.querySelector("#output-counter");

    switch (data.status) {
      case "loading":
        outStatus.hidden = false;
        outStatus.textContent = data.message;
        outGen.hidden = true;
        outCounter.hidden = true;
        break;
      case "generating": {
        // Braces keep these declarations local to this case.
        const { prompt, sentence, tokensSec, totalTime } = data;
        outStatus.hidden = true;
        outCounter.hidden = false;
        outGen.hidden = false;

        const p = prompt.replace(/\n|\\n/g, " \n");
        const generated = sentence.replace(/\n|\\n/g, " \n");
        outGen.innerHTML = snarkdown(p + generated);
        outCounter.innerHTML = `${(totalTime / 1000).toFixed(2)}s (${tokensSec.toFixed(2)} tok/s)`;
        hljs.highlightAll();
        break;
      }
      case "complete":
        outStatus.hidden = true;
        outGen.hidden = false;
        break;
    }
  }

  return new Promise((resolve, reject) => {
    const handleAbort = () => {
      phiWorker.postMessage({ command: "abort" });
    };
    // Detach both listeners once the run has settled. Previously the abort
    // listener stayed attached, so aborting the controller after completion
    // still posted a stray "abort" command to the worker.
    const detach = () => {
      phiWorker.removeEventListener("message", handleMessage);
      controller.signal.removeEventListener("abort", handleAbort);
    };
    const handleMessage = (event) => {
      const { status, error } = event.data;
      if (status) updateStatus(event.data);
      if (error) {
        detach();
        reject(new Error(error));
        return;
      }
      if (status === "abort" || status === "complete") {
        detach();
        resolve(event.data);
      }
    };

    // Listen before posting so no reply can be missed.
    controller.signal.addEventListener("abort", handleAbort);
    phiWorker.addEventListener("message", handleMessage);

    phiWorker.postMessage({
      weightsURL,
      modelID,
      tokenizerURL: TOKENIZER_URL,
      quantized: model.quantized,
      prompt,
      temp: temperature,
      top_p: topP,
      repeatPenalty,
      seed: seed,
      maxSeqLen,
      command: "start",
    });
  });
}
148
+
149
+ const form = document.querySelector("#form");
150
+ const prompt = document.querySelector("#prompt");
151
+ const clearBtn = document.querySelector("#clear-btn");
152
+ const runBtn = document.querySelector("#run");
153
+ const modelSelect = document.querySelector("#model");
154
+ let runController = new AbortController();
155
+ let isRunning = false;
156
+
157
// When another model is chosen, cap the "Maximum length" slider at that
// model's seq_len and reset it to 200. The slider's own value is reset
// together with the <output> next to it; resetting only the <output> left
// the displayed number out of sync with the value that is actually submitted.
modelSelect.addEventListener("change", (e) => {
  const model = MODELS[e.target.value];
  const maxSeq = document.querySelector("#max-seq");
  maxSeq.max = model.seq_len;
  maxSeq.value = 200;
  maxSeq.nextElementSibling.value = 200;
});
162
+
163
// Submit toggles a run: while one is in progress it stops it, otherwise it
// starts one and waits for it to settle.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  if (isRunning) {
    stopRunning();
    return;
  }
  startRunning();
  try {
    await generateSequence(runController);
  } catch (err) {
    // A worker error rejects the promise; report it instead of leaving an
    // unhandled rejection.
    console.error(err);
  } finally {
    // Always restore the button and state, even after a failure; otherwise
    // the UI stays stuck on "Stop".
    stopRunning();
  }
});
173
+
174
/** Marks a run as in progress and turns the submit button into a "Stop" button. */
function startRunning() {
  runBtn.textContent = "Stop";
  isRunning = true;
}

/**
 * Aborts the current controller, replaces it with a fresh one for the next
 * run, and restores the "Run" label and idle state.
 */
function stopRunning() {
  const finished = runController;
  finished.abort();
  runController = new AbortController();
  isRunning = false;
  runBtn.textContent = "Run";
}
185
// Clear button: empties the prompt, hides itself, disables Run and stops any run.
clearBtn.addEventListener("click", (event) => {
  event.preventDefault();
  prompt.value = "";
  clearBtn.classList.add("invisible");
  runBtn.disabled = true;
  stopRunning();
});

// Typing in the prompt re-enables Run and shows the clear button only while
// the prompt contains text.
prompt.addEventListener("input", (event) => {
  runBtn.disabled = false;
  const isEmpty = !(event.target.value.length > 0);
  clearBtn.classList.toggle("invisible", isEmpty);
});
200
+ </script>
201
+ </head>
202
+ <body class="container max-w-4xl mx-auto p-4 text-gray-800">
203
+ <main class="grid grid-cols-1 gap-8 relative">
204
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
205
+ <div>
206
+ <h1 class="text-5xl font-bold">Candle Phi 1.5</h1>
207
+ <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
208
+ <p class="max-w-lg">
209
+ The
210
+ <a
211
+ href="https://huggingface.co/microsoft/phi-1_5"
212
+ class="link"
213
+ target="_blank">Phi-1.5</a
214
+ >
215
+ model achieves state-of-the-art performance with only 1.3 billion
216
+ parameters, compared to models with up to 10 billion. You can try the
217
+ quantized version of the model here. Additional prompt examples are
218
+ available in the
219
+ <a
220
+ href="https://arxiv.org/pdf/2309.05463.pdf#page=8"
221
+ class="link"
222
+ target="_blank">
223
+ technical report </a
224
+ >.
225
+ </p>
226
+ </div>
227
+
228
+ <div>
229
+ <label for="model" class="font-medium">Models Options: </label>
230
+ <select
231
+ id="model"
232
+ class="border-2 border-gray-500 rounded-md font-light">
233
+ <option value="phi_1_5_quantized" selected>
234
+ phi 1.5 quantized q4k (800 MB)
235
+ </option>
236
+ <option value="phi_1_5_quantized_2">
237
+ phi 1.5 quantized q80 (1.51 GB)
238
+ </option>
239
+ <!-- <option value="phi_1_5">phi 1.5 (2.84 GB)</option> -->
240
+ </select>
241
+ </div>
242
+ <form
243
+ id="form"
244
+ class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
245
+ <input type="submit" hidden />
246
+ <textarea
247
+ type="text"
248
+ id="prompt"
249
+ class="font-light w-full px-3 py-2 mx-1 resize-none outline-none"
250
+ oninput="this.style.height = 0;this.style.height = this.scrollHeight + 'px'"
251
+ placeholder="Add your prompt here...">
252
+ Write a detailed analogy between mathematics and a lighthouse.
253
+ Answer:</textarea
254
+ >
255
+ <button id="clear-btn">
256
+ <svg
257
+ fill="none"
258
+ xmlns="http://www.w3.org/2000/svg"
259
+ width="40"
260
+ viewBox="0 0 70 40">
261
+ <path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
262
+ <path
263
+ d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1"
264
+ opacity=".5"
265
+ stroke="#1F2937"
266
+ stroke-width="2" />
267
+ </svg>
268
+ </button>
269
+ <button
270
+ id="run"
271
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
272
+ Run
273
+ </button>
274
+ </form>
275
+ <div class="grid grid-cols-3 max-w-md items-center gap-3">
276
+ <label class="text-sm font-medium" for="max-seq">Maximum length </label>
277
+ <input
278
+ type="range"
279
+ id="max-seq"
280
+ name="max-seq"
281
+ min="1"
282
+ max="2048"
283
+ step="1"
284
+ value="200"
285
+ oninput="this.nextElementSibling.value = Number(this.value)" />
286
+ <output
287
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
288
+ 200</output
289
+ >
290
+ <label class="text-sm font-medium" for="temperature">Temperature</label>
291
+ <input
292
+ type="range"
293
+ id="temperature"
294
+ name="temperature"
295
+ min="0"
296
+ max="2"
297
+ step="0.01"
298
+ value="0.00"
299
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
300
+ <output
301
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
302
+ 0.00</output
303
+ >
304
+ <label class="text-sm font-medium" for="top-p">Top-p</label>
305
+ <input
306
+ type="range"
307
+ id="top-p"
308
+ name="top-p"
309
+ min="0"
310
+ max="1"
311
+ step="0.01"
312
+ value="1.00"
313
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
314
+ <output
315
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
316
+ 1.00</output
317
+ >
318
+
319
+ <label class="text-sm font-medium" for="repeat_penalty"
320
+ >Repeat Penalty</label
321
+ >
322
+
323
+ <input
324
+ type="range"
325
+ id="repeat_penalty"
326
+ name="repeat_penalty"
327
+ min="1"
328
+ max="2"
329
+ step="0.01"
330
+ value="1.10"
331
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
332
+ <output
333
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md"
334
+ >1.10</output
335
+ >
336
+ <label class="text-sm font-medium" for="seed">Seed</label>
337
+ <input
338
+ type="number"
339
+ id="seed"
340
+ name="seed"
341
+ value="299792458"
342
+ class="font-light border border-gray-700 text-right rounded-md p-2" />
343
+ <button
344
+ id="run"
345
+ onclick="document.querySelector('#seed').value = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)"
346
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm">
347
+ Rand
348
+ </button>
349
+ </div>
350
+ <div>
351
+ <h3 class="font-medium">Generation:</h3>
352
+ <div
353
+ class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2">
354
+ <div
355
+ id="output-counter"
356
+ hidden
357
+ class="ml-auto font-semibold grid-rows-1 text-sm"></div>
358
+ <p hidden id="output-generation" class="grid-rows-2"></p>
359
+ <span id="output-status" class="m-auto font-light"
360
+ >No output yet</span
361
+ >
362
+ </div>
363
+ </div>
364
+ </main>
365
+ </body>
366
  </html>
phiWorker.js ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import init, { Model } from "./build/m.js";
2
+
3
+ async function fetchArrayBuffer(url) {
4
+ const cacheName = "phi-mixformer-candle-cache";
5
+ const cache = await caches.open(cacheName);
6
+ const cachedResponse = await cache.match(url);
7
+ if (cachedResponse) {
8
+ const data = await cachedResponse.arrayBuffer();
9
+ return new Uint8Array(data);
10
+ }
11
+ const res = await fetch(url, { cache: "force-cache" });
12
+ cache.put(url, res.clone());
13
+ return new Uint8Array(await res.arrayBuffer());
14
+ }
15
/**
 * Registry of constructed `Model` objects, one per `modelID`, so each model
 * is built only on its first request.
 */
class Phi {
  // Already-built models, keyed by modelID.
  static instance = {};

  /**
   * Returns the model registered under `modelID`, building it on first use.
   *
   * @param {string} weightsURL - Location of the model weights.
   * @param {string} modelID - Key under which the built model is stored.
   * @param {string} tokenizerURL - Location of the tokenizer file.
   * @param {boolean} quantized - Passed through to the `Model` constructor.
   * @returns {Promise<Model>} The stored (or newly built) model.
   */
  static async getInstance(weightsURL, modelID, tokenizerURL, quantized) {
    const alreadyBuilt = Boolean(this.instance[modelID]);
    if (alreadyBuilt) {
      return this.instance[modelID];
    }

    // Run the module's init() before anything else touches `Model`.
    await init();
    self.postMessage({ status: "loading", message: "Loading Model" });

    // Both downloads are started together and awaited as a pair.
    const downloads = [weightsURL, tokenizerURL].map((resourceURL) =>
      fetchArrayBuffer(resourceURL)
    );
    const [weightBytes, tokenizerBytes] = await Promise.all(downloads);

    this.instance[modelID] = new Model(weightBytes, tokenizerBytes, quantized);
    return this.instance[modelID];
  }
}
39
+
40
// AbortController for the most recently started generation; null until the
// first "start" command has been received.
let controller = null;

/**
 * Worker command dispatcher.
 *
 * - `{ command: "start", ...params }` creates a fresh AbortController and
 *   starts `generate` with the message payload.
 * - `{ command: "abort" }` signals the current generation to stop. If no
 *   generation has been started yet there is nothing to abort, so the
 *   message is ignored instead of throwing on a null controller.
 */
self.addEventListener("message", (event) => {
  const { command } = event.data;
  if (command === "start") {
    controller = new AbortController();
    generate(event.data);
  } else if (command === "abort") {
    if (controller) {
      controller.abort();
    }
  }
});
49
+
50
+ async function generate(data) {
51
+ const {
52
+ weightsURL,
53
+ modelID,
54
+ tokenizerURL,
55
+ quantized,
56
+ prompt,
57
+ temp,
58
+ top_p,
59
+ repeatPenalty,
60
+ seed,
61
+ maxSeqLen,
62
+ } = data;
63
+ try {
64
+ self.postMessage({ status: "loading", message: "Starting Phi" });
65
+ const model = await Phi.getInstance(
66
+ weightsURL,
67
+ modelID,
68
+ tokenizerURL,
69
+ quantized
70
+ );
71
+
72
+ self.postMessage({ status: "loading", message: "Initializing model" });
73
+ model.init_with_prompt(
74
+ prompt,
75
+ temp,
76
+ top_p,
77
+ repeatPenalty,
78
+ 64,
79
+ BigInt(seed)
80
+ );
81
+ const seq_len = 2048;
82
+
83
+ let sentence = "";
84
+ let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
85
+ let startTime = performance.now();
86
+ let tokensCount = 0;
87
+ while (tokensCount < maxTokens) {
88
+ await new Promise(async (resolve) => {
89
+ if (controller && controller.signal.aborted) {
90
+ self.postMessage({
91
+ status: "aborted",
92
+ message: "Aborted",
93
+ output: prompt + sentence,
94
+ });
95
+ return;
96
+ }
97
+ const token = await model.next_token();
98
+ if (token === "<|endoftext|>") {
99
+ self.postMessage({
100
+ status: "complete",
101
+ message: "complete",
102
+ output: prompt + sentence,
103
+ });
104
+ return;
105
+ }
106
+ const tokensSec =
107
+ ((tokensCount + 1) / (performance.now() - startTime)) * 1000;
108
+
109
+ sentence += token;
110
+ self.postMessage({
111
+ status: "generating",
112
+ message: "Generating token",
113
+ token: token,
114
+ sentence: sentence,
115
+ totalTime: performance.now() - startTime,
116
+ tokensSec,
117
+ prompt: prompt,
118
+ });
119
+ setTimeout(resolve, 0);
120
+ });
121
+ tokensCount++;
122
+ }
123
+ self.postMessage({
124
+ status: "complete",
125
+ message: "complete",
126
+ output: prompt + sentence,
127
+ });
128
+ } catch (e) {
129
+ self.postMessage({ error: e });
130
+ }
131
+ }