/*!
 * ONNX Runtime Web v1.18.0
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 */
// Bundler-generated interop helpers. They are kept verbatim because every
// module wrapper in this bundle is built on top of them.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Wraps a module body so that it runs at most once, the first time the
// returned initializer is called.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
var __commonJS = (cb, mod) => function __require() {
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// common/dist/esm/backend-impl.js
var backends, backendsSortedByPriority, registerBackend, tryResolveAndInitializeBackend, resolveBackendAndExecutionProviders;
var init_backend_impl = __esm({
  "common/dist/esm/backend-impl.js"() {
    "use strict";
    // name -> { backend, priority, initPromise?, initialized?, aborted?, error? }
    backends = /* @__PURE__ */ new Map();
    // Names of backends with priority >= 0, highest priority first.
    backendsSortedByPriority = [];
    /**
     * Registers a backend under `name`.
     *
     * - A registration with a lower priority than the existing one is ignored.
     * - Re-registering a *different* backend with the same priority throws.
     * - A registration with a higher priority replaces the existing one.
     *   (Previously the priority list was re-sorted but the stored backend and
     *   priority were left untouched, so the old backend kept being resolved.)
     * - Backends with a negative priority are stored but never chosen by default.
     *
     * @param {string} name backend name
     * @param {object} backend object exposing `init()` and `createInferenceSessionHandler()`
     * @param {number} priority higher wins; negative means "explicit use only"
     * @throws {TypeError} when `backend` does not look like a backend
     * @throws {Error} on a conflicting registration with equal priority
     */
    registerBackend = (name, backend, priority) => {
      const looksLikeBackend = backend && typeof backend.init === "function" && typeof backend.createInferenceSessionHandler === "function";
      if (!looksLikeBackend) {
        throw new TypeError("not a valid backend");
      }
      const currentBackend = backends.get(name);
      if (currentBackend === void 0) {
        backends.set(name, { backend, priority });
      } else if (currentBackend.priority > priority) {
        // Same name is already registered with a higher priority: skip.
        return;
      } else if (currentBackend.priority === priority) {
        if (currentBackend.backend !== backend) {
          throw new Error(`cannot register backend "${name}" using priority ${priority}`);
        }
      } else if (currentBackend.backend === backend) {
        // Same backend promoted to a higher priority: keep its init state.
        currentBackend.priority = priority;
      } else {
        // A different backend with a higher priority takes over the name.
        backends.set(name, { backend, priority });
      }
      if (priority >= 0) {
        const existingIndex = backendsSortedByPriority.indexOf(name);
        if (existingIndex !== -1) {
          backendsSortedByPriority.splice(existingIndex, 1);
        }
        // Insert before the first entry whose priority is not higher than ours.
        for (let i = 0; i < backendsSortedByPriority.length; i++) {
          if (backends.get(backendsSortedByPriority[i]).priority <= priority) {
            backendsSortedByPriority.splice(i, 0, name);
            return;
          }
        }
        backendsSortedByPriority.push(name);
      }
    };
    /**
     * Looks up a backend by name and initializes it if necessary.
     *
     * @param {string} backendName
     * @returns {Promise<object|string>} the backend on success, or an error
     *   message string when the backend is unknown or failed to initialize
     */
    tryResolveAndInitializeBackend = async (backendName) => {
      const backendInfo = backends.get(backendName);
      if (!backendInfo) {
        return "backend not found.";
      }
      if (backendInfo.initialized) {
        return backendInfo.backend;
      } else if (backendInfo.aborted) {
        return backendInfo.error;
      } else {
        // When another caller already started init(), share its promise
        // instead of calling init() a second time.
        const isInitializing = !!backendInfo.initPromise;
        try {
          if (!isInitializing) {
            backendInfo.initPromise = backendInfo.backend.init(backendName);
          }
          await backendInfo.initPromise;
          backendInfo.initialized = true;
          return backendInfo.backend;
        } catch (e) {
          // Only the caller that started init() records the failure.
          if (!isInitializing) {
            backendInfo.error = `${e}`;
            backendInfo.aborted = true;
          }
          return backendInfo.error;
        } finally {
          delete backendInfo.initPromise;
        }
      }
    };
    /**
     * Picks the backend for a session and filters the requested execution
     * providers down to those served by that backend.
     *
     * @param {object} options session options; `executionProviders` may hold
     *   names or `{ name }` objects
     * @returns {Promise<[object, object]>} the chosen backend and a proxy over
     *   `options` whose `executionProviders` is the filtered list
     * @throws {Error} when no backend could be resolved
     */
    resolveBackendAndExecutionProviders = async (options) => {
      const eps = options.executionProviders || [];
      const backendHints = eps.map((i) => typeof i === "string" ? i : i.name);
      const backendNames = backendHints.length === 0 ? backendsSortedByPriority : backendHints;
      let backend;
      const errors = [];
      const availableBackendNames = /* @__PURE__ */ new Set();
      for (const backendName of backendNames) {
        const resolveResult = await tryResolveAndInitializeBackend(backendName);
        if (typeof resolveResult === "string") {
          errors.push({ name: backendName, err: resolveResult });
        } else {
          // The first backend that resolves wins; later names are only kept
          // when they map to that same backend object.
          if (!backend) {
            backend = resolveResult;
          }
          if (backend === resolveResult) {
            availableBackendNames.add(backendName);
          }
        }
      }
      if (!backend) {
        throw new Error(`no available backend found. ERR: ${errors.map((e) => `[${e.name}] ${e.err}`).join(", ")}`);
      }
      for (const { name, err } of errors) {
        if (backendHints.includes(name)) {
          console.warn(`removing requested execution provider "${name}" from session options because it is not available: ${err}`);
        }
      }
      const filteredEps = eps.filter((i) => availableBackendNames.has(typeof i === "string" ? i : i.name));
      return [
        backend,
        new Proxy(options, {
          get: (target, prop) => {
            if (prop === "executionProviders") {
              return filteredEps;
            }
            return Reflect.get(target, prop);
          }
        })
      ];
    };
  }
});

// common/dist/esm/backend.js
var init_backend = __esm({
  "common/dist/esm/backend.js"() {
    "use strict";
    init_backend_impl();
  }
});

// common/dist/esm/version.js
var version;
var init_version = __esm({
  "common/dist/esm/version.js"() {
    "use strict";
    version = "1.18.0";
  }
});

// common/dist/esm/env-impl.js
var logLevelValue, env;
var init_env_impl = __esm({
  "common/dist/esm/env-impl.js"() {
    "use strict";
    init_version();
    logLevelValue = "warning";
    env = {
      wasm: {},
      webgl: {},
      webgpu: {},
      versions: { common: version },
      // Assigning `undefined` is a no-op; any other unknown value throws.
      set logLevel(value) {
        if (value === void 0) {
          return;
        }
        if (typeof value !== "string" || ["verbose", "info", "warning", "error", "fatal"].indexOf(value) === -1) {
          throw new Error(`Unsupported logging level: ${value}`);
        }
        logLevelValue = value;
      },
      get logLevel() {
        return logLevelValue;
      }
    };
    Object.defineProperty(env, "logLevel", { enumerable: true });
  }
});

// common/dist/esm/env.js
var env2;
var init_env = __esm({
  "common/dist/esm/env.js"() {
    "use strict";
    init_env_impl();
    env2 = env;
  }
});
// common/dist/esm/tensor-conversion-impl.js
var tensorToDataURL, tensorToImageData;
var init_tensor_conversion_impl = __esm({
  "common/dist/esm/tensor-conversion-impl.js"() {
    "use strict";
    // Expands a scalar / per-channel normalization value into [r, g, b, a].
    const expandNorm = (value, fallback, alphaFallback) => {
      if (value === void 0) {
        return [fallback, fallback, fallback, fallback];
      }
      if (typeof value === "number") {
        return [value, value, value, value];
      }
      return [value[0], value[1], value[2], value[3] !== void 0 ? value[3] : alphaFallback];
    };
    // Resolves options.norm into 4-element mean (multiplier) and bias arrays.
    const resolveNorm = (norm) => ({
      mean: expandNorm(norm?.mean, 255, 255),
      bias: expandNorm(norm?.bias, 0, 0)
    });
    // Reads width / height / channel count from dims according to the layout.
    const readGeometry = (tensor, options) => options?.tensorLayout === "NHWC" ? { width: tensor.dims[2], height: tensor.dims[1], channels: tensor.dims[3] } : { width: tensor.dims[3], height: tensor.dims[2], channels: tensor.dims[1] };
    // Start offset of each colour plane inside tensor.data (-1: no alpha plane).
    const planeOffsets = (format, stride) => {
      switch (format) {
        case "RGBA":
          return { r: 0, g: stride, b: stride * 2, a: stride * 3 };
        case "RBG":
          return { r: 0, b: stride, g: stride * 2, a: -1 };
        case "BGR":
          return { b: 0, g: stride, r: stride * 2, a: -1 };
        default:
          return { r: 0, g: stride, b: stride * 2, a: -1 };
      }
    };
    /**
     * Renders an image tensor onto a canvas and returns it as a data URL.
     *
     * Each pixel is computed as `(value - bias) * mean` per channel.
     * NOTE: channel values are always read plane by plane from `tensor.data`;
     * `tensorLayout` only selects which dims hold width and height.
     *
     * @param tensor object exposing `dims` and `data`
     * @param options optional `{ tensorLayout, format, norm: { mean, bias } }`
     * @returns {string} the canvas content as a data URL
     * @throws {Error} when no 2D context is available or the canvas has no `toDataURL`
     */
    tensorToDataURL = (tensor, options) => {
      const canvas = typeof document !== "undefined" ? document.createElement("canvas") : new OffscreenCanvas(1, 1);
      // Resolve the layout first so the canvas gets the real image size
      // (NHWC height is dims[1], not dims[3]).
      const { width, height } = readGeometry(tensor, options);
      canvas.width = width;
      canvas.height = height;
      const pixels2DContext = canvas.getContext("2d");
      if (pixels2DContext == null) {
        throw new Error("Can not access image data");
      }
      const inputformat = options?.format !== void 0 ? options.format : "RGB";
      const { mean: normMean, bias: normBias } = resolveNorm(options?.norm);
      let { r: rTensorPointer, g: gTensorPointer, b: bTensorPointer, a: aTensorPointer } = planeOffsets(inputformat, height * width);
      for (let row = 0; row < height; row++) {
        for (let col = 0; col < width; col++) {
          const R = (tensor.data[rTensorPointer++] - normBias[0]) * normMean[0];
          const G = (tensor.data[gTensorPointer++] - normBias[1]) * normMean[1];
          const B = (tensor.data[bTensorPointer++] - normBias[2]) * normMean[2];
          const A = aTensorPointer === -1 ? 255 : (tensor.data[aTensorPointer++] - normBias[3]) * normMean[3];
          pixels2DContext.fillStyle = `rgba(${R},${G},${B},${A})`;
          pixels2DContext.fillRect(col, row, 1, 1);
        }
      }
      if ("toDataURL" in canvas) {
        return canvas.toDataURL();
      }
      throw new Error("toDataURL is not supported");
    };
    /**
     * Converts an image tensor into an ImageData-like object (RGBA, 4 bytes
     * per pixel) created through a 2D canvas context.
     *
     * NOTE: channel values are always read plane by plane from `tensor.data`;
     * `tensorLayout` only selects which dims hold width, height and channels.
     *
     * @param tensor object exposing `dims` and `data`
     * @param options optional `{ tensorLayout, format, norm: { mean, bias } }`
     * @returns the object returned by `createImageData`, filled with pixels
     * @throws {Error} when no 2D context is available, or when an explicitly
     *   given `format` contradicts the tensor's channel count
     */
    tensorToImageData = (tensor, options) => {
      const pixels2DContext = typeof document !== "undefined" ? document.createElement("canvas").getContext("2d") : new OffscreenCanvas(1, 1).getContext("2d");
      if (pixels2DContext == null) {
        throw new Error("Can not access image data");
      }
      const { width, height, channels } = readGeometry(tensor, options);
      const declaredFormat = options?.format;
      const inputformat = declaredFormat !== void 0 ? declaredFormat : "RGB";
      const { mean: normMean, bias: normBias } = resolveNorm(options?.norm);
      // Validate only a format the caller actually supplied. The previous
      // condition was mis-parenthesised and rejected every 3-channel tensor
      // whenever options were passed without `format`.
      if (declaredFormat !== void 0) {
        const mismatch = channels === 4 && declaredFormat !== "RGBA" || channels === 3 && declaredFormat !== "RGB" && declaredFormat !== "BGR" && declaredFormat !== "RBG";
        if (mismatch) {
          throw new Error("Tensor format doesn't match input tensor dims");
        }
      }
      const pixelCount = height * width;
      let { r: rTensorPointer, g: gTensorPointer, b: bTensorPointer, a: aTensorPointer } = planeOffsets(inputformat, pixelCount);
      const image = pixels2DContext.createImageData(width, height);
      for (let i = 0, out = 0; i < pixelCount; i++, out += 4) {
        image.data[out] = (tensor.data[rTensorPointer++] - normBias[0]) * normMean[0];
        image.data[out + 1] = (tensor.data[gTensorPointer++] - normBias[1]) * normMean[1];
        image.data[out + 2] = (tensor.data[bTensorPointer++] - normBias[2]) * normMean[2];
        image.data[out + 3] = aTensorPointer === -1 ? 255 : (tensor.data[aTensorPointer++] - normBias[3]) * normMean[3];
      }
      return image;
    };
  }
});
// common/dist/esm/tensor-factory-impl.js
var bufferToTensor, tensorFromImage, tensorFromTexture, tensorFromGpuBuffer, tensorFromPinnedBuffer;
var init_tensor_factory_impl = __esm({
  "common/dist/esm/tensor-factory-impl.js"() {
    "use strict";
    init_tensor_impl();
    // Expands a scalar / per-channel normalization value into [r, g, b, a].
    const expandNorm = (value, alphaFallback) => typeof value === "number" ? [value, value, value, value] : [value[0], value[1], value[2], value[3] ?? alphaFallback];
    /**
     * Converts an interleaved pixel buffer into a planar float32 tensor of
     * shape [1, C, height, width]; every value is `(pixel + bias) / mean`.
     *
     * @param buffer interleaved pixel data (3 values per pixel when
     *   `options.format === "RGB"`, otherwise 4)
     * @param options `{ height, width, format?, tensorFormat?, tensorLayout?, norm? }`;
     *   `norm.mean` defaults to 255 and `norm.bias` to 0, independently
     * @returns {Tensor} a 4-channel tensor for `tensorFormat === "RGBA"`,
     *   otherwise a 3-channel tensor
     * @throws {Error} on a missing buffer/size or an NHWC layout request
     */
    bufferToTensor = (buffer, options) => {
      if (buffer === void 0) {
        throw new Error("Image buffer must be defined");
      }
      if (options.height === void 0 || options.width === void 0) {
        throw new Error("Image height and width must be defined");
      }
      if (options.tensorLayout === "NHWC") {
        throw new Error("NHWC Tensor layout is not supported yet");
      }
      const { height, width } = options;
      // Default mean and bias separately so that a partial `norm`
      // (e.g. `{ mean: 127.5 }`) no longer crashes on the missing member.
      const normMean = expandNorm(options.norm?.mean ?? 255, 255);
      const normBias = expandNorm(options.norm?.bias ?? 0, 0);
      const inputformat = options.format !== void 0 ? options.format : "RGBA";
      const outputformat = options.tensorFormat !== void 0 ? options.tensorFormat : "RGB";
      const stride = height * width;
      const hasOutputAlpha = outputformat === "RGBA";
      const float32Data = new Float32Array(stride * (hasOutputAlpha ? 4 : 3));
      // Only an "RGB" input is 3 values per pixel; everything else is read as 4.
      const step = inputformat === "RGB" ? 3 : 4;
      // Alpha is copied only when both sides have an alpha channel. This is
      // decided once: the previous per-pixel `aImagePointer !== -1` check
      // stopped holding after the first iteration's increment.
      const copyAlpha = hasOutputAlpha && step === 4;
      let rTensorPointer = 0;
      let gTensorPointer = stride;
      let bTensorPointer = stride * 2;
      let aTensorPointer = -1;
      if (outputformat === "RGBA") {
        aTensorPointer = stride * 3;
      } else if (outputformat === "RBG") {
        bTensorPointer = stride;
        gTensorPointer = stride * 2;
      } else if (outputformat === "BGR") {
        bTensorPointer = 0;
        gTensorPointer = stride;
        rTensorPointer = stride * 2;
      }
      for (let i = 0, pixel = 0; i < stride; i++, pixel += step) {
        float32Data[rTensorPointer++] = (buffer[pixel] + normBias[0]) / normMean[0];
        float32Data[gTensorPointer++] = (buffer[pixel + 1] + normBias[1]) / normMean[1];
        float32Data[bTensorPointer++] = (buffer[pixel + 2] + normBias[2]) / normMean[2];
        if (copyAlpha) {
          float32Data[aTensorPointer++] = (buffer[pixel + 3] + normBias[3]) / normMean[3];
        }
      }
      return new Tensor("float32", float32Data, [1, hasOutputAlpha ? 4 : 3, height, width]);
    };
    /**
     * Creates a tensor from an HTMLImageElement, ImageData, ImageBitmap or an
     * image URL string.
     *
     * The caller's `options` object is never modified; derived settings are
     * written to a private copy.
     *
     * @param image the image source
     * @param options optional conversion options (required for ImageBitmap)
     * @returns {Promise<Tensor>}
     * @throws {Error} for unsupported input, a missing canvas / 2D context, or
     *   (URL input) a failed image load
     */
    tensorFromImage = async (image, options) => {
      const isHTMLImageEle = typeof HTMLImageElement !== "undefined" && image instanceof HTMLImageElement;
      const isImageDataEle = typeof ImageData !== "undefined" && image instanceof ImageData;
      const isImageBitmap = typeof ImageBitmap !== "undefined" && image instanceof ImageBitmap;
      const isString = typeof image === "string";
      let data;
      // Shallow copy: writing tensorFormat/height/width into the caller's
      // object made a second call with the same options throw.
      const bufferToTensorOptions = { ...options };
      const createCanvas = () => {
        if (typeof document !== "undefined") {
          return document.createElement("canvas");
        } else if (typeof OffscreenCanvas !== "undefined") {
          return new OffscreenCanvas(1, 1);
        } else {
          throw new Error("Canvas is not supported");
        }
      };
      const createCanvasContext = (canvas) => {
        // Guard both globals: HTMLCanvasElement is undefined inside workers,
        // where a bare `instanceof` would raise a ReferenceError.
        const isHtmlCanvas = typeof HTMLCanvasElement !== "undefined" && canvas instanceof HTMLCanvasElement;
        const isOffscreenCanvas = typeof OffscreenCanvas !== "undefined" && canvas instanceof OffscreenCanvas;
        return isHtmlCanvas || isOffscreenCanvas ? canvas.getContext("2d") : null;
      };
      if (isHTMLImageEle) {
        const canvas = createCanvas();
        canvas.width = image.width;
        canvas.height = image.height;
        const pixels2DContext = createCanvasContext(canvas);
        if (pixels2DContext == null) {
          throw new Error("Can not access image data");
        }
        let height = image.height;
        let width = image.width;
        if (options !== void 0 && options.resizedHeight !== void 0 && options.resizedWidth !== void 0) {
          height = options.resizedHeight;
          width = options.resizedWidth;
        }
        if (options !== void 0 && options.tensorFormat !== void 0) {
          throw new Error("Image input config format must be RGBA for HTMLImageElement");
        }
        bufferToTensorOptions.tensorFormat = "RGBA";
        bufferToTensorOptions.height = height;
        bufferToTensorOptions.width = width;
        // NOTE(review): the image is drawn unscaled, so resizedWidth/Height
        // select a region rather than rescale - confirm this is intended.
        pixels2DContext.drawImage(image, 0, 0);
        data = pixels2DContext.getImageData(0, 0, width, height).data;
      } else if (isImageDataEle) {
        const useResized = options !== void 0 && options.resizedWidth !== void 0 && options.resizedHeight !== void 0;
        const height = useResized ? options.resizedHeight : image.height;
        const width = useResized ? options.resizedWidth : image.width;
        bufferToTensorOptions.format = "RGBA";
        bufferToTensorOptions.height = height;
        bufferToTensorOptions.width = width;
        if (options !== void 0) {
          const tempCanvas = createCanvas();
          tempCanvas.width = width;
          tempCanvas.height = height;
          const pixels2DContext = createCanvasContext(tempCanvas);
          if (pixels2DContext == null) {
            throw new Error("Can not access image data");
          }
          pixels2DContext.putImageData(image, 0, 0);
          data = pixels2DContext.getImageData(0, 0, width, height).data;
        } else {
          data = image.data;
        }
      } else if (isImageBitmap) {
        if (options === void 0) {
          throw new Error("Please provide image config with format for Imagebitmap");
        }
        const canvas = createCanvas();
        canvas.width = image.width;
        canvas.height = image.height;
        const pixels2DContext = createCanvasContext(canvas);
        if (pixels2DContext == null) {
          throw new Error("Can not access image data");
        }
        const height = image.height;
        const width = image.width;
        pixels2DContext.drawImage(image, 0, 0, width, height);
        data = pixels2DContext.getImageData(0, 0, width, height).data;
        bufferToTensorOptions.height = height;
        bufferToTensorOptions.width = width;
        return bufferToTensor(data, bufferToTensorOptions);
      } else if (isString) {
        return new Promise((resolve, reject) => {
          const canvas = createCanvas();
          const context = createCanvasContext(canvas);
          if (!image || !context) {
            reject(new Error("Image source must be a non-empty string and a 2D canvas context must be available"));
            return;
          }
          const newImage = new Image();
          newImage.onload = () => {
            // Errors thrown in an event handler would otherwise leave the
            // promise pending forever.
            try {
              canvas.width = newImage.width;
              canvas.height = newImage.height;
              context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
              const img = context.getImageData(0, 0, canvas.width, canvas.height);
              bufferToTensorOptions.height = canvas.height;
              bufferToTensorOptions.width = canvas.width;
              resolve(bufferToTensor(img.data, bufferToTensorOptions));
            } catch (e) {
              reject(e);
            }
          };
          // Without an error handler a failed load never settled the promise.
          newImage.onerror = () => {
            reject(new Error(`Failed to load image from "${image}"`));
          };
          newImage.crossOrigin = "Anonymous";
          newImage.src = image;
        });
      } else {
        throw new Error("Input data provided is not supported - aborted tensor creation");
      }
      if (data !== void 0) {
        return bufferToTensor(data, bufferToTensorOptions);
      }
      throw new Error("Input data provided is not supported - aborted tensor creation");
    };
    /** Wraps a texture as a float32 tensor with dims [1, height, width, 4]. */
    tensorFromTexture = (texture, options) => {
      const { width, height, download, dispose } = options;
      const dims = [1, height, width, 4];
      return new Tensor({ location: "texture", type: "float32", texture, dims, download, dispose });
    };
    /** Wraps a GPU buffer as a tensor; `dataType` defaults to "float32". */
    tensorFromGpuBuffer = (gpuBuffer, options) => {
      const { dataType, dims, download, dispose } = options;
      return new Tensor({ location: "gpu-buffer", type: dataType ?? "float32", gpuBuffer, dims, download, dispose });
    };
    /** Wraps a pinned CPU buffer as a tensor; dims default to [buffer.length]. */
    tensorFromPinnedBuffer = (type, buffer, dims) => new Tensor({ location: "cpu-pinned", type, data: buffer, dims: dims ?? [buffer.length] });
  }
});
// common/dist/esm/tensor-impl-type-mapping.js
var NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP, NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, isTypedArrayChecked, checkTypedArray;
var init_tensor_impl_type_mapping = __esm({
  "common/dist/esm/tensor-impl-type-mapping.js"() {
    "use strict";
    // Tensor type names whose typed-array constructor exists in every runtime.
    const baseTypes = [
      ["float32", Float32Array],
      ["uint8", Uint8Array],
      ["int8", Int8Array],
      ["uint16", Uint16Array],
      ["int16", Int16Array],
      ["int32", Int32Array],
      ["bool", Uint8Array],
      ["float64", Float64Array],
      ["uint32", Uint32Array]
    ];
    NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map(baseTypes);
    // Reverse lookup. "bool" is left out so that Uint8Array maps to "uint8".
    NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map(
      baseTypes.filter(([typeName]) => typeName !== "bool").map(([typeName, ctor]) => [ctor, typeName])
    );
    isTypedArrayChecked = false;
    // Registers a type in both lookup directions.
    const registerBothWays = (typeName, ctor) => {
      NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set(typeName, ctor);
      NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.set(ctor, typeName);
    };
    /**
     * One-time feature detection: adds int64 / uint64 / float16 to the lookup
     * maps depending on which typed arrays the runtime provides. Without a
     * native Float16Array, "float16" is backed by Uint16Array (forward map only).
     */
    checkTypedArray = () => {
      if (isTypedArrayChecked) {
        return;
      }
      isTypedArrayChecked = true;
      if (typeof BigInt64Array !== "undefined" && BigInt64Array.from) {
        registerBothWays("int64", BigInt64Array);
      }
      if (typeof BigUint64Array !== "undefined" && BigUint64Array.from) {
        registerBothWays("uint64", BigUint64Array);
      }
      if (typeof Float16Array !== "undefined" && Float16Array.from) {
        registerBothWays("float16", Float16Array);
      } else {
        NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.set("float16", Uint16Array);
      }
    };
  }
});

// common/dist/esm/tensor-utils-impl.js
var calculateSize, tensorReshape;
var init_tensor_utils_impl = __esm({
  "common/dist/esm/tensor-utils-impl.js"() {
    "use strict";
    init_tensor_impl();
    // Throws unless `dim` is a non-negative safe integer.
    const assertValidDim = (dim, index) => {
      if (typeof dim !== "number" || !Number.isSafeInteger(dim)) {
        throw new TypeError(`dims[${index}] must be an integer, got: ${dim}`);
      }
      if (dim < 0) {
        throw new RangeError(`dims[${index}] must be a non-negative integer, got: ${dim}`);
      }
    };
    /**
     * Returns the number of elements described by `dims` (1 for empty dims).
     * @throws {TypeError} when a dim is not a safe integer
     * @throws {RangeError} when a dim is negative
     */
    calculateSize = (dims) => {
      let total = 1;
      let index = 0;
      while (index < dims.length) {
        const dim = dims[index];
        assertValidDim(dim, index);
        total *= dim;
        index += 1;
      }
      return total;
    };
    /**
     * Creates a new Tensor over the same underlying data with different dims.
     * @throws {Error} when the tensor's location is not supported
     */
    tensorReshape = (tensor, dims) => {
      const location = tensor.location;
      if (location === "cpu") {
        return new Tensor(tensor.type, tensor.data, dims);
      }
      if (location === "cpu-pinned") {
        return new Tensor({ location: "cpu-pinned", data: tensor.data, type: tensor.type, dims });
      }
      if (location === "texture") {
        return new Tensor({ location: "texture", texture: tensor.texture, type: tensor.type, dims });
      }
      if (location === "gpu-buffer") {
        return new Tensor({ location: "gpu-buffer", gpuBuffer: tensor.gpuBuffer, type: tensor.type, dims });
      }
      throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`);
    };
  }
});
// common/dist/esm/tensor-impl.js
var Tensor;
var init_tensor_impl = __esm({
  "common/dist/esm/tensor-impl.js"() {
    "use strict";
    init_tensor_conversion_impl();
    init_tensor_factory_impl();
    init_tensor_impl_type_mapping();
    init_tensor_utils_impl();
    // Element types accepted for tensors backed by a GPU buffer.
    const GPU_BUFFER_TYPES = ["float32", "float16", "int32", "int64", "uint32", "uint8", "bool"];
    // Builds the data for `new Tensor(type, source, dims?)`.
    const dataForExplicitType = (type, source) => {
      if (type === "string") {
        if (!Array.isArray(source)) {
          throw new TypeError("A string tensor's data must be a string array.");
        }
        return source;
      }
      const typedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(type);
      if (typedArrayConstructor === void 0) {
        throw new TypeError(`Unsupported tensor type: ${type}.`);
      }
      if (Array.isArray(source)) {
        if (type === "float16" && typedArrayConstructor === Uint16Array) {
          throw new TypeError("Creating a float16 tensor from number array is not supported. Please use Uint16Array as data.");
        }
        // 64-bit integer arrays hold BigInt values, so convert each element.
        return type === "uint64" || type === "int64" ? typedArrayConstructor.from(source, BigInt) : typedArrayConstructor.from(source);
      }
      if (source instanceof typedArrayConstructor) {
        return source;
      }
      throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
    };
    // Infers [type, data] for `new Tensor(source, dims?)`.
    const inferTypeAndData = (source) => {
      if (!Array.isArray(source)) {
        const mappedType = NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP.get(source.constructor);
        if (mappedType === void 0) {
          throw new TypeError(`Unsupported type for tensor data: ${source.constructor}.`);
        }
        return [mappedType, source];
      }
      if (source.length === 0) {
        throw new TypeError("Tensor type cannot be inferred from an empty array.");
      }
      const firstElementType = typeof source[0];
      if (firstElementType === "string") {
        return ["string", source];
      }
      if (firstElementType === "boolean") {
        return ["bool", Uint8Array.from(source)];
      }
      throw new TypeError(`Invalid element type of data array: ${firstElementType}.`);
    };
    Tensor = class {
      /**
       * Accepted call shapes:
       *  - `new Tensor({ location, type, dims, ... })` for pinned / texture / GPU-buffer data
       *  - `new Tensor(type, data, dims?)`
       *  - `new Tensor(data, dims?)` with the type inferred from `data`
       */
      constructor(arg0, arg1, arg2) {
        checkTypedArray();
        let type;
        let dims;
        if (typeof arg0 === "object" && "location" in arg0) {
          this.dataLocation = arg0.location;
          type = arg0.type;
          dims = arg0.dims;
          const location = arg0.location;
          if (location === "cpu-pinned") {
            const expectedTypedArrayConstructor = NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP.get(type);
            if (!expectedTypedArrayConstructor) {
              throw new TypeError(`unsupported type "${type}" to create tensor from pinned buffer`);
            }
            if (!(arg0.data instanceof expectedTypedArrayConstructor)) {
              throw new TypeError(`buffer should be of type ${expectedTypedArrayConstructor.name}`);
            }
            this.cpuData = arg0.data;
          } else if (location === "texture") {
            if (type !== "float32") {
              throw new TypeError(`unsupported type "${type}" to create tensor from texture`);
            }
            this.gpuTextureData = arg0.texture;
            this.downloader = arg0.download;
            this.disposer = arg0.dispose;
          } else if (location === "gpu-buffer") {
            if (!GPU_BUFFER_TYPES.includes(type)) {
              throw new TypeError(`unsupported type "${type}" to create tensor from gpu buffer`);
            }
            this.gpuBufferData = arg0.gpuBuffer;
            this.downloader = arg0.download;
            this.disposer = arg0.dispose;
          } else {
            throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`);
          }
        } else {
          let data;
          let maybeDims;
          if (typeof arg0 === "string") {
            type = arg0;
            maybeDims = arg2;
            data = dataForExplicitType(arg0, arg1);
          } else {
            maybeDims = arg1;
            [type, data] = inferTypeAndData(arg0);
          }
          if (maybeDims === void 0) {
            // No dims given: treat the data as a 1-D tensor.
            maybeDims = [data.length];
          } else if (!Array.isArray(maybeDims)) {
            throw new TypeError("A tensor's dims must be a number array");
          }
          dims = maybeDims;
          this.cpuData = data;
          this.dataLocation = "cpu";
        }
        const size = calculateSize(dims);
        if (this.cpuData && size !== this.cpuData.length) {
          throw new Error(`Tensor's size(${size}) does not match data length(${this.cpuData.length}).`);
        }
        this.type = type;
        this.dims = dims;
        this.size = size;
      }
      // #region factory
      static async fromImage(image, options) {
        return tensorFromImage(image, options);
      }
      static fromTexture(texture, options) {
        return tensorFromTexture(texture, options);
      }
      static fromGpuBuffer(gpuBuffer, options) {
        return tensorFromGpuBuffer(gpuBuffer, options);
      }
      static fromPinnedBuffer(type, buffer, dims) {
        return tensorFromPinnedBuffer(type, buffer, dims);
      }
      // #endregion
      // #region conversions
      toDataURL(options) {
        return tensorToDataURL(this, options);
      }
      toImageData(options) {
        return tensorToImageData(this, options);
      }
      // #endregion
      // #region properties
      /** CPU data of the tensor; throws when the data is not on the CPU. */
      get data() {
        this.ensureValid();
        if (this.cpuData) {
          return this.cpuData;
        }
        throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");
      }
      get location() {
        return this.dataLocation;
      }
      get texture() {
        this.ensureValid();
        if (this.gpuTextureData) {
          return this.gpuTextureData;
        }
        throw new Error("The data is not stored as a WebGL texture.");
      }
      get gpuBuffer() {
        this.ensureValid();
        if (this.gpuBufferData) {
          return this.gpuBufferData;
        }
        throw new Error("The data is not stored as a WebGPU buffer.");
      }
      // #endregion
      // #region methods
      /**
       * Returns the tensor data, downloading it through the tensor's
       * downloader first when it is stored in a texture or GPU buffer. After a
       * download the tensor reports location "cpu".
       * @param releaseData when truthy, call the disposer after the download
       */
      async getData(releaseData) {
        this.ensureValid();
        const location = this.dataLocation;
        if (location === "cpu" || location === "cpu-pinned") {
          return this.data;
        }
        if (location !== "texture" && location !== "gpu-buffer") {
          throw new Error(`cannot get data from location: ${this.dataLocation}`);
        }
        if (!this.downloader) {
          throw new Error("The current tensor is not created with a specified data downloader.");
        }
        if (this.isDownloading) {
          throw new Error("The current tensor is being downloaded.");
        }
        try {
          this.isDownloading = true;
          const downloaded = await this.downloader();
          this.downloader = void 0;
          this.dataLocation = "cpu";
          this.cpuData = downloaded;
          if (releaseData && this.disposer) {
            this.disposer();
            this.disposer = void 0;
          }
          return downloaded;
        } finally {
          this.isDownloading = false;
        }
      }
      /** Calls the disposer (if any), drops all data references and marks the tensor disposed. */
      dispose() {
        if (this.isDownloading) {
          throw new Error("The current tensor is being downloaded.");
        }
        if (this.disposer) {
          this.disposer();
          this.disposer = void 0;
        }
        this.cpuData = void 0;
        this.gpuTextureData = void 0;
        this.gpuBufferData = void 0;
        this.downloader = void 0;
        this.isDownloading = void 0;
        this.dataLocation = "none";
      }
      // #endregion
      // #region tensor utilities
      ensureValid() {
        if (this.dataLocation === "none") {
          throw new Error("The tensor is disposed.");
        }
      }
      /** Returns a new tensor over the same data with `dims`; refused while a downloader or disposer is attached. */
      reshape(dims) {
        this.ensureValid();
        const ownsGpuResource = this.downloader || this.disposer;
        if (ownsGpuResource) {
          throw new Error("Cannot reshape a tensor that owns GPU resource.");
        }
        return tensorReshape(this, dims);
      }
    };
  }
});

// common/dist/esm/tensor.js
var Tensor2;
var init_tensor = __esm({
  "common/dist/esm/tensor.js"() {
    "use strict";
    init_tensor_impl();
    Tensor2 = Tensor;
  }
});
// common/dist/esm/inference-session-impl.js
var InferenceSession;
var init_inference_session_impl = __esm({
  "common/dist/esm/inference-session-impl.js"() {
    "use strict";
    init_backend_impl();
    init_tensor();
    init_trace();

    /**
     * Returns `candidate` when it is a non-null object, `fallback` when it is undefined,
     * and throws for every other value.
     */
    const optionsOrFallback = (candidate, fallback) => {
      if (typeof candidate === "object" && candidate !== null) {
        return candidate;
      }
      if (typeof candidate !== "undefined") {
        throw new TypeError("'options' must be an object.");
      }
      return fallback;
    };

    InferenceSession = class _InferenceSession {
      /**
       * @param handler backend session handler that every call is delegated to.
       */
      constructor(handler) {
        this.handler = handler;
      }

      /**
       * Runs the model.
       *
       * @param feeds object keyed by input name; every name in `inputNames` must be present.
       * @param arg1 either the fetches (a non-empty array of output names, or an object that has at
       * least one own property named after an output whose value is `null` or a Tensor) or the
       * run options. Any other object is treated as the run options.
       * @param arg2 run options; only consulted when `arg1` was recognised as fetches.
       * @returns an object keyed by output name whose values are Tensor instances.
       * @throws TypeError / RangeError / Error when the arguments fail validation.
       */
      async run(feeds, arg1, arg2) {
        TRACE_FUNC_BEGIN();
        const fetches = {};
        let options = {};

        const feedsIsNameMap = typeof feeds === "object" && feeds !== null && !(feeds instanceof Tensor2) && !Array.isArray(feeds);
        if (!feedsIsNameMap) {
          throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");
        }

        // Stays true until the caller names at least one output explicitly.
        let fetchAllOutputs = true;
        if (typeof arg1 === "object") {
          if (arg1 === null) {
            throw new TypeError("Unexpected argument[1]: cannot be null.");
          }
          if (arg1 instanceof Tensor2) {
            throw new TypeError("'fetches' cannot be a Tensor");
          }
          if (Array.isArray(arg1)) {
            if (arg1.length === 0) {
              throw new TypeError("'fetches' cannot be an empty array.");
            }
            fetchAllOutputs = false;
            for (const name of arg1) {
              if (typeof name !== "string") {
                throw new TypeError("'fetches' must be a string array or an object.");
              }
              if (!this.outputNames.includes(name)) {
                throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
              }
              fetches[name] = null;
            }
            options = optionsOrFallback(arg2, options);
          } else {
            // An object is a fetches map only if it carries a usable entry for some output name.
            const ownNames = Object.getOwnPropertyNames(arg1);
            let looksLikeFetches = false;
            for (const name of this.outputNames) {
              if (!ownNames.includes(name)) {
                continue;
              }
              const value = arg1[name];
              if (value === null || value instanceof Tensor2) {
                looksLikeFetches = true;
                fetchAllOutputs = false;
                fetches[name] = value;
              }
            }
            options = looksLikeFetches ? optionsOrFallback(arg2, options) : arg1;
          }
        } else if (typeof arg1 !== "undefined") {
          throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");
        }

        for (const name of this.inputNames) {
          if (typeof feeds[name] === "undefined") {
            throw new Error(`input '${name}' is missing in 'feeds'.`);
          }
        }

        if (fetchAllOutputs) {
          for (const name of this.outputNames) {
            fetches[name] = null;
          }
        }

        const results = await this.handler.run(feeds, fetches, options);

        // Wrap anything the handler returned that is not already a Tensor.
        const returnValue = {};
        for (const key in results) {
          if (!Object.prototype.hasOwnProperty.call(results, key)) {
            continue;
          }
          const result = results[key];
          returnValue[key] = result instanceof Tensor2 ? result : new Tensor2(result.type, result.data, result.dims);
        }
        TRACE_FUNC_END();
        return returnValue;
      }

      /** Disposes the underlying handler. */
      async release() {
        return this.handler.dispose();
      }

      /**
       * Creates a session. Accepted argument shapes:
       *  - (path: string, options?)
       *  - (bytes: Uint8Array, options?)
       *  - (buffer: ArrayBuffer | SharedArrayBuffer, options?)
       *  - (buffer, byteOffset: number, byteLength?: number, options?)
       *    where `options` (arg3) is only read when `byteLength` is a number.
       *
       * @throws TypeError when an argument has the wrong type, RangeError when
       * `byteOffset` / `byteLength` are not safe integers or fall outside the buffer.
       */
      static async create(arg0, arg1, arg2, arg3) {
        TRACE_FUNC_BEGIN();
        let filePathOrUint8Array;
        let options = {};

        if (typeof arg0 === "string" || arg0 instanceof Uint8Array) {
          filePathOrUint8Array = arg0;
          options = optionsOrFallback(arg1, options);
        } else if (arg0 instanceof ArrayBuffer || typeof SharedArrayBuffer !== "undefined" && arg0 instanceof SharedArrayBuffer) {
          const buffer = arg0;
          let byteOffset = 0;
          let byteLength = arg0.byteLength;
          if (typeof arg1 === "number") {
            byteOffset = arg1;
            if (!Number.isSafeInteger(byteOffset)) {
              throw new RangeError("'byteOffset' must be an integer.");
            }
            if (byteOffset < 0 || byteOffset >= buffer.byteLength) {
              throw new RangeError(`'byteOffset' is out of range [0, ${buffer.byteLength}).`);
            }
            // Default to "everything after the offset" unless a length is supplied.
            byteLength = arg0.byteLength - byteOffset;
            if (typeof arg2 === "number") {
              byteLength = arg2;
              if (!Number.isSafeInteger(byteLength)) {
                throw new RangeError("'byteLength' must be an integer.");
              }
              if (byteLength <= 0 || byteOffset + byteLength > buffer.byteLength) {
                throw new RangeError(`'byteLength' is out of range (0, ${buffer.byteLength - byteOffset}].`);
              }
              options = optionsOrFallback(arg3, options);
            } else if (typeof arg2 !== "undefined") {
              throw new TypeError("'byteLength' must be a number.");
            }
          } else {
            options = optionsOrFallback(arg1, options);
          }
          filePathOrUint8Array = new Uint8Array(buffer, byteOffset, byteLength);
        } else {
          throw new TypeError("Unexpected argument[0]: must be 'path' or 'buffer'.");
        }

        const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
        const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, optionsWithValidatedEPs);
        TRACE_FUNC_END();
        return new _InferenceSession(handler);
      }

      /** Forwards to the handler's `startProfiling`. */
      startProfiling() {
        this.handler.startProfiling();
      }

      /** Forwards to the handler's `endProfiling`. */
      endProfiling() {
        this.handler.endProfiling();
      }

      /** Input names as reported by the handler. */
      get inputNames() {
        return this.handler.inputNames;
      }

      /** Output names as reported by the handler. */
      get outputNames() {
        return this.handler.outputNames;
      }
    };
  }
});
// common/dist/esm/training-session-impl.js
var noBackendErrMsg, TrainingSession;
var init_training_session_impl = __esm({
  "common/dist/esm/training-session-impl.js"() {
    "use strict";
    init_backend_impl();
    init_tensor();
    noBackendErrMsg = "Training backend could not be resolved. Make sure you're using the correct configuration & WebAssembly files.";

    /**
     * Returns `candidate` when it is a non-null object, `fallback` when it is undefined,
     * and throws for every other value.
     */
    const runOptionsOrFallback = (candidate, fallback) => {
      if (typeof candidate === "object" && candidate !== null) {
        return candidate;
      }
      if (typeof candidate !== "undefined") {
        throw new TypeError("'options' must be an object.");
      }
      return fallback;
    };

    TrainingSession = class _TrainingSession {
      /**
       * @param handler backend training handler that every call is delegated to.
       * @param hasOptimizerModel whether an optimizer model was supplied at creation.
       * @param hasEvalModel whether an eval model was supplied at creation.
       */
      constructor(handler, hasOptimizerModel, hasEvalModel) {
        this.handler = handler;
        this.hasOptimizerModel = hasOptimizerModel;
        this.hasEvalModel = hasEvalModel;
      }

      /** Input names of the training model, as reported by the handler. */
      get trainingInputNames() {
        return this.handler.inputNames;
      }

      /** Output names of the training model, as reported by the handler. */
      get trainingOutputNames() {
        return this.handler.outputNames;
      }

      /** Input names of the eval model; throws when no eval model is loaded. */
      get evalInputNames() {
        if (!this.hasEvalModel) {
          throw new Error("This training session has no evalModel loaded.");
        }
        return this.handler.evalInputNames;
      }

      /** Output names of the eval model; throws when no eval model is loaded. */
      get evalOutputNames() {
        if (!this.hasEvalModel) {
          throw new Error("This training session has no evalModel loaded.");
        }
        return this.handler.evalOutputNames;
      }

      /**
       * Resolves a backend and creates a training session on it.
       *
       * @param trainingOptions provides `checkpointState`, `trainModel` and optionally
       * `evalModel` / `optimizerModel` (an empty string is passed for a missing one).
       * @param sessionOptions optional session options; defaults to an empty object.
       * @throws Error when the resolved backend has no `createTrainingSessionHandler`.
       */
      static async create(trainingOptions, sessionOptions) {
        const evalModel = trainingOptions.evalModel || "";
        const optimizerModel = trainingOptions.optimizerModel || "";
        const options = sessionOptions || {};
        const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
        if (!backend.createTrainingSessionHandler) {
          throw new Error(noBackendErrMsg);
        }
        const handler = await backend.createTrainingSessionHandler(
          trainingOptions.checkpointState,
          trainingOptions.trainModel,
          evalModel,
          optimizerModel,
          optionsWithValidatedEPs
        );
        return new _TrainingSession(handler, Boolean(trainingOptions.optimizerModel), Boolean(trainingOptions.evalModel));
      }

      /**
       * Helper for runTrainStep and the other run-step methods: validates the arguments and
       * narrows them into a fetches object and a run-options object.
       *
       * @param inputNames every name in this list must be present in `feeds`.
       * @param outputNames the names that fetches entries are validated against.
       * @param feeds the required inputs, keyed by input name.
       * @param arg1 either the fetches (a non-empty array of output names, or an object that has at
       * least one own property named after an output whose value is `null` or a Tensor) or the
       * run options.
       * @param arg2 run options; only consulted when `arg1` was recognised as fetches.
       * @returns a `[fetches, options]` pair. When no output was named explicitly, `fetches`
       * holds every name in `outputNames` mapped to `null`.
       */
      typeNarrowingForRunStep(inputNames, outputNames, feeds, arg1, arg2) {
        const fetches = {};
        let options = {};

        const feedsIsNameMap = typeof feeds === "object" && feeds !== null && !(feeds instanceof Tensor2) && !Array.isArray(feeds);
        if (!feedsIsNameMap) {
          throw new TypeError("'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.");
        }

        // Stays true until the caller names at least one output explicitly.
        let fetchAllOutputs = true;
        if (typeof arg1 === "object") {
          if (arg1 === null) {
            throw new TypeError("Unexpected argument[1]: cannot be null.");
          }
          if (arg1 instanceof Tensor2) {
            throw new TypeError("'fetches' cannot be a Tensor");
          }
          if (Array.isArray(arg1)) {
            if (arg1.length === 0) {
              throw new TypeError("'fetches' cannot be an empty array.");
            }
            fetchAllOutputs = false;
            for (const name of arg1) {
              if (typeof name !== "string") {
                throw new TypeError("'fetches' must be a string array or an object.");
              }
              if (!outputNames.includes(name)) {
                throw new RangeError(`'fetches' contains invalid output name: ${name}.`);
              }
              fetches[name] = null;
            }
            options = runOptionsOrFallback(arg2, options);
          } else {
            // An object is a fetches map only if it carries a usable entry for some output name.
            const ownNames = Object.getOwnPropertyNames(arg1);
            let looksLikeFetches = false;
            for (const name of outputNames) {
              if (!ownNames.includes(name)) {
                continue;
              }
              const value = arg1[name];
              if (value === null || value instanceof Tensor2) {
                looksLikeFetches = true;
                fetchAllOutputs = false;
                fetches[name] = value;
              }
            }
            options = looksLikeFetches ? runOptionsOrFallback(arg2, options) : arg1;
          }
        } else if (typeof arg1 !== "undefined") {
          throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'.");
        }

        for (const name of inputNames) {
          if (typeof feeds[name] === "undefined") {
            throw new Error(`input '${name}' is missing in 'feeds'.`);
          }
        }

        if (fetchAllOutputs) {
          for (const name of outputNames) {
            fetches[name] = null;
          }
        }
        return [fetches, options];
      }

      /**
       * Helper for runTrainStep and the other run-step methods: turns the handler's result
       * object into an object whose values are all Tensor instances.
       *
       * @param results object returned by the handler, keyed by output name.
       * @returns a new object with the same own keys; non-Tensor values are wrapped using their
       * `type`, `data` and `dims` properties.
       */
      convertHandlerReturnTypeToMapOfTensors(results) {
        const returnValue = {};
        for (const key in results) {
          if (!Object.prototype.hasOwnProperty.call(results, key)) {
            continue;
          }
          const result = results[key];
          returnValue[key] = result instanceof Tensor2 ? result : new Tensor2(result.type, result.data, result.dims);
        }
        return returnValue;
      }

      /** Forwards to the handler's `lazyResetGrad`. */
      async lazyResetGrad() {
        await this.handler.lazyResetGrad();
      }

      /**
       * Runs one training step; arguments follow the same rules as `typeNarrowingForRunStep`.
       *
       * @returns the handler's outputs as an object of Tensors.
       */
      async runTrainStep(feeds, arg1, arg2) {
        const [fetches, options] = this.typeNarrowingForRunStep(this.trainingInputNames, this.trainingOutputNames, feeds, arg1, arg2);
        const results = await this.handler.runTrainStep(feeds, fetches, options);
        return this.convertHandlerReturnTypeToMapOfTensors(results);
      }

      /**
       * Runs one optimizer step.
       *
       * @param options optional run options; an empty object is passed when falsy.
       * @throws Error when no optimizer model was loaded.
       */
      async runOptimizerStep(options) {
        if (!this.hasOptimizerModel) {
          throw new Error("This TrainingSession has no OptimizerModel loaded.");
        }
        await this.handler.runOptimizerStep(options || {});
      }

      /**
       * Runs one eval step; arguments follow the same rules as `typeNarrowingForRunStep`.
       *
       * @returns the handler's outputs as an object of Tensors.
       * @throws Error when no eval model was loaded.
       */
      async runEvalStep(feeds, arg1, arg2) {
        if (!this.hasEvalModel) {
          throw new Error("This TrainingSession has no EvalModel loaded.");
        }
        const [fetches, options] = this.typeNarrowingForRunStep(this.evalInputNames, this.evalOutputNames, feeds, arg1, arg2);
        const results = await this.handler.runEvalStep(feeds, fetches, options);
        return this.convertHandlerReturnTypeToMapOfTensors(results);
      }

      /** Forwards to the handler's `getParametersSize`. */
      async getParametersSize(trainableOnly = true) {
        return this.handler.getParametersSize(trainableOnly);
      }

      /**
       * Loads parameters from `array` after checking its length.
       *
       * @param array buffer whose `length` must equal 4 times the value reported by
       * `getParametersSize(trainableOnly)`.
       * @throws Error when the length does not match.
       */
      async loadParametersBuffer(array, trainableOnly = true) {
        const paramsSize = await this.getParametersSize(trainableOnly);
        // NOTE(review): the factor 4 presumably means 4 bytes per float32 parameter — confirm.
        const expectedLength = 4 * paramsSize;
        if (array.length !== expectedLength) {
          throw new Error("Size of the buffer passed into loadParametersBuffer must match the number of parameters in the model. Please use getParametersSize method to check.");
        }
        return this.handler.loadParametersBuffer(array, trainableOnly);
      }

      /** Forwards to the handler's `getContiguousParameters`. */
      async getContiguousParameters(trainableOnly = true) {
        return this.handler.getContiguousParameters(trainableOnly);
      }

      /** Disposes the underlying handler. */
      async release() {
        return this.handler.dispose();
      }
    };
  }
});
nodejs-ignore:path +var path_exports = {}; +__export(path_exports, { + join: () => join2 +}); +var join2; +var init_path = __esm({ + "nodejs-ignore:path"() { + join2 = void 0; + } +}); + +// web/lib/wasm/binding/ort-wasm-simd.jsep.js +var require_ort_wasm_simd_jsep = __commonJS({ + "web/lib/wasm/binding/ort-wasm-simd.jsep.js"(exports, module) { + "use strict"; + var ortWasm = (() => { + var _scriptDir = typeof document != "undefined" ? document.currentScript?.src : void 0; + if (typeof __filename != "undefined") + _scriptDir ||= __filename; + return function(moduleArg = {}) { + var g = moduleArg, aa, ba, readyPromise = new Promise((a, b) => { + aa = a; + ba = b; + }); + "use strict"; + g.mountExternalData = (a, b) => { + (g.Ph || (g.Ph = /* @__PURE__ */ new Map())).set(a, b); + }; + g.unmountExternalData = () => { + delete g.Ph; + }; + let da = () => { + const a = (c, d, e) => (...f) => { + const h = t, k = d?.(); + f = c(...f); + const l = d?.(); + k !== l && (c = l, e(k), d = e = null); + return t != h ? 
ca() : f; + }, b = (c) => async (...d) => { + try { + if (g.Oh) + throw Error("Session already started"); + const e = g.Oh = { gi: d[0], errors: [] }, f = await c(...d); + if (g.Oh !== e) + throw Error("Session mismatch"); + g.Uh?.flush(); + const h = e.errors; + if (0 < h.length) { + let k = await Promise.all(h); + k = k.filter((l) => l); + if (0 < k.length) + throw Error(k.join("\n")); + } + return f; + } finally { + g.Oh = null; + } + }; + g._OrtCreateSession = a( + g._OrtCreateSession, + () => g._OrtCreateSession, + (c) => g._OrtCreateSession = c + ); + g._OrtRun = b(a(g._OrtRun, () => g._OrtRun, (c) => g._OrtRun = c)); + g._OrtRunWithBinding = b(a(g._OrtRunWithBinding, () => g._OrtRunWithBinding, (c) => g._OrtRunWithBinding = c)); + g._OrtBindInput = a(g._OrtBindInput, () => g._OrtBindInput, (c) => g._OrtBindInput = c); + da = void 0; + }; + g.jsepInit = (a, b) => { + da?.(); + if ("webgpu" === a) { + [g.Uh, g.Zh, g.ci, g.Vh, g.bi, g.je, g.di, g.fi, g.$h, g.ai, g.ei] = b; + const c = g.Uh; + g.jsepRegisterBuffer = (d, e, f, h) => c.registerBuffer(d, e, f, h); + g.jsepGetBuffer = (d) => c.getBuffer(d); + g.jsepCreateDownloader = (d, e, f) => c.createDownloader(d, e, f); + g.jsepOnReleaseSession = (d) => { + c.onReleaseSession(d); + }; + g.jsepOnRunStart = (d) => c.onRunStart(d); + } + }; + var ea = Object.assign({}, g), fa = "./this.program", ha = (a, b) => { + throw b; + }, ia = "object" == typeof window, ja = "function" == typeof importScripts, ka = "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node, v = "", la, ma, na; + if (ka) { + var fs = (init_fs(), __toCommonJS(fs_exports)), oa = (init_path(), __toCommonJS(path_exports)); + v = ja ? oa.dirname(v) + "/" : __dirname + "/"; + la = (a, b) => { + a = pa(a) ? new URL(a) : oa.normalize(a); + return fs.readFileSync(a, b ? 
void 0 : "utf8"); + }; + na = (a) => { + a = la(a, true); + a.buffer || (a = new Uint8Array(a)); + return a; + }; + ma = (a, b, c, d = true) => { + a = pa(a) ? new URL(a) : oa.normalize(a); + fs.readFile(a, d ? void 0 : "utf8", (e, f) => { + e ? c(e) : b(d ? f.buffer : f); + }); + }; + !g.thisProgram && 1 < process.argv.length && (fa = process.argv[1].replace(/\\/g, "/")); + process.argv.slice(2); + ha = (a, b) => { + process.exitCode = a; + throw b; + }; + } else if (ia || ja) + ja ? v = self.location.href : "undefined" != typeof document && document.currentScript && (v = document.currentScript.src), _scriptDir && (v = _scriptDir), v.startsWith("blob:") ? v = "" : v = v.substr(0, v.replace(/[?#].*/, "").lastIndexOf("/") + 1), la = (a) => { + var b = new XMLHttpRequest(); + b.open("GET", a, false); + b.send(null); + return b.responseText; + }, ja && (na = (a) => { + var b = new XMLHttpRequest(); + b.open("GET", a, false); + b.responseType = "arraybuffer"; + b.send(null); + return new Uint8Array(b.response); + }), ma = (a, b, c) => { + var d = new XMLHttpRequest(); + d.open("GET", a, true); + d.responseType = "arraybuffer"; + d.onload = () => { + 200 == d.status || 0 == d.status && d.response ? 
/**
 * Asynchronously obtains the bytes located at `a` (the wasm binary path/URL).
 *
 * Strategy, in order:
 *  1. When a `window` object (`ia`) or `importScripts` (`ja`) exists, `fetch` is available and
 *     `a` is not a "file://" URL (`pa`): fetch with same-origin credentials. Any failure,
 *     including a non-OK response, falls back to the synchronous reader `Ka`.
 *  2. Otherwise, in the same environments, when the async reader `ma` is defined: use it
 *     and wrap the result in a Uint8Array.
 *  3. Otherwise defer to `Ka` on a resolved promise, so that a throw from `Ka` becomes a
 *     rejection instead of a synchronous exception.
 *
 * @param a path or URL of the binary.
 * @returns a promise resolving to an ArrayBuffer (fetch path), a Uint8Array (`ma` path) or
 * whatever `Ka` returns.
 */
function La(a) {
  if (ia || ja) {
    if ("function" == typeof fetch && !pa(a))
      return fetch(a, { credentials: "same-origin" }).then((b) => {
        if (!b.ok)
          // The thrown string is swallowed by the catch below, which retries through Ka.
          throw `failed to load wasm binary file at '${a}'`;
        return b.arrayBuffer();
      }).catch(() => Ka(a));
    if (ma)
      return new Promise((b, c) => {
        ma(a, (d) => b(new Uint8Array(d)), c);
      });
  }
  return Promise.resolve().then(() => Ka(a));
}
Ma(c, a, b) : fetch(c, { credentials: "same-origin" }).then((d) => WebAssembly.instantiateStreaming(d, a).then(b, function(e) { + w(`wasm streaming compile failed: ${e}`); + w("falling back to ArrayBuffer instantiation"); + return Ma(c, a, b); + })); + } + var Oa = { + 1261504: (a, b, c, d) => { + if ("undefined" == typeof g || !g.Ph) + return 1; + a = J(a >>> 0); + a.startsWith("./") && (a = a.substring(2)); + a = g.Ph.get(a); + if (!a) + return 2; + b >>>= 0; + c >>>= 0; + if (b + c > a.byteLength) + return 3; + try { + return E.set(a.subarray(b, b + c), d >>> 0 >>> 0), 0; + } catch { + return 4; + } + }, + 1262005: (a, b, c) => { + c = J(c); + const d = new Uint8Array(b); + d.set(E.subarray(a >>> 0, a + b >>> 0)); + "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node ? (init_fs(), __toCommonJS(fs_exports)).writeFileSync(c, d) : (a = new File([d], c, { type: "application/octet-stream" }), a = URL.createObjectURL(a), window.open(a, "_blank")); + }, + 1262513: () => { + g.$h(); + }, + 1262544: () => { + g.ai(); + }, + 1262573: () => { + g.ei(); + }, + 1262598: (a) => g.Zh(a), + 1262631: (a) => g.ci(a), + 1262663: (a, b, c) => { + g.Vh(a, b, c, true); + }, + 1262702: (a, b, c) => { + g.Vh(a, b, c); + }, + 1262735: (a) => { + g.je("Abs", a, void 0); + }, + 1262786: (a) => { + g.je("Neg", a, void 0); + }, + 1262837: (a) => { + g.je("Floor", a, void 0); + }, + 1262890: (a) => { + g.je("Ceil", a, void 0); + }, + 1262942: (a) => { + g.je("Reciprocal", a, void 0); + }, + 1263e3: (a) => { + g.je("Sqrt", a, void 0); + }, + 1263052: (a) => { + g.je("Exp", a, void 0); + }, + 1263103: (a) => { + g.je("Erf", a, void 0); + }, + 1263154: (a) => { + g.je("Sigmoid", a, void 0); + }, + 1263209: (a, b, c) => { + g.je("HardSigmoid", a, { alpha: b, beta: c }); + }, + 1263288: (a) => { + g.je("Log", a, void 0); + }, + 1263339: (a) => { + g.je("Sin", a, void 0); + }, + 1263390: (a) => { + g.je("Cos", a, void 0); + }, + 1263441: 
(a) => { + g.je("Tan", a, void 0); + }, + 1263492: (a) => { + g.je("Asin", a, void 0); + }, + 1263544: (a) => { + g.je("Acos", a, void 0); + }, + 1263596: (a) => { + g.je("Atan", a, void 0); + }, + 1263648: (a) => { + g.je("Sinh", a, void 0); + }, + 1263700: (a) => { + g.je("Cosh", a, void 0); + }, + 1263752: (a) => { + g.je("Asinh", a, void 0); + }, + 1263805: (a) => { + g.je("Acosh", a, void 0); + }, + 1263858: (a) => { + g.je("Atanh", a, void 0); + }, + 1263911: (a) => { + g.je("Tanh", a, void 0); + }, + 1263963: (a) => { + g.je("Not", a, void 0); + }, + 1264014: (a, b, c) => { + g.je("Clip", a, { min: b, max: c }); + }, + 1264083: (a) => { + g.je("Clip", a, void 0); + }, + 1264135: (a, b) => { + g.je("Elu", a, { alpha: b }); + }, + 1264193: (a) => { + g.je("Relu", a, void 0); + }, + 1264245: (a, b) => { + g.je("LeakyRelu", a, { alpha: b }); + }, + 1264309: (a, b) => { + g.je("ThresholdedRelu", a, { alpha: b }); + }, + 1264379: (a, b) => { + g.je("Cast", a, { to: b }); + }, + 1264437: (a) => { + g.je("Add", a, void 0); + }, + 1264488: (a) => { + g.je("Sub", a, void 0); + }, + 1264539: (a) => { + g.je("Mul", a, void 0); + }, + 1264590: (a) => { + g.je("Div", a, void 0); + }, + 1264641: (a) => { + g.je("Pow", a, void 0); + }, + 1264692: (a) => { + g.je("Equal", a, void 0); + }, + 1264745: (a) => { + g.je("Greater", a, void 0); + }, + 1264800: (a) => { + g.je("GreaterOrEqual", a, void 0); + }, + 1264862: (a) => { + g.je("Less", a, void 0); + }, + 1264914: (a) => { + g.je("LessOrEqual", a, void 0); + }, + 1264973: (a, b, c, d, e) => { + g.je("ReduceMean", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265132: (a, b, c, d, e) => { + g.je("ReduceMax", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265290: (a, b, c, d, e) => { + g.je("ReduceMin", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? 
Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265448: (a, b, c, d, e) => { + g.je("ReduceProd", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265607: (a, b, c, d, e) => { + g.je("ReduceSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265765: (a, b, c, d, e) => { + g.je("ReduceL1", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1265922: (a, b, c, d, e) => { + g.je("ReduceL2", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1266079: (a, b, c, d, e) => { + g.je("ReduceLogSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1266240: (a, b, c, d, e) => { + g.je("ReduceSumSquare", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1266404: (a, b, c, d, e) => { + g.je("ReduceLogSumExp", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1266568: (a) => { + g.je("Where", a, void 0); + }, + 1266621: (a, b, c) => { + g.je("Transpose", a, { perm: b ? Array.from(G.subarray(b >>> 0, c >>> 0)) : [] }); + }, + 1266729: (a, b, c, d) => { + g.je("DepthToSpace", a, { blocksize: b, mode: J(c), format: d ? "NHWC" : "NCHW" }); + }, + 1266862: (a, b, c, d) => { + g.je("DepthToSpace", a, { blocksize: b, mode: J(c), format: d ? "NHWC" : "NCHW" }); + }, + 1266995: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => { + g.je("ConvTranspose", a, { format: l ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: d, kernelShape: [e], pads: [f, h], strides: [k], wIsConst: () => !!z[m >>> 0], outputPadding: n ? Array.from(G.subarray(n >>> 0, q >>> 0)) : [], outputShape: r ? 
Array.from(G.subarray(r >>> 0, p >>> 0)) : [], activation: J(u) }); + }, + 1267396: (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => { + g.je("ConvTranspose", a, { format: k ? "NHWC" : "NCHW", autoPad: b, dilations: Array.from(G.subarray(c >>> 0, (c >>> 0) + 2 >>> 0)), group: d, kernelShape: Array.from(G.subarray(e >>> 0, (e >>> 0) + 2 >>> 0)), pads: Array.from(G.subarray(f >>> 0, (f >>> 0) + 4 >>> 0)), strides: Array.from(G.subarray(h >>> 0, (h >>> 0) + 2 >>> 0)), wIsConst: () => !!z[l >>> 0], outputPadding: m ? Array.from(G.subarray(m >>> 0, n >>> 0)) : [], outputShape: q ? Array.from(G.subarray(q >>> 0, r >>> 0)) : [], activation: J(p) }); + }, + 1267961: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => { + g.je("ConvTranspose", a, { format: l ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: d, kernelShape: [e], pads: [f, h], strides: [k], wIsConst: () => !!z[m >>> 0], outputPadding: n ? Array.from(G.subarray(n >>> 0, q >>> 0)) : [], outputShape: r ? Array.from(G.subarray(r >>> 0, p >>> 0)) : [], activation: J(u) }); + }, + 1268362: (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => { + g.je("ConvTranspose", a, { + format: k ? "NHWC" : "NCHW", + autoPad: b, + dilations: Array.from(G.subarray(c >>> 0, (c >>> 0) + 2 >>> 0)), + group: d, + kernelShape: Array.from(G.subarray(e >>> 0, (e >>> 0) + 2 >>> 0)), + pads: Array.from(G.subarray(f >>> 0, (f >>> 0) + 4 >>> 0)), + strides: Array.from(G.subarray(h >>> 0, (h >>> 0) + 2 >>> 0)), + wIsConst: () => !!z[l >>> 0], + outputPadding: m ? Array.from(G.subarray(m >>> 0, n >>> 0)) : [], + outputShape: q ? Array.from(G.subarray(q >>> 0, r >>> 0)) : [], + activation: J(p) + }); + }, + 1268927: (a, b) => { + g.je("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 1269018: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => { + g.je("AveragePool", a, { + format: y ? 
"NHWC" : "NCHW", + auto_pad: b, + ceil_mode: c, + count_include_pad: d, + storage_order: e, + dilations: [f, h], + kernel_shape: [k, l], + pads: [m, n, q, r], + strides: [p, u] + }); + }, + 1269302: (a, b) => { + g.je("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 1269393: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => { + g.je("AveragePool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] }); + }, + 1269677: (a, b) => { + g.je("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 1269764: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => { + g.je("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] }); + }, + 1270044: (a, b) => { + g.je("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 1270131: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => { + g.je("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] }); + }, + 1270411: (a, b, c, d, e) => { + g.je("Gemm", a, { alpha: b, beta: c, transA: d, transB: e }); + }, + 1270515: (a) => { + g.je("MatMul", a, void 0); + }, + 1270569: (a, b, c, d) => { + g.je("ArgMax", a, { keepDims: !!b, selectLastIndex: !!c, axis: d }); + }, + 1270677: (a, b, c, d) => { + g.je("ArgMin", a, { keepDims: !!b, selectLastIndex: !!c, axis: d }); + }, + 1270785: (a, b) => { + g.je("Softmax", a, { axis: b }); + }, + 1270848: (a, b) => { + g.je("Concat", a, { axis: b }); + }, + 1270908: (a, b, c, d, e) => { + g.je("Split", a, { axis: b, numOutputs: c, splitSizes: d ? 
Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1271048: (a) => { + g.je("Expand", a, void 0); + }, + 1271102: (a, b) => { + g.je("Gather", a, { axis: Number(b) }); + }, + 1271173: (a, b) => { + g.je( + "GatherElements", + a, + { axis: Number(b) } + ); + }, + 1271252: (a, b, c, d, e, f, h, k, l, m, n) => { + g.je("Resize", a, { antialias: b, axes: c ? Array.from(G.subarray(c >>> 0, d >>> 0)) : [], coordinateTransformMode: J(e), cubicCoeffA: f, excludeOutside: h, extrapolationValue: k, keepAspectRatioPolicy: J(l), mode: J(m), nearestMode: J(n) }); + }, + 1271598: (a, b, c, d, e, f, h) => { + g.je("Slice", a, { starts: b ? Array.from(G.subarray(b >>> 0, c >>> 0)) : [], ends: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [], axes: f ? Array.from(G.subarray(f >>> 0, h >>> 0)) : [] }); + }, + 1271814: (a) => { + g.je("Tile", a, void 0); + }, + 1271866: (a, b, c, d) => { + g.je("LayerNormalization", a, { axis: b, epsilon: c, simplified: !!d }); + }, + 1271977: (a, b, c) => { + g.je("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" }); + }, + 1272091: (a, b, c) => { + g.je("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" }); + }, + 1272205: (a) => { + g.je("Range", a, void 0); + }, + 1272258: (a, b) => { + g.je("Einsum", a, { equation: J(b) }); + }, + 1272339: (a, b, c, d, e) => { + g.je("Pad", a, { mode: b, value: c, pads: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] }); + }, + 1272466: (a, b, c, d, e, f) => { + g.je("BatchNormalization", a, { + epsilon: b, + momentum: c, + spatial: !!e, + trainingMode: !!d, + format: f ? "NHWC" : "NCHW" + }); + }, + 1272635: (a, b, c, d, e, f) => { + g.je("BatchNormalization", a, { epsilon: b, momentum: c, spatial: !!e, trainingMode: !!d, format: f ? 
"NHWC" : "NCHW" }); + }, + 1272804: (a, b, c) => { + g.je("CumSum", a, { exclusive: Number(b), reverse: Number(c) }); + }, + 1272901: (a, b, c, d, e, f, h, k, l) => { + g.je("Attention", a, { numHeads: b, isUnidirectional: c, maskFilterValue: d, scale: e, doRotary: f, qkvHiddenSizes: h ? Array.from(G.subarray(Number(k) >>> 0, Number(k) + h >>> 0)) : [], pastPresentShareBuffer: !!l }); + }, + 1273173: (a) => { + g.je( + "BiasAdd", + a, + void 0 + ); + }, + 1273228: (a) => { + g.je("BiasSplitGelu", a, void 0); + }, + 1273289: (a) => { + g.je("FastGelu", a, void 0); + }, + 1273345: (a, b, c, d, e, f, h, k, l, m, n, q, r) => { + g.je("Conv", a, { format: l ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: d, kernel_shape: [e], pads: f ? Array.from(G.subarray(f >>> 0, h >>> 0)) : [], strides: [k], w_is_const: () => !!z[m >>> 0], activation: J(n), activation_params: q ? Array.from(va.subarray(q >>> 0, r >>> 0)) : [] }); + }, + 1273715: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => { + g.je("Conv", a, { format: q ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, d], group: e, kernel_shape: [ + f, + h + ], pads: k ? Array.from(G.subarray(k >>> 0, l >>> 0)) : [], strides: [m, n], w_is_const: () => !!z[r >>> 0], activation: J(p), activation_params: u ? 
Array.from(va.subarray(u >>> 0, y >>> 0)) : [] }); + }, + 1274106: (a) => { + g.je("Gelu", a, void 0); + }, + 1274158: (a, b, c, d, e, f) => { + g.je("MatMulNBits", a, { k: b, n: c, accuracyLevel: d, bits: e, blockSize: f }); + }, + 1274285: (a, b, c, d, e, f) => { + g.je("MultiHeadAttention", a, { numHeads: b, isUnidirectional: c, maskFilterValue: d, scale: e, doRotary: f }); + }, + 1274444: (a, b, c, d, e) => { + g.je("RotaryEmbedding", a, { + interleaved: !!b, + numHeads: c, + rotaryEmbeddingDim: d, + scale: e + }); + }, + 1274583: (a, b, c) => { + g.je("SkipLayerNormalization", a, { epsilon: b, simplified: !!c }); + }, + 1274685: (a, b, c) => { + g.je("SkipLayerNormalization", a, { epsilon: b, simplified: !!c }); + }, + 1274787: (a, b, c, d) => { + g.je("LayerNormalization", a, { axis: b, epsilon: c, simplified: !!d }); + }, + 1274898: (a) => { + g.di(a); + }, + 1274932: (a, b) => g.fi(a, b, g.Oh.gi, g.Oh.errors) + }; + function Pa(a) { + this.name = "ExitStatus"; + this.message = `Program terminated with exit(${a})`; + this.status = a; + } + var Qa = [], Ra = 0, L = 0; + class Sa { + constructor(a) { + this.Nh = a; + this.Ih = a - 24; + } + } + var Za = (a) => { + var b = L; + if (!b) + return Ta(0), 0; + var c = new Sa(b); + I[c.Ih + 16 >>> 2 >>> 0] = b; + var d = I[c.Ih + 4 >>> 2 >>> 0]; + if (!d) + return Ta(0), b; + for (var e in a) { + var f = a[e]; + if (0 === f || f === d) + break; + if (Ua(f, d, c.Ih + 16)) + return Ta(f), b; + } + Ta(d); + return b; + }, $a = "undefined" != typeof TextDecoder ? new TextDecoder("utf8") : void 0, ab = (a, b, c) => { + b >>>= 0; + var d = b + c; + for (c = b; a[c] && !(c >= d); ) + ++c; + if (16 < c - b && a.buffer && $a) + return $a.decode(a.subarray(b, c)); + for (d = ""; b < c; ) { + var e = a[b++]; + if (e & 128) { + var f = a[b++] & 63; + if (192 == (e & 224)) + d += String.fromCharCode((e & 31) << 6 | f); + else { + var h = a[b++] & 63; + e = 224 == (e & 240) ? 
(e & 15) << 12 | f << 6 | h : (e & 7) << 18 | f << 12 | h << 6 | a[b++] & 63; + 65536 > e ? d += String.fromCharCode(e) : (e -= 65536, d += String.fromCharCode(55296 | e >> 10, 56320 | e & 1023)); + } + } else + d += String.fromCharCode(e); + } + return d; + }, J = (a, b) => (a >>>= 0) ? ab(E, a, b) : "", bb = (a) => { + for (var b = 0, c = 0; c < a.length; ++c) { + var d = a.charCodeAt(c); + 127 >= d ? b++ : 2047 >= d ? b += 2 : 55296 <= d && 57343 >= d ? (b += 4, ++c) : b += 3; + } + return b; + }, M = (a, b, c, d) => { + c >>>= 0; + if (!(0 < d)) + return 0; + var e = c; + d = c + d - 1; + for (var f = 0; f < a.length; ++f) { + var h = a.charCodeAt(f); + if (55296 <= h && 57343 >= h) { + var k = a.charCodeAt(++f); + h = 65536 + ((h & 1023) << 10) | k & 1023; + } + if (127 >= h) { + if (c >= d) + break; + b[c++ >>> 0] = h; + } else { + if (2047 >= h) { + if (c + 1 >= d) + break; + b[c++ >>> 0] = 192 | h >> 6; + } else { + if (65535 >= h) { + if (c + 2 >= d) + break; + b[c++ >>> 0] = 224 | h >> 12; + } else { + if (c + 3 >= d) + break; + b[c++ >>> 0] = 240 | h >> 18; + b[c++ >>> 0] = 128 | h >> 12 & 63; + } + b[c++ >>> 0] = 128 | h >> 6 & 63; + } + b[c++ >>> 0] = 128 | h & 63; + } + } + b[c >>> 0] = 0; + return c - e; + }, cb, N = (a) => { + for (var b = ""; E[a >>> 0]; ) + b += cb[E[a++ >>> 0]]; + return b; + }, db = {}, eb = {}, fb = {}, O; + function gb(a, b, c = {}) { + var d = b.name; + if (!a) + throw new O(`type "${d}" must have a positive integer typeid pointer`); + if (eb.hasOwnProperty(a)) { + if (c.Xh) + return; + throw new O(`Cannot register type '${d}' twice`); + } + eb[a] = b; + delete fb[a]; + db.hasOwnProperty(a) && (b = db[a], delete db[a], b.forEach((e) => e())); + } + function P(a, b, c = {}) { + if (!("argPackAdvance" in b)) + throw new TypeError("registerType registeredInstance requires argPackAdvance"); + return gb(a, b, c); + } + var hb = (a, b, c) => { + switch (b) { + case 1: + return c ? 
(d) => z[d >>> 0] : (d) => E[d >>> 0]; + case 2: + return c ? (d) => ta[d >>> 1 >>> 0] : (d) => ua[d >>> 1 >>> 0]; + case 4: + return c ? (d) => G[d >>> 2 >>> 0] : (d) => I[d >>> 2 >>> 0]; + case 8: + return c ? (d) => wa[d >>> 3] : (d) => xa[d >>> 3]; + default: + throw new TypeError(`invalid integer width (${b}): ${a}`); + } + }, ib = [], Q = []; + function jb(a) { + a >>>= 0; + 9 < a && 0 === --Q[a + 1] && (Q[a] = void 0, ib.push(a)); + } + var R = (a) => { + if (!a) + throw new O("Cannot use deleted val. handle = " + a); + return Q[a]; + }, S = (a) => { + switch (a) { + case void 0: + return 2; + case null: + return 4; + case true: + return 6; + case false: + return 8; + default: + const b = ib.pop() || Q.length; + Q[b] = a; + Q[b + 1] = 1; + return b; + } + }; + function kb(a) { + return this.fromWireType(I[a >>> 2 >>> 0]); + } + var lb = { name: "emscripten::val", fromWireType: (a) => { + var b = R(a); + jb(a); + return b; + }, toWireType: (a, b) => S(b), argPackAdvance: 8, readValueFromPointer: kb, Mh: null }, mb = (a, b) => { + switch (b) { + case 4: + return function(c) { + return this.fromWireType(va[c >>> 2 >>> 0]); + }; + case 8: + return function(c) { + return this.fromWireType(ya[c >>> 3 >>> 0]); + }; + default: + throw new TypeError(`invalid float width (${b}): ${a}`); + } + }, nb = "undefined" != typeof TextDecoder ? new TextDecoder("utf-16le") : void 0, ob = (a, b) => { + var c = a >> 1; + for (var d = c + b / 2; !(c >= d) && ua[c >>> 0]; ) + ++c; + c <<= 1; + if (32 < c - a && nb) + return nb.decode(E.subarray(a >>> 0, c >>> 0)); + c = ""; + for (d = 0; !(d >= b / 2); ++d) { + var e = ta[a + 2 * d >>> 1 >>> 0]; + if (0 == e) + break; + c += String.fromCharCode(e); + } + return c; + }, pb = (a, b, c) => { + c ??= 2147483647; + if (2 > c) + return 0; + c -= 2; + var d = b; + c = c < 2 * a.length ? 
c / 2 : a.length; + for (var e = 0; e < c; ++e) + ta[b >>> 1 >>> 0] = a.charCodeAt(e), b += 2; + ta[b >>> 1 >>> 0] = 0; + return b - d; + }, qb = (a) => 2 * a.length, rb = (a, b) => { + for (var c = 0, d = ""; !(c >= b / 4); ) { + var e = G[a + 4 * c >>> 2 >>> 0]; + if (0 == e) + break; + ++c; + 65536 <= e ? (e -= 65536, d += String.fromCharCode(55296 | e >> 10, 56320 | e & 1023)) : d += String.fromCharCode(e); + } + return d; + }, sb = (a, b, c) => { + b >>>= 0; + c ??= 2147483647; + if (4 > c) + return 0; + var d = b; + c = d + c - 4; + for (var e = 0; e < a.length; ++e) { + var f = a.charCodeAt(e); + if (55296 <= f && 57343 >= f) { + var h = a.charCodeAt(++e); + f = 65536 + ((f & 1023) << 10) | h & 1023; + } + G[b >>> 2 >>> 0] = f; + b += 4; + if (b + 4 > c) + break; + } + G[b >>> 2 >>> 0] = 0; + return b - d; + }, tb = (a) => { + for (var b = 0, c = 0; c < a.length; ++c) { + var d = a.charCodeAt(c); + 55296 <= d && 57343 >= d && ++c; + b += 4; + } + return b; + }, vb = (a, b) => { + var c = eb[a]; + if (void 0 === c) + throw a = ub(a), c = N(a), T(a), new O(`${b} has unknown type ${c}`); + return c; + }, wb = (a, b, c) => { + var d = []; + a = a.toWireType(d, c); + d.length && (I[b >>> 2 >>> 0] = S(d)); + return a; + }, xb = (a) => { + try { + a(); + } catch (b) { + Ga(b); + } + }, yb = (a) => { + if (!x) + try { + a(); + try { + sa = sa = a = sa, g.onExit?.(a), x = true, ha(a, new Pa(a)); + } catch (b) { + b instanceof Pa || "unwind" == b || ha(1, b); + } + } catch (b) { + b instanceof Pa || "unwind" == b || ha(1, b); + } + }; + function zb() { + var a = U, b = {}; + for (let [c, d] of Object.entries(a)) + b[c] = "function" == typeof d ? 
(...e) => { + Ab.push(c); + try { + return d(...e); + } finally { + x || (Ab.pop(), t && 1 === V && 0 === Ab.length && (V = 0, xb(Bb), "undefined" != typeof Fibers && Fibers.mi())); + } + } : d; + return b; + } + var V = 0, t = null, Cb = 0, Ab = [], Db = {}, Eb = {}, Fb = 0, Gb = null, Hb = []; + function ca() { + return new Promise((a, b) => { + Gb = { resolve: a, reject: b }; + }); + } + function Ib() { + var a = Jb(65548), b = a + 12; + I[a >>> 2 >>> 0] = b; + I[a + 4 >>> 2 >>> 0] = b + 65536; + b = Ab[0]; + var c = Db[b]; + void 0 === c && (c = Fb++, Db[b] = c, Eb[c] = b); + G[a + 8 >>> 2 >>> 0] = c; + return a; + } + function Kb(a) { + if (!x) { + if (0 === V) { + var b = false, c = false; + a((d = 0) => { + if (!x && (Cb = d, b = true, c)) { + V = 2; + xb(() => Lb(t)); + "undefined" != typeof Browser && Browser.Sh.Wh && Browser.Sh.resume(); + d = false; + try { + var e = (0, U[Eb[G[t + 8 >>> 2 >>> 0]]])(); + } catch (k) { + e = k, d = true; + } + var f = false; + if (!t) { + var h = Gb; + h && (Gb = null, (d ? h.reject : h.resolve)(e), f = true); + } + if (d && !f) + throw e; + } + }); + c = true; + b || (V = 1, t = Ib(), "undefined" != typeof Browser && Browser.Sh.Wh && Browser.Sh.pause(), xb(() => Mb(t))); + } else + 2 === V ? (V = 0, xb(Nb), T(t), t = null, Hb.forEach(yb)) : Ga(`invalid state: ${V}`); + return Cb; + } + } + function Ob(a) { + return Kb((b) => { + a().then(b); + }); + } + var Pb = [], Qb = {}, Rb = (a) => { + var b = Qb[a]; + return void 0 === b ? N(a) : b; + }, Sb = () => "object" == typeof globalThis ? 
globalThis : Function("return this")(), Tb = (a) => { + var b = Pb.length; + Pb.push(a); + return b; + }, Ub = (a, b) => { + for (var c = Array(a), d = 0; d < a; ++d) + c[d] = vb(I[b + 4 * d >>> 2 >>> 0], "parameter " + d); + return c; + }, Vb = (a, b) => Object.defineProperty(b, "name", { value: a }); + function Wb(a) { + var b = Function; + if (!(b instanceof Function)) + throw new TypeError(`new_ called with constructor type ${typeof b} which is not a function`); + var c = Vb(b.name || "unknownFunctionName", function() { + }); + c.prototype = b.prototype; + c = new c(); + a = b.apply(c, a); + return a instanceof Object ? a : c; + } + var W = (a) => 0 === a % 4 && (0 !== a % 100 || 0 === a % 400), Xb = [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335], Yb = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334], Zb = [], $b = (a, b) => { + Zb.length = 0; + for (var c; c = E[a++ >>> 0]; ) { + var d = 105 != c; + d &= 112 != c; + b += d && b % 8 ? 4 : 0; + Zb.push(112 == c ? I[b >>> 2 >>> 0] : 106 == c ? wa[b >>> 3] : 105 == c ? G[b >>> 2 >>> 0] : ya[b >>> 3 >>> 0]); + b += d ? 8 : 4; + } + return Zb; + }, ac = {}, cc = () => { + if (!bc) { + var a = { USER: "web_user", LOGNAME: "web_user", PATH: "/", PWD: "/", HOME: "/home/web_user", LANG: ("object" == typeof navigator && navigator.languages && navigator.languages[0] || "C").replace("-", "_") + ".UTF-8", _: fa || "./this.program" }, b; + for (b in ac) + void 0 === ac[b] ? delete a[b] : a[b] = ac[b]; + var c = []; + for (b in a) + c.push(`${b}=${a[b]}`); + bc = c; + } + return bc; + }, bc, dc = [null, [], []], ec = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], fc = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; + function gc(a) { + var b = Array(bb(a) + 1); + M(a, b, 0, b.length); + return b; + } + function hc(a, b, c, d) { + function e(p, u, y) { + for (p = "number" == typeof p ? 
p.toString() : p || ""; p.length < u; ) + p = y[0] + p; + return p; + } + function f(p, u) { + return e(p, u, "0"); + } + function h(p, u) { + function y(B) { + return 0 > B ? -1 : 0 < B ? 1 : 0; + } + var A; + 0 === (A = y(p.getFullYear() - u.getFullYear())) && 0 === (A = y(p.getMonth() - u.getMonth())) && (A = y(p.getDate() - u.getDate())); + return A; + } + function k(p) { + switch (p.getDay()) { + case 0: + return new Date(p.getFullYear() - 1, 11, 29); + case 1: + return p; + case 2: + return new Date(p.getFullYear(), 0, 3); + case 3: + return new Date( + p.getFullYear(), + 0, + 2 + ); + case 4: + return new Date(p.getFullYear(), 0, 1); + case 5: + return new Date(p.getFullYear() - 1, 11, 31); + case 6: + return new Date(p.getFullYear() - 1, 11, 30); + } + } + function l(p) { + var u = p.Kh; + for (p = new Date(new Date(p.Lh + 1900, 0, 1).getTime()); 0 < u; ) { + var y = p.getMonth(), A = (W(p.getFullYear()) ? ec : fc)[y]; + if (u > A - p.getDate()) + u -= A - p.getDate() + 1, p.setDate(1), 11 > y ? p.setMonth(y + 1) : (p.setMonth(0), p.setFullYear(p.getFullYear() + 1)); + else { + p.setDate(p.getDate() + u); + break; + } + } + y = new Date(p.getFullYear() + 1, 0, 4); + u = k(new Date( + p.getFullYear(), + 0, + 4 + )); + y = k(y); + return 0 >= h(u, p) ? 0 >= h(y, p) ? p.getFullYear() + 1 : p.getFullYear() : p.getFullYear() - 1; + } + a >>>= 0; + b >>>= 0; + c >>>= 0; + d >>>= 0; + var m = I[d + 40 >>> 2 >>> 0]; + d = { ji: G[d >>> 2 >>> 0], ii: G[d + 4 >>> 2 >>> 0], Qh: G[d + 8 >>> 2 >>> 0], Th: G[d + 12 >>> 2 >>> 0], Rh: G[d + 16 >>> 2 >>> 0], Lh: G[d + 20 >>> 2 >>> 0], Jh: G[d + 24 >>> 2 >>> 0], Kh: G[d + 28 >>> 2 >>> 0], li: G[d + 32 >>> 2 >>> 0], hi: G[d + 36 >>> 2 >>> 0], ki: m ? 
J(m) : "" }; + c = J(c); + m = { + "%c": "%a %b %d %H:%M:%S %Y", + "%D": "%m/%d/%y", + "%F": "%Y-%m-%d", + "%h": "%b", + "%r": "%I:%M:%S %p", + "%R": "%H:%M", + "%T": "%H:%M:%S", + "%x": "%m/%d/%y", + "%X": "%H:%M:%S", + "%Ec": "%c", + "%EC": "%C", + "%Ex": "%m/%d/%y", + "%EX": "%H:%M:%S", + "%Ey": "%y", + "%EY": "%Y", + "%Od": "%d", + "%Oe": "%e", + "%OH": "%H", + "%OI": "%I", + "%Om": "%m", + "%OM": "%M", + "%OS": "%S", + "%Ou": "%u", + "%OU": "%U", + "%OV": "%V", + "%Ow": "%w", + "%OW": "%W", + "%Oy": "%y" + }; + for (var n in m) + c = c.replace(new RegExp(n, "g"), m[n]); + var q = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "), r = "January February March April May June July August September October November December".split(" "); + m = { "%a": (p) => q[p.Jh].substring(0, 3), "%A": (p) => q[p.Jh], "%b": (p) => r[p.Rh].substring(0, 3), "%B": (p) => r[p.Rh], "%C": (p) => f((p.Lh + 1900) / 100 | 0, 2), "%d": (p) => f(p.Th, 2), "%e": (p) => e(p.Th, 2, " "), "%g": (p) => l(p).toString().substring(2), "%G": l, "%H": (p) => f(p.Qh, 2), "%I": (p) => { + p = p.Qh; + 0 == p ? p = 12 : 12 < p && (p -= 12); + return f(p, 2); + }, "%j": (p) => { + for (var u = 0, y = 0; y <= p.Rh - 1; u += (W(p.Lh + 1900) ? ec : fc)[y++]) + ; + return f(p.Th + u, 3); + }, "%m": (p) => f(p.Rh + 1, 2), "%M": (p) => f(p.ii, 2), "%n": () => "\n", "%p": (p) => 0 <= p.Qh && 12 > p.Qh ? 
"AM" : "PM", "%S": (p) => f(p.ji, 2), "%t": () => " ", "%u": (p) => p.Jh || 7, "%U": (p) => f(Math.floor((p.Kh + 7 - p.Jh) / 7), 2), "%V": (p) => { + var u = Math.floor((p.Kh + 7 - (p.Jh + 6) % 7) / 7); + 2 >= (p.Jh + 371 - p.Kh - 2) % 7 && u++; + if (u) + 53 == u && (y = (p.Jh + 371 - p.Kh) % 7, 4 == y || 3 == y && W(p.Lh) || (u = 1)); + else { + u = 52; + var y = (p.Jh + 7 - p.Kh - 1) % 7; + (4 == y || 5 == y && W(p.Lh % 400 - 1)) && u++; + } + return f(u, 2); + }, "%w": (p) => p.Jh, "%W": (p) => f(Math.floor((p.Kh + 7 - (p.Jh + 6) % 7) / 7), 2), "%y": (p) => (p.Lh + 1900).toString().substring(2), "%Y": (p) => p.Lh + 1900, "%z": (p) => { + p = p.hi; + var u = 0 <= p; + p = Math.abs(p) / 60; + return (u ? "+" : "-") + String("0000" + (p / 60 * 100 + p % 60)).slice(-4); + }, "%Z": (p) => p.ki, "%%": () => "%" }; + c = c.replace(/%%/g, "\0\0"); + for (n in m) + c.includes(n) && (c = c.replace(new RegExp(n, "g"), m[n](d))); + c = c.replace(/\0\0/g, "%"); + n = gc(c); + if (n.length > b) + return 0; + z.set(n, a >>> 0); + return n.length - 1; + } + for (var ic = Array(256), jc = 0; 256 > jc; ++jc) + ic[jc] = String.fromCharCode(jc); + cb = ic; + O = g.BindingError = class extends Error { + constructor(a) { + super(a); + this.name = "BindingError"; + } + }; + g.InternalError = class extends Error { + constructor(a) { + super(a); + this.name = "InternalError"; + } + }; + Q.push(0, 1, void 0, 1, null, 1, true, 1, false, 1); + g.count_emval_handles = () => Q.length / 2 - 5 - ib.length; + var Cf = { + bd: function(a, b, c) { + return Ob(async () => { + await g.bi(a, b, c); + }); + }, + v: function(a) { + a = new Sa(a >>> 0); + 0 == z[a.Ih + 12 >>> 0] && (z[a.Ih + 12 >>> 0] = 1, Ra--); + z[a.Ih + 13 >>> 0] = 0; + Qa.push(a); + kc(a.Nh); + if (lc(I[a.Ih + 4 >>> 2 >>> 0])) + a = I[a.Nh >>> 2 >>> 0]; + else { + var b = I[a.Ih + 16 >>> 2 >>> 0]; + a = 0 !== b ? 
b : a.Nh; + } + return a; + }, + N: () => { + X(0, 0); + var a = Qa.pop(); + mc(a.Nh); + L = 0; + }, + a: function() { + return Za([]); + }, + m: function(a) { + return Za([a >>> 0]); + }, + x: function(a, b) { + return Za([a >>> 0, b >>> 0]); + }, + q: function(a, b, c) { + return Za([a >>> 0, b >>> 0, c >>> 0]); + }, + Bb: () => { + var a = Qa.pop(); + a || Ga("no exception to throw"); + var b = a.Nh; + 0 == z[a.Ih + 13 >>> 0] && (Qa.push(a), z[a.Ih + 13 >>> 0] = 1, z[a.Ih + 12 >>> 0] = 0, Ra++); + L = b; + throw L; + }, + s: function(a, b, c) { + a >>>= 0; + var d = new Sa(a); + I[d.Ih + 16 >>> 2 >>> 0] = 0; + I[d.Ih + 4 >>> 2 >>> 0] = b >>> 0; + I[d.Ih + 8 >>> 2 >>> 0] = c >>> 0; + L = a; + Ra++; + throw L; + }, + fb: () => Ra, + g: function(a) { + L ||= a >>> 0; + throw L; + }, + Cb: function() { + return 0; + }, + $c: function() { + }, + Mc: function() { + }, + Oc: function() { + }, + Gc: function() { + return 0; + }, + Zc: function() { + }, + Uc: function() { + }, + Yc: function() { + }, + _b: function() { + }, + Nc: function() { + }, + Kc: function() { + }, + _c: function() { + }, + Lc: function() { + }, + Wb: function(a, b, c) { + b = N(b >>> 0); + P(a >>> 0, { name: b, fromWireType: (d) => d, toWireType: function(d, e) { + if ("bigint" != typeof e && "number" != typeof e) + throw null === e ? e = "null" : (d = typeof e, e = "object" === d || "array" === d || "function" === d ? e.toString() : "" + e), new TypeError(`Cannot convert "${e}" to ${this.name}`); + "number" == typeof e && (e = BigInt(e)); + return e; + }, argPackAdvance: 8, readValueFromPointer: hb(b, c >>> 0, -1 == b.indexOf("u")), Mh: null }); + }, + Ec: function(a, b, c, d) { + b = N(b >>> 0); + P(a >>> 0, { name: b, fromWireType: function(e) { + return !!e; + }, toWireType: function(e, f) { + return f ? 
c : d; + }, argPackAdvance: 8, readValueFromPointer: function(e) { + return this.fromWireType(E[e >>> 0]); + }, Mh: null }); + }, + Cc: function(a) { + return P(a >>> 0, lb); + }, + Vb: function(a, b, c) { + b = N(b >>> 0); + P(a >>> 0, { name: b, fromWireType: (d) => d, toWireType: (d, e) => e, argPackAdvance: 8, readValueFromPointer: mb(b, c >>> 0), Mh: null }); + }, + Aa: function(a, b, c, d, e) { + a >>>= 0; + c >>>= 0; + b = N(b >>> 0); + -1 === e && (e = 4294967295); + e = (k) => k; + if (0 === d) { + var f = 32 - 8 * c; + e = (k) => k << f >>> f; + } + var h = b.includes("unsigned") ? function(k, l) { + return l >>> 0; + } : function(k, l) { + return l; + }; + P(a, { + name: b, + fromWireType: e, + toWireType: h, + argPackAdvance: 8, + readValueFromPointer: hb(b, c, 0 !== d), + Mh: null + }); + }, + _: function(a, b, c) { + function d(f) { + return new e(z.buffer, I[f + 4 >>> 2 >>> 0], I[f >>> 2 >>> 0]); + } + var e = [Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, Uint32Array, Float32Array, Float64Array, BigInt64Array, BigUint64Array][b]; + c = N(c >>> 0); + P(a >>> 0, { name: c, fromWireType: d, argPackAdvance: 8, readValueFromPointer: d }, { Xh: true }); + }, + Xb: function(a, b) { + b = N(b >>> 0); + var c = "std::string" === b; + P(a >>> 0, { name: b, fromWireType: function(d) { + var e = I[d >>> 2 >>> 0], f = d + 4; + if (c) + for (var h = f, k = 0; k <= e; ++k) { + var l = f + k; + if (k == e || 0 == E[l >>> 0]) { + h = J(h, l - h); + if (void 0 === m) + var m = h; + else + m += String.fromCharCode(0), m += h; + h = l + 1; + } + } + else { + m = Array(e); + for (k = 0; k < e; ++k) + m[k] = String.fromCharCode(E[f + k >>> 0]); + m = m.join(""); + } + T(d); + return m; + }, toWireType: function(d, e) { + e instanceof ArrayBuffer && (e = new Uint8Array(e)); + var f = "string" == typeof e; + if (!(f || e instanceof Uint8Array || e instanceof Uint8ClampedArray || e instanceof Int8Array)) + throw new O("Cannot pass non-string to std::string"); 
+ var h = c && f ? bb(e) : e.length; + var k = Jb(4 + h + 1), l = k + 4; + I[k >>> 2 >>> 0] = h; + if (c && f) + M(e, E, l, h + 1); + else if (f) + for (f = 0; f < h; ++f) { + var m = e.charCodeAt(f); + if (255 < m) + throw T(l), new O("String has UTF-16 code units that do not fit in 8 bits"); + E[l + f >>> 0] = m; + } + else + for (f = 0; f < h; ++f) + E[l + f >>> 0] = e[f]; + null !== d && d.push(T, k); + return k; + }, argPackAdvance: 8, readValueFromPointer: kb, Mh(d) { + T(d); + } }); + }, + Ab: function(a, b, c) { + b >>>= 0; + c >>>= 0; + c = N(c); + if (2 === b) { + var d = ob; + var e = pb; + var f = qb; + var h = (k) => ua[k >>> 1 >>> 0]; + } else + 4 === b && (d = rb, e = sb, f = tb, h = (k) => I[k >>> 2 >>> 0]); + P(a >>> 0, { + name: c, + fromWireType: (k) => { + for (var l = I[k >>> 2 >>> 0], m, n = k + 4, q = 0; q <= l; ++q) { + var r = k + 4 + q * b; + if (q == l || 0 == h(r)) + n = d(n, r - n), void 0 === m ? m = n : (m += String.fromCharCode(0), m += n), n = r + b; + } + T(k); + return m; + }, + toWireType: (k, l) => { + if ("string" != typeof l) + throw new O(`Cannot pass non-string to C++ string type ${c}`); + var m = f(l), n = Jb(4 + m + b); + I[n >>> 2 >>> 0] = m / b; + e(l, n + 4, m + b); + null !== k && k.push(T, n); + return n; + }, + argPackAdvance: 8, + readValueFromPointer: kb, + Mh(k) { + T(k); + } + }); + }, + Fc: function(a, b) { + b = N(b >>> 0); + P(a >>> 0, { Yh: true, name: b, argPackAdvance: 0, fromWireType: () => { + }, toWireType: () => { + } }); + }, + ad: () => 1, + kd: function(a, b, c) { + b >>>= 0; + c >>>= 0; + a = R(a >>> 0); + b = vb(b, "emval::as"); + return wb(b, c, a); + }, + Cd: function(a) { + a >>>= 0; + return Ob(() => { + a = R(a); + return a.then(S); + }); + }, + ud: function(a, b, c, d) { + c >>>= 0; + d >>>= 0; + a = Pb[a >>> 0]; + b = R(b >>> 0); + return a(null, b, c, d); + }, + ia: function(a, b, c, d, e) { + c >>>= 0; + d >>>= 0; + e >>>= 0; + a = Pb[a >>> 0]; + b = R(b >>> 0); + c = Rb(c); + return a(b, 
b[c], d, e); + }, + Bc: jb, + qd: function(a, b) { + b >>>= 0; + a = R(a >>> 0); + b = R(b); + return a == b; + }, + zd: function(a) { + a >>>= 0; + if (0 === a) + return S(Sb()); + a = Rb(a); + return S(Sb()[a]); + }, + ha: function(a, b, c) { + b = Ub(a, b >>> 0); + var d = b.shift(); + a--; + var e = "return function (obj, func, destructorsRef, args) {\n", f = 0, h = []; + 0 === c && h.push("obj"); + for (var k = ["retType"], l = [d], m = 0; m < a; ++m) + h.push("arg" + m), k.push("argType" + m), l.push(b[m]), e += ` var arg${m} = argType${m}.readValueFromPointer(args${f ? "+" + f : ""}); +`, f += b[m].argPackAdvance; + e += ` var rv = ${1 === c ? "new func" : "func.call"}(${h.join(", ")}); +`; + d.Yh || (k.push("emval_returnValue"), l.push(wb), e += " return emval_returnValue(retType, destructorsRef, rv);\n"); + k.push(e + "};\n"); + a = Wb(k)(...l); + c = `methodCaller<(${b.map((n) => n.name).join(", ")}) => ${d.name}>`; + return Tb(Vb(c, a)); + }, + yd: function(a, b) { + b >>>= 0; + a = R(a >>> 0); + b = R(b); + return S(a[b]); + }, + ba: function(a) { + a >>>= 0; + 9 < a && (Q[a + 1] += 1); + }, + md: function() { + return S([]); + }, + hd: function(a) { + a = R(a >>> 0); + for (var b = Array(a.length), c = 0; c < a.length; c++) + b[c] = a[c]; + return S(b); + }, + U: function(a) { + return S(Rb(a >>> 0)); + }, + Xa: function() { + return S({}); + }, + vd: function(a) { + a >>>= 0; + for (var b = R(a); b.length; ) { + var c = b.pop(); + b.pop()(c); + } + jb(a); + }, + sd: function(a, b, c) { + b >>>= 0; + c >>>= 0; + a = R(a >>> 0); + b = R(b); + c = R(c); + a[b] = c; + }, + zb: function(a, b) { + b >>>= 0; + a = vb(a >>> 0, "_emval_take_value"); + a = a.readValueFromPointer(b); + return S(a); + }, + Rc: function(a, b) { + a = -9007199254740992 > a || 9007199254740992 < a ? 
NaN : Number(a); + b >>>= 0; + a = new Date(1e3 * a); + G[b >>> 2 >>> 0] = a.getUTCSeconds(); + G[b + 4 >>> 2 >>> 0] = a.getUTCMinutes(); + G[b + 8 >>> 2 >>> 0] = a.getUTCHours(); + G[b + 12 >>> 2 >>> 0] = a.getUTCDate(); + G[b + 16 >>> 2 >>> 0] = a.getUTCMonth(); + G[b + 20 >>> 2 >>> 0] = a.getUTCFullYear() - 1900; + G[b + 24 >>> 2 >>> 0] = a.getUTCDay(); + G[b + 28 >>> 2 >>> 0] = (a.getTime() - Date.UTC( + a.getUTCFullYear(), + 0, + 1, + 0, + 0, + 0, + 0 + )) / 864e5 | 0; + }, + Sc: function(a, b) { + a = -9007199254740992 > a || 9007199254740992 < a ? NaN : Number(a); + b >>>= 0; + a = new Date(1e3 * a); + G[b >>> 2 >>> 0] = a.getSeconds(); + G[b + 4 >>> 2 >>> 0] = a.getMinutes(); + G[b + 8 >>> 2 >>> 0] = a.getHours(); + G[b + 12 >>> 2 >>> 0] = a.getDate(); + G[b + 16 >>> 2 >>> 0] = a.getMonth(); + G[b + 20 >>> 2 >>> 0] = a.getFullYear() - 1900; + G[b + 24 >>> 2 >>> 0] = a.getDay(); + G[b + 28 >>> 2 >>> 0] = (W(a.getFullYear()) ? Xb : Yb)[a.getMonth()] + a.getDate() - 1 | 0; + G[b + 36 >>> 2 >>> 0] = -(60 * a.getTimezoneOffset()); + var c = new Date(a.getFullYear(), 6, 1).getTimezoneOffset(), d = new Date(a.getFullYear(), 0, 1).getTimezoneOffset(); + G[b + 32 >>> 2 >>> 0] = (c != d && a.getTimezoneOffset() == Math.min(d, c)) | 0; + }, + Tc: function(a) { + a >>>= 0; + var b = new Date(G[a + 20 >>> 2 >>> 0] + 1900, G[a + 16 >>> 2 >>> 0], G[a + 12 >>> 2 >>> 0], G[a + 8 >>> 2 >>> 0], G[a + 4 >>> 2 >>> 0], G[a >>> 2 >>> 0], 0), c = G[a + 32 >>> 2 >>> 0], d = b.getTimezoneOffset(), e = new Date(b.getFullYear(), 6, 1).getTimezoneOffset(), f = new Date(b.getFullYear(), 0, 1).getTimezoneOffset(), h = Math.min(f, e); + 0 > c ? G[a + 32 >>> 2 >>> 0] = Number(e != f && h == d) : 0 < c != (h == d) && (e = Math.max(f, e), b.setTime(b.getTime() + 6e4 * ((0 < c ? h : e) - d))); + G[a + 24 >>> 2 >>> 0] = b.getDay(); + G[a + 28 >>> 2 >>> 0] = (W(b.getFullYear()) ? 
Xb : Yb)[b.getMonth()] + b.getDate() - 1 | 0; + G[a >>> 2 >>> 0] = b.getSeconds(); + G[a + 4 >>> 2 >>> 0] = b.getMinutes(); + G[a + 8 >>> 2 >>> 0] = b.getHours(); + G[a + 12 >>> 2 >>> 0] = b.getDate(); + G[a + 16 >>> 2 >>> 0] = b.getMonth(); + G[a + 20 >>> 2 >>> 0] = b.getYear(); + a = b.getTime(); + return BigInt(isNaN(a) ? -1 : a / 1e3); + }, + Pc: function() { + return -52; + }, + Qc: function() { + }, + Ic: function(a, b, c, d) { + c >>>= 0; + d >>>= 0; + var e = (/* @__PURE__ */ new Date()).getFullYear(), f = new Date(e, 0, 1), h = new Date(e, 6, 1); + e = f.getTimezoneOffset(); + var k = h.getTimezoneOffset(); + I[a >>> 0 >>> 2 >>> 0] = 60 * Math.max(e, k); + G[b >>> 0 >>> 2 >>> 0] = Number(e != k); + a = (l) => l.toLocaleTimeString(void 0, { hour12: false, timeZoneName: "short" }).split(" ")[1]; + f = a(f); + h = a(h); + k < e ? (M(f, E, c, 17), M(h, E, d, 17)) : (M(f, E, d, 17), M(h, E, c, 17)); + }, + bb: () => { + Ga(""); + }, + A: function(a, b, c) { + a >>>= 0; + b = $b(b >>> 0, c >>> 0); + return Oa[a](...b); + }, + bc: function(a, b, c) { + a >>>= 0; + b = $b(b >>> 0, c >>> 0); + return Oa[a](...b); + }, + $b: () => Date.now(), + Jc: function() { + return 4294901760; + }, + ga: () => performance.now(), + Hc: function(a) { + a >>>= 0; + var b = E.length; + if (4294901760 < a) + return false; + for (var c = 1; 4 >= c; c *= 2) { + var d = b * (1 + 0.2 / c); + d = Math.min(d, a + 100663296); + var e = Math; + d = Math.max(a, d); + a: { + e = (e.min.call(e, 4294901760, d + (65536 - d % 65536) % 65536) - ra.buffer.byteLength + 65535) / 65536; + try { + ra.grow(e); + za(); + var f = 1; + break a; + } catch (h) { + } + f = void 0; + } + if (f) + return true; + } + return false; + }, + Wc: function(a, b) { + a >>>= 0; + b >>>= 0; + var c = 0; + cc().forEach((d, e) => { + var f = b + c; + e = I[a + 4 * e >>> 2 >>> 0] = f; + for (f = 0; f < d.length; ++f) + z[e++ >>> 0] = d.charCodeAt(f); + z[e >>> 0] = 0; + c += d.length + 1; + }); + return 0; + }, + Xc: 
function(a, b) { + a >>>= 0; + b >>>= 0; + var c = cc(); + I[a >>> 2 >>> 0] = c.length; + var d = 0; + c.forEach((e) => d += e.length + 1); + I[b >>> 2 >>> 0] = d; + return 0; + }, + Db: () => 52, + Zb: function() { + return 52; + }, + Vc: function() { + return 70; + }, + Yb: function(a, b, c, d) { + b >>>= 0; + c >>>= 0; + d >>>= 0; + for (var e = 0, f = 0; f < c; f++) { + var h = I[b >>> 2 >>> 0], k = I[b + 4 >>> 2 >>> 0]; + b += 8; + for (var l = 0; l < k; l++) { + var m = E[h + l >>> 0], n = dc[a]; + 0 === m || 10 === m ? ((1 === a ? qa : w)(ab(n, 0)), n.length = 0) : n.push(m); + } + e += k; + } + I[d >>> 2 >>> 0] = e; + return 0; + }, + xb: nc, + cd: oc, + ua: pc, + W: qc, + $: rc, + ra: sc, + ta: tc, + dd: uc, + ob: vc, + P: wc, + z: xc, + b: yc, + Ub: zc, + ya: Ac, + e: Bc, + kb: Cc, + h: Dc, + X: Ec, + i: Fc, + ed: Gc, + j: Hc, + t: Ic, + r: Jc, + o: Kc, + Wa: Lc, + Ca: Mc, + ma: Nc, + Qb: Oc, + db: Pc, + Ib: Qc, + mb: Rc, + kc: Sc, + xc: Tc, + hc: Uc, + ic: Vc, + ac: Wc, + oa: Xc, + yb: Yc, + Ba: Zc, + Eb: $c, + ea: ad, + jc: bd, + Ta: cd, + F: dd, + G: ed, + Gb: fd, + jd: gd, + qa: hd, + O: jd, + V: kd, + T: ld, + y: md, + Fb: nd, + gc: od, + D: pd, + Hb: qd, + id: rd, + Ua: sd, + wa: td, + lc: ud, + cc: vd, + Nb: wd, + aa: xd, + I: yd, + C: zd, + _a: Ad, + fc: Bd, + Q: Cd, + d: Dd, + ab: Ed, + n: Fd, + Ya: Gd, + va: Hd, + wb: Id, + f: Jd, + yc: Kd, + da: Ld, + gb: Md, + Da: Nd, + lb: Od, + hb: Pd, + c: Qd, + vc: Rd, + od: Sd, + k: Td, + tc: Ud, + l: Vd, + wc: Wd, + sc: Xd, + rd: Yd, + p: Zd, + Ra: $d, + tb: ae, + Qa: be, + Kb: ce, + B: de, + K: ee, + S: fe, + $a: ge, + pc: he, + ub: ie, + za: je, + ka: ke, + xa: le, + Sb: me, + La: ne, + jb: oe, + Ga: pe, + nc: qe, + Ha: re, + Ia: se, + fd: te, + xd: ue, + Z: ve, + pa: we, + pd: xe, + wd: ye, + Mb: ze, + Ma: Ae, + Ka: Be, + Tb: Ce, + rc: De, + Ja: Ee, + Na: Fe, + pb: Ge, + la: He, + Ea: Ie, + mc: Je, + qc: Ke, + Jb: Le, + Fa: Me, + ja: Ne, + Ad: Oe, + nd: Pe, + R: Qe, + eb: Re, + Za: Se, + ec: Te, + ib: 
Ue, + E: Ve, + M: We, + Va: Xe, + ld: Ye, + ca: Ze, + nb: $e, + na: af, + dc: bf, + Ac: cf, + u: df, + L: ef, + td: ff, + Pb: gf, + oc: hf, + Bd: jf, + Ob: kf, + Lb: lf, + cb: mf, + zc: nf, + Rb: of, + Oa: pf, + Y: qf, + uc: rf, + J: sf, + gd: tf, + vb: uf, + sa: vf, + H: wf, + rb: xf, + Pa: yf, + Sa: zf, + sb: Af, + qb: Bf, + w: function(a) { + return a >>> 0; + }, + Dc: hc, + fa: function(a, b, c, d) { + return hc(a >>> 0, b >>> 0, c >>> 0, d >>> 0); + } + }, U = function() { + function a(c) { + U = c.exports; + U = zb(); + U = Df(); + ra = U.Dd; + za(); + Ba.unshift(U.Ed); + Da--; + 0 == Da && (null !== Ea && (clearInterval(Ea), Ea = null), Fa && (c = Fa, Fa = null, c())); + return U; + } + var b = { a: Cf }; + Da++; + if (g.instantiateWasm) + try { + return g.instantiateWasm(b, a); + } catch (c) { + w(`Module.instantiateWasm callback failed with error: ${c}`), ba(c); + } + Na(b, function(c) { + a(c.instance); + }).catch(ba); + return {}; + }(), ub = (a) => (ub = U.Fd)(a); + g._OrtInit = (a, b) => (g._OrtInit = U.Gd)(a, b); + g._OrtGetLastError = (a, b) => (g._OrtGetLastError = U.Hd)(a, b); + g._OrtCreateSessionOptions = (a, b, c, d, e, f, h, k, l, m) => (g._OrtCreateSessionOptions = U.Id)(a, b, c, d, e, f, h, k, l, m); + g._OrtAppendExecutionProvider = (a, b) => (g._OrtAppendExecutionProvider = U.Jd)(a, b); + g._OrtAddFreeDimensionOverride = (a, b, c) => (g._OrtAddFreeDimensionOverride = U.Kd)(a, b, c); + g._OrtAddSessionConfigEntry = (a, b, c) => (g._OrtAddSessionConfigEntry = U.Ld)(a, b, c); + g._OrtReleaseSessionOptions = (a) => (g._OrtReleaseSessionOptions = U.Md)(a); + g._OrtCreateSession = (a, b, c) => (g._OrtCreateSession = U.Nd)(a, b, c); + g._OrtReleaseSession = (a) => (g._OrtReleaseSession = U.Od)(a); + g._OrtGetInputOutputCount = (a, b, c) => (g._OrtGetInputOutputCount = U.Pd)(a, b, c); + g._OrtGetInputName = (a, b) => (g._OrtGetInputName = U.Qd)(a, b); + g._OrtGetOutputName = (a, b) => (g._OrtGetOutputName = U.Rd)(a, b); + g._OrtFree = (a) => 
(g._OrtFree = U.Sd)(a); + g._OrtCreateTensor = (a, b, c, d, e, f) => (g._OrtCreateTensor = U.Td)(a, b, c, d, e, f); + g._OrtGetTensorData = (a, b, c, d, e) => (g._OrtGetTensorData = U.Ud)(a, b, c, d, e); + g._OrtReleaseTensor = (a) => (g._OrtReleaseTensor = U.Vd)(a); + g._OrtCreateRunOptions = (a, b, c, d) => (g._OrtCreateRunOptions = U.Wd)(a, b, c, d); + g._OrtAddRunConfigEntry = (a, b, c) => (g._OrtAddRunConfigEntry = U.Xd)(a, b, c); + g._OrtReleaseRunOptions = (a) => (g._OrtReleaseRunOptions = U.Yd)(a); + g._OrtCreateBinding = (a) => (g._OrtCreateBinding = U.Zd)(a); + g._OrtBindInput = (a, b, c) => (g._OrtBindInput = U._d)(a, b, c); + g._OrtBindOutput = (a, b, c, d) => (g._OrtBindOutput = U.$d)(a, b, c, d); + g._OrtClearBoundOutputs = (a) => (g._OrtClearBoundOutputs = U.ae)(a); + g._OrtReleaseBinding = (a) => (g._OrtReleaseBinding = U.be)(a); + g._OrtRunWithBinding = (a, b, c, d, e) => (g._OrtRunWithBinding = U.ce)(a, b, c, d, e); + g._OrtRun = (a, b, c, d, e, f, h, k) => (g._OrtRun = U.de)(a, b, c, d, e, f, h, k); + g._OrtEndProfiling = (a) => (g._OrtEndProfiling = U.ee)(a); + g._JsepOutput = (a, b, c) => (g._JsepOutput = U.fe)(a, b, c); + g._JsepGetNodeName = (a) => (g._JsepGetNodeName = U.ge)(a); + var Jb = g._malloc = (a) => (Jb = g._malloc = U.he)(a), T = g._free = (a) => (T = g._free = U.ie)(a), X = (a, b) => (X = U.ke)(a, b), Ta = (a) => (Ta = U.le)(a), Y = (a) => (Y = U.me)(a), Ef = (a) => (Ef = U.ne)(a), Z = () => (Z = U.oe)(), mc = (a) => (mc = U.pe)(a), kc = (a) => (kc = U.qe)(a), Ua = (a, b, c) => (Ua = U.re)(a, b, c), lc = (a) => (lc = U.se)(a), dynCall_vii = g.dynCall_vii = (a, b, c) => (dynCall_vii = g.dynCall_vii = U.te)(a, b, c), Ff = g.dynCall_iiii = (a, b, c, d) => (Ff = g.dynCall_iiii = U.ue)(a, b, c, d), dynCall_iii = g.dynCall_iii = (a, b, c) => (dynCall_iii = g.dynCall_iii = U.ve)(a, b, c), Gf = g.dynCall_ii = (a, b) => (Gf = g.dynCall_ii = U.we)(a, b), Hf = g.dynCall_iiiiiii = (a, b, c, d, e, f, h) => (Hf = g.dynCall_iiiiiii = U.xe)(a, b, 
c, d, e, f, h), dynCall_vi = g.dynCall_vi = (a, b) => (dynCall_vi = g.dynCall_vi = U.ye)(a, b), dynCall_v = g.dynCall_v = (a) => (dynCall_v = g.dynCall_v = U.ze)(a), If = g.dynCall_iiiiii = (a, b, c, d, e, f) => (If = g.dynCall_iiiiii = U.Ae)(a, b, c, d, e, f), Jf = g.dynCall_iiij = (a, b, c, d) => (Jf = g.dynCall_iiij = U.Be)(a, b, c, d), Kf = g.dynCall_iiiii = (a, b, c, d, e) => (Kf = g.dynCall_iiiii = U.Ce)(a, b, c, d, e), Lf = g.dynCall_viii = (a, b, c, d) => (Lf = g.dynCall_viii = U.De)(a, b, c, d), Mf = g.dynCall_viiiii = (a, b, c, d, e, f) => (Mf = g.dynCall_viiiii = U.Ee)(a, b, c, d, e, f), Nf = g.dynCall_viiii = (a, b, c, d, e) => (Nf = g.dynCall_viiii = U.Fe)(a, b, c, d, e), Of = g.dynCall_viiiiii = (a, b, c, d, e, f, h) => (Of = g.dynCall_viiiiii = U.Ge)(a, b, c, d, e, f, h), Pf = g.dynCall_viiji = (a, b, c, d, e) => (Pf = g.dynCall_viiji = U.He)(a, b, c, d, e), Qf = g.dynCall_viiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (Qf = g.dynCall_viiiiiiiiiii = U.Ie)(a, b, c, d, e, f, h, k, l, m, n, q), Rf = g.dynCall_viiijjjii = (a, b, c, d, e, f, h, k, l) => (Rf = g.dynCall_viiijjjii = U.Je)(a, b, c, d, e, f, h, k, l), Sf = g.dynCall_iid = (a, b, c) => (Sf = g.dynCall_iid = U.Ke)(a, b, c), Tf = g.dynCall_iif = (a, b, c) => (Tf = g.dynCall_iif = U.Le)(a, b, c), Uf = g.dynCall_iij = (a, b, c) => (Uf = g.dynCall_iij = U.Me)(a, b, c), Vf = g.dynCall_jii = (a, b, c) => (Vf = g.dynCall_jii = U.Ne)(a, b, c), Wf = g.dynCall_i = (a) => (Wf = g.dynCall_i = U.Oe)(a), Xf = g.dynCall_viiiiiiii = (a, b, c, d, e, f, h, k, l) => (Xf = g.dynCall_viiiiiiii = U.Pe)(a, b, c, d, e, f, h, k, l), Yf = g.dynCall_viiiiij = (a, b, c, d, e, f, h) => (Yf = g.dynCall_viiiiij = U.Qe)(a, b, c, d, e, f, h), Zf = g.dynCall_ji = (a, b) => (Zf = g.dynCall_ji = U.Re)(a, b), $f = g.dynCall_viij = (a, b, c, d) => ($f = g.dynCall_viij = U.Se)(a, b, c, d), ag = g.dynCall_iiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (ag = g.dynCall_iiiiiiiiiiii = U.Te)(a, b, c, d, e, f, h, k, l, m, 
n, q), bg = g.dynCall_viiiiiiiii = (a, b, c, d, e, f, h, k, l, m) => (bg = g.dynCall_viiiiiiiii = U.Ue)(a, b, c, d, e, f, h, k, l, m), cg = g.dynCall_ij = (a, b) => (cg = g.dynCall_ij = U.Ve)(a, b), dg = g.dynCall_iiiiij = (a, b, c, d, e, f) => (dg = g.dynCall_iiiiij = U.We)(a, b, c, d, e, f), eg = g.dynCall_j = (a) => (eg = g.dynCall_j = U.Xe)(a), fg = g.dynCall_vij = (a, b, c) => (fg = g.dynCall_vij = U.Ye)(a, b, c), gg = g.dynCall_viijjjiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (gg = g.dynCall_viijjjiiiiii = U.Ze)(a, b, c, d, e, f, h, k, l, m, n, q), hg = g.dynCall_viiijiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (hg = g.dynCall_viiijiiiiiii = U._e)(a, b, c, d, e, f, h, k, l, m, n, q), ig = g.dynCall_iiiiiiii = (a, b, c, d, e, f, h, k) => (ig = g.dynCall_iiiiiiii = U.$e)(a, b, c, d, e, f, h, k), jg = g.dynCall_viiiiiii = (a, b, c, d, e, f, h, k) => (jg = g.dynCall_viiiiiii = U.af)(a, b, c, d, e, f, h, k), kg = g.dynCall_iiiiiiiij = (a, b, c, d, e, f, h, k, l) => (kg = g.dynCall_iiiiiiiij = U.bf)(a, b, c, d, e, f, h, k, l), lg = g.dynCall_viiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (lg = g.dynCall_viiiiiiiiiiiii = U.cf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), mg = g.dynCall_iiiiiiiii = (a, b, c, d, e, f, h, k, l) => (mg = g.dynCall_iiiiiiiii = U.df)(a, b, c, d, e, f, h, k, l), ng = g.dynCall_iiiiijiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (ng = g.dynCall_iiiiijiiiii = U.ef)(a, b, c, d, e, f, h, k, l, m, n), og = g.dynCall_vijjjiiij = (a, b, c, d, e, f, h, k, l) => (og = g.dynCall_vijjjiiij = U.ff)(a, b, c, d, e, f, h, k, l), pg = g.dynCall_fi = (a, b) => (pg = g.dynCall_fi = U.gf)(a, b), qg = g.dynCall_fii = (a, b, c) => (qg = g.dynCall_fii = U.hf)(a, b, c), rg = g.dynCall_di = (a, b) => (rg = g.dynCall_di = U.jf)(a, b), sg = g.dynCall_dii = (a, b, c) => (sg = g.dynCall_dii = U.kf)(a, b, c), tg = g.dynCall_vijj = (a, b, c, d) => (tg = g.dynCall_vijj = U.lf)(a, b, c, d), ug = g.dynCall_iiiiiiiiii = (a, b, c, d, e, f, h, k, l, 
m) => (ug = g.dynCall_iiiiiiiiii = U.mf)(a, b, c, d, e, f, h, k, l, m), vg = g.dynCall_viijiii = (a, b, c, d, e, f, h) => (vg = g.dynCall_viijiii = U.nf)(a, b, c, d, e, f, h), wg = g.dynCall_viid = (a, b, c, d) => (wg = g.dynCall_viid = U.of)(a, b, c, d), xg = g.dynCall_viffiii = (a, b, c, d, e, f, h) => (xg = g.dynCall_viffiii = U.pf)(a, b, c, d, e, f, h), yg = g.dynCall_viifiii = (a, b, c, d, e, f, h) => (yg = g.dynCall_viifiii = U.qf)(a, b, c, d, e, f, h), zg = g.dynCall_viiiiidiidi = (a, b, c, d, e, f, h, k, l, m, n) => (zg = g.dynCall_viiiiidiidi = U.rf)(a, b, c, d, e, f, h, k, l, m, n), Ag = g.dynCall_viiiiiiiiidi = (a, b, c, d, e, f, h, k, l, m, n, q) => (Ag = g.dynCall_viiiiiiiiidi = U.sf)(a, b, c, d, e, f, h, k, l, m, n, q), Bg = g.dynCall_jiii = (a, b, c, d) => (Bg = g.dynCall_jiii = U.tf)(a, b, c, d), Cg = g.dynCall_vjiiiiii = (a, b, c, d, e, f, h, k) => (Cg = g.dynCall_vjiiiiii = U.uf)(a, b, c, d, e, f, h, k), Dg = g.dynCall_viiid = (a, b, c, d, e) => (Dg = g.dynCall_viiid = U.vf)(a, b, c, d, e), Eg = g.dynCall_viiiiiiiiiji = (a, b, c, d, e, f, h, k, l, m, n, q) => (Eg = g.dynCall_viiiiiiiiiji = U.wf)(a, b, c, d, e, f, h, k, l, m, n, q), Fg = g.dynCall_viji = (a, b, c, d) => (Fg = g.dynCall_viji = U.xf)(a, b, c, d), Gg = g.dynCall_vijjjjjjjjjjjjji = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => (Gg = g.dynCall_vijjjjjjjjjjjjji = U.yf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), Hg = g.dynCall_viiiji = (a, b, c, d, e, f) => (Hg = g.dynCall_viiiji = U.zf)(a, b, c, d, e, f), Ig = g.dynCall_vijjjiiji = (a, b, c, d, e, f, h, k, l) => (Ig = g.dynCall_vijjjiiji = U.Af)(a, b, c, d, e, f, h, k, l), Jg = g.dynCall_iiiji = (a, b, c, d, e) => (Jg = g.dynCall_iiiji = U.Bf)(a, b, c, d, e), Kg = g.dynCall_iiijiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Kg = g.dynCall_iiijiiiiiiiiii = U.Cf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Lg = g.dynCall_vj = (a, b) => (Lg = g.dynCall_vj = U.Df)(a, b), Mg = g.dynCall_jjj = (a, b, c) => (Mg = 
g.dynCall_jjj = U.Ef)(a, b, c), Ng = g.dynCall_iiijiiiiii = (a, b, c, d, e, f, h, k, l, m) => (Ng = g.dynCall_iiijiiiiii = U.Ff)(a, b, c, d, e, f, h, k, l, m), Og = g.dynCall_vfiii = (a, b, c, d, e) => (Og = g.dynCall_vfiii = U.Gf)(a, b, c, d, e), Pg = g.dynCall_viiiiff = (a, b, c, d, e, f, h) => (Pg = g.dynCall_viiiiff = U.Hf)(a, b, c, d, e, f, h), Qg = g.dynCall_viiiiiff = (a, b, c, d, e, f, h, k) => (Qg = g.dynCall_viiiiiff = U.If)(a, b, c, d, e, f, h, k), Rg = g.dynCall_viiff = (a, b, c, d, e) => (Rg = g.dynCall_viiff = U.Jf)(a, b, c, d, e), Sg = g.dynCall_viiiiiiiiifiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Sg = g.dynCall_viiiiiiiiifiii = U.Kf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Tg = g.dynCall_viiiiiiiijj = (a, b, c, d, e, f, h, k, l, m, n) => (Tg = g.dynCall_viiiiiiiijj = U.Lf)(a, b, c, d, e, f, h, k, l, m, n), Ug = g.dynCall_iiiiiiiiiiiiiifii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (Ug = g.dynCall_iiiiiiiiiiiiiifii = U.Mf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), Vg = g.dynCall_viiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Vg = g.dynCall_viiiiiiiiiiii = U.Nf)(a, b, c, d, e, f, h, k, l, m, n, q, r), Wg = g.dynCall_iiiiiiiiiiiiiiiiifii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (Wg = g.dynCall_iiiiiiiiiiiiiiiiifii = U.Of)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), Xg = g.dynCall_vijjiiiiii = (a, b, c, d, e, f, h, k, l, m) => (Xg = g.dynCall_vijjiiiiii = U.Pf)(a, b, c, d, e, f, h, k, l, m), Yg = g.dynCall_iiiijjj = (a, b, c, d, e, f, h) => (Yg = g.dynCall_iiiijjj = U.Qf)(a, b, c, d, e, f, h), Zg = g.dynCall_viiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (Zg = g.dynCall_viiiiiiiiii = U.Rf)(a, b, c, d, e, f, h, k, l, m, n), $g = g.dynCall_iiijjj = (a, b, c, d, e, f) => ($g = g.dynCall_iiijjj = U.Sf)(a, b, c, d, e, f), ah = g.dynCall_fffffff = (a, b, c, d, e, f, h) => (ah = g.dynCall_fffffff = U.Tf)(a, b, c, d, e, f, h), bh = g.dynCall_viiiij = (a, b, c, d, e, 
f) => (bh = g.dynCall_viiiij = U.Uf)(a, b, c, d, e, f), ch = g.dynCall_viiiiiijiifiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (ch = g.dynCall_viiiiiijiifiii = U.Vf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), dh = g.dynCall_vjjjjjjffjifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) => (dh = g.dynCall_vjjjjjjffjifiiiiii = U.Wf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B), eh = g.dynCall_viiiiiiffjifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (eh = g.dynCall_viiiiiiffjifiiiii = U.Xf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), fh = g.dynCall_viiiiiiffjfiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => (fh = g.dynCall_viiiiiiffjfiiiii = U.Yf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), gh = g.dynCall_viiiiiiffjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (gh = g.dynCall_viiiiiiffjiiiii = U.Zf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), hh = g.dynCall_vjjjjjjjjfffiiifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (hh = g.dynCall_vjjjjjjjjfffiiifiiiii = U._f)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), ih = g.dynCall_vjjjjjjfffifijiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) => (ih = g.dynCall_vjjjjjjfffifijiiiii = U.$f)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C), jh = g.dynCall_vjjjjjjfffifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) => (jh = g.dynCall_vjjjjjjfffifiiiiii = U.ag)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B), kh = g.dynCall_vjjjjjjjjfffjifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (kh = g.dynCall_vjjjjjjjjfffjifiiiiii = U.bg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), lh = g.dynCall_vijiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (lh = g.dynCall_vijiiiiiiiiii = U.cg)(a, b, c, d, e, f, h, k, l, m, n, q, r), mh = g.dynCall_vijjfffiii = (a, b, c, d, e, f, h, k, l, m) => (mh = 
g.dynCall_vijjfffiii = U.dg)(a, b, c, d, e, f, h, k, l, m), nh = g.dynCall_viiiiiiijiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (nh = g.dynCall_viiiiiiijiiii = U.eg)(a, b, c, d, e, f, h, k, l, m, n, q, r), oh = g.dynCall_vijjjjjjifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (oh = g.dynCall_vijjjjjjifiiiii = U.fg)( + a, + b, + c, + d, + e, + f, + h, + k, + l, + m, + n, + q, + r, + p, + u + ), ph = g.dynCall_viifi = (a, b, c, d, e) => (ph = g.dynCall_viifi = U.gg)(a, b, c, d, e), qh = g.dynCall_vjjjjjiiii = (a, b, c, d, e, f, h, k, l, m) => (qh = g.dynCall_vjjjjjiiii = U.hg)(a, b, c, d, e, f, h, k, l, m), rh = g.dynCall_vjjjjfiii = (a, b, c, d, e, f, h, k, l) => (rh = g.dynCall_vjjjjfiii = U.ig)(a, b, c, d, e, f, h, k, l), sh = g.dynCall_viiiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (sh = g.dynCall_viiiiiijiiiiii = U.jg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), th = g.dynCall_vijjii = (a, b, c, d, e, f) => (th = g.dynCall_vijjii = U.kg)(a, b, c, d, e, f), uh = g.dynCall_viiiiijjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (uh = g.dynCall_viiiiijjiiiii = U.lg)(a, b, c, d, e, f, h, k, l, m, n, q, r), vh = g.dynCall_iiiiiji = (a, b, c, d, e, f, h) => (vh = g.dynCall_iiiiiji = U.mg)(a, b, c, d, e, f, h), wh = g.dynCall_iiiiji = (a, b, c, d, e, f) => (wh = g.dynCall_iiiiji = U.ng)(a, b, c, d, e, f), xh = g.dynCall_viiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (xh = g.dynCall_viiiiijiiiiii = U.og)(a, b, c, d, e, f, h, k, l, m, n, q, r), yh = g.dynCall_viiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (yh = g.dynCall_viiijiiiiii = U.pg)(a, b, c, d, e, f, h, k, l, m, n), zh = g.dynCall_viijj = (a, b, c, d, e) => (zh = g.dynCall_viijj = U.qg)(a, b, c, d, e), Ah = g.dynCall_viiiijii = (a, b, c, d, e, f, h, k) => (Ah = g.dynCall_viiiijii = U.rg)(a, b, c, d, e, f, h, k), Bh = g.dynCall_viijjiii = (a, b, c, d, e, f, h, k) => (Bh = g.dynCall_viijjiii = U.sg)(a, b, c, d, e, f, h, k), Ch = g.dynCall_ijii = (a, b, c, d) => (Ch 
= g.dynCall_ijii = U.tg)(a, b, c, d), Dh = g.dynCall_viiiiijjji = (a, b, c, d, e, f, h, k, l, m) => (Dh = g.dynCall_viiiiijjji = U.ug)(a, b, c, d, e, f, h, k, l, m), Eh = g.dynCall_vijjjjiij = (a, b, c, d, e, f, h, k, l) => (Eh = g.dynCall_vijjjjiij = U.vg)(a, b, c, d, e, f, h, k, l), Fh = g.dynCall_viiiiijij = (a, b, c, d, e, f, h, k, l) => (Fh = g.dynCall_viiiiijij = U.wg)(a, b, c, d, e, f, h, k, l), Gh = g.dynCall_viiiiiijij = (a, b, c, d, e, f, h, k, l, m) => (Gh = g.dynCall_viiiiiijij = U.xg)(a, b, c, d, e, f, h, k, l, m), Hh = g.dynCall_vijiii = (a, b, c, d, e, f) => (Hh = g.dynCall_vijiii = U.yg)(a, b, c, d, e, f), Ih = g.dynCall_viiiiiiiiifi = (a, b, c, d, e, f, h, k, l, m, n, q) => (Ih = g.dynCall_viiiiiiiiifi = U.zg)(a, b, c, d, e, f, h, k, l, m, n, q), Jh = g.dynCall_iiijiiii = (a, b, c, d, e, f, h, k) => (Jh = g.dynCall_iiijiiii = U.Ag)(a, b, c, d, e, f, h, k), Kh = g.dynCall_viiiiiijjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Kh = g.dynCall_viiiiiijjiiiii = U.Bg)( + a, + b, + c, + d, + e, + f, + h, + k, + l, + m, + n, + q, + r, + p + ), Lh = g.dynCall_viiiiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (Lh = g.dynCall_viiiiiiijiiiiii = U.Cg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), Mh = g.dynCall_vif = (a, b, c) => (Mh = g.dynCall_vif = U.Dg)(a, b, c), Nh = g.dynCall_viif = (a, b, c, d) => (Nh = g.dynCall_viif = U.Eg)(a, b, c, d), Oh = g.dynCall_fiii = (a, b, c, d) => (Oh = g.dynCall_fiii = U.Fg)(a, b, c, d), Ph = g.dynCall_diii = (a, b, c, d) => (Ph = g.dynCall_diii = U.Gg)(a, b, c, d), Qh = g.dynCall_viiiiiifii = (a, b, c, d, e, f, h, k, l, m) => (Qh = g.dynCall_viiiiiifii = U.Hg)(a, b, c, d, e, f, h, k, l, m), Rh = g.dynCall_viiiiijiiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa) => (Rh = g.dynCall_viiiiijiiiiiiiiiiiiiiiiiii = U.Ig)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa), Sh = g.dynCall_viijji = (a, b, c, d, e, f) => (Sh = 
g.dynCall_viijji = U.Jg)(a, b, c, d, e, f), Th = g.dynCall_iiiiiiiiiiiji = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Th = g.dynCall_iiiiiiiiiiiji = U.Kg)(a, b, c, d, e, f, h, k, l, m, n, q, r), Uh = g.dynCall_viifiifijjjii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Uh = g.dynCall_viifiifijjjii = U.Lg)(a, b, c, d, e, f, h, k, l, m, n, q, r), Vh = g.dynCall_viiiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (Vh = g.dynCall_viiiiiiiiiiiiiiiiiiii = U.Mg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), Wh = g.dynCall_viiiiifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Wh = g.dynCall_viiiiifiiiiii = U.Ng)(a, b, c, d, e, f, h, k, l, m, n, q, r), Xh = g.dynCall_vijiiiiiiijjii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Xh = g.dynCall_vijiiiiiiijjii = U.Og)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Yh = g.dynCall_viiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) => (Yh = g.dynCall_viiiiiiiiiiiiiiiiii = U.Pg)( + a, + b, + c, + d, + e, + f, + h, + k, + l, + m, + n, + q, + r, + p, + u, + y, + A, + B, + C + ), Zh = g.dynCall_viiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (Zh = g.dynCall_viiiiiiiiiiiiiiiiiii = U.Qg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), $h = g.dynCall_viiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => ($h = g.dynCall_viiiiiiiiiiiiiii = U.Rg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), ai = g.dynCall_viiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (ai = g.dynCall_viiiiiiiiiiiiiiii = U.Sg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), bi = g.dynCall_viiiijjj = (a, b, c, d, e, f, h, k) => (bi = g.dynCall_viiiijjj = U.Tg)(a, b, c, d, e, f, h, k), ci = g.dynCall_iiiiid = (a, b, c, d, e, f) => (ci = g.dynCall_iiiiid = U.Ug)(a, b, c, d, e, f), di = g.dynCall_viiiiiiijjj = (a, b, c, d, e, f, h, k, l, m, n) => (di = g.dynCall_viiiiiiijjj = U.Vg)(a, 
b, c, d, e, f, h, k, l, m, n), ei = g.dynCall_iiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (ei = g.dynCall_iiiiiiiiiii = U.Wg)(a, b, c, d, e, f, h, k, l, m, n), fi = g.dynCall_iiiiiiiiiiiiiiiiiifi = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (fi = g.dynCall_iiiiiiiiiiiiiiiiiifi = U.Xg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), gi = g.dynCall_viiif = (a, b, c, d, e) => (gi = g.dynCall_viiif = U.Yg)(a, b, c, d, e), hi = g.dynCall_viiijiiiii = (a, b, c, d, e, f, h, k, l, m) => (hi = g.dynCall_viiijiiiii = U.Zg)(a, b, c, d, e, f, h, k, l, m), ii = g.dynCall_viiij = (a, b, c, d, e) => (ii = g.dynCall_viiij = U._g)(a, b, c, d, e), ji = g.dynCall_iijjj = (a, b, c, d, e) => (ji = g.dynCall_iijjj = U.$g)(a, b, c, d, e), ki = g.dynCall_viiiiji = (a, b, c, d, e, f, h) => (ki = g.dynCall_viiiiji = U.ah)(a, b, c, d, e, f, h), li = g.dynCall_iijjji = (a, b, c, d, e, f) => (li = g.dynCall_iijjji = U.bh)(a, b, c, d, e, f), mi = g.dynCall_ijijji = (a, b, c, d, e, f) => (mi = g.dynCall_ijijji = U.ch)( + a, + b, + c, + d, + e, + f + ), ni = g.dynCall_viiijjiii = (a, b, c, d, e, f, h, k, l) => (ni = g.dynCall_viiijjiii = U.dh)(a, b, c, d, e, f, h, k, l), oi = g.dynCall_iiiiijji = (a, b, c, d, e, f, h, k) => (oi = g.dynCall_iiiiijji = U.eh)(a, b, c, d, e, f, h, k), pi = g.dynCall_iiiifi = (a, b, c, d, e, f) => (pi = g.dynCall_iiiifi = U.fh)(a, b, c, d, e, f), qi = g.dynCall_iiijii = (a, b, c, d, e, f) => (qi = g.dynCall_iiijii = U.gh)(a, b, c, d, e, f), ri = g.dynCall_iiiiiiiiijii = (a, b, c, d, e, f, h, k, l, m, n, q) => (ri = g.dynCall_iiiiiiiiijii = U.hh)(a, b, c, d, e, f, h, k, l, m, n, q), si = g.dynCall_iiiijjii = (a, b, c, d, e, f, h, k) => (si = g.dynCall_iiiijjii = U.ih)(a, b, c, d, e, f, h, k), ti = g.dynCall_iiiiiijjjii = (a, b, c, d, e, f, h, k, l, m, n) => (ti = g.dynCall_iiiiiijjjii = U.jh)(a, b, c, d, e, f, h, k, l, m, n), ui = g.dynCall_iiijiii = (a, b, c, d, e, f, h) => (ui = g.dynCall_iiijiii = U.kh)(a, b, c, d, e, f, h), 
vi = g.dynCall_iiiiiiiijjjfi = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (vi = g.dynCall_iiiiiiiijjjfi = U.lh)(a, b, c, d, e, f, h, k, l, m, n, q, r), wi = g.dynCall_iijiiii = (a, b, c, d, e, f, h) => (wi = g.dynCall_iijiiii = U.mh)(a, b, c, d, e, f, h), xi = g.dynCall_iijjjii = (a, b, c, d, e, f, h) => (xi = g.dynCall_iijjjii = U.nh)(a, b, c, d, e, f, h), yi = g.dynCall_jij = (a, b, c) => (yi = g.dynCall_jij = U.oh)(a, b, c), zi = g.dynCall_iiji = (a, b, c, d) => (zi = g.dynCall_iiji = U.ph)(a, b, c, d), Ai = g.dynCall_iiif = (a, b, c, d) => (Ai = g.dynCall_iiif = U.qh)(a, b, c, d), Bi = g.dynCall_vidi = (a, b, c, d) => (Bi = g.dynCall_vidi = U.rh)(a, b, c, d), Ci = g.dynCall_vjiii = (a, b, c, d, e) => (Ci = g.dynCall_vjiii = U.sh)(a, b, c, d, e), Di = g.dynCall_diiii = (a, b, c, d, e) => (Di = g.dynCall_diiii = U.th)(a, b, c, d, e), Ei = g.dynCall_diiiii = (a, b, c, d, e, f) => (Ei = g.dynCall_diiiii = U.uh)(a, b, c, d, e, f), Fi = g.dynCall_viiijjiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (Fi = g.dynCall_viiijjiiiiii = U.vh)(a, b, c, d, e, f, h, k, l, m, n, q), Gi = g.dynCall_viijjijjjjiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Gi = g.dynCall_viijjijjjjiii = U.wh)(a, b, c, d, e, f, h, k, l, m, n, q, r), Hi = g.dynCall_iiiij = (a, b, c, d, e) => (Hi = g.dynCall_iiiij = U.xh)(a, b, c, d, e), Ii = g.dynCall_viiijii = (a, b, c, d, e, f, h) => (Ii = g.dynCall_viiijii = U.yh)(a, b, c, d, e, f, h), Ji = g.dynCall_viijiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Ji = g.dynCall_viijiiiiiiiii = U.zh)(a, b, c, d, e, f, h, k, l, m, n, q, r), Ki = g.dynCall_fiiii = (a, b, c, d, e) => (Ki = g.dynCall_fiiii = U.Ah)(a, b, c, d, e), Li = g.dynCall_jfi = (a, b, c) => (Li = g.dynCall_jfi = U.Bh)(a, b, c), Mi = g.dynCall_viiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (Mi = g.dynCall_viiiiiiiiiiiiii = U.Ch)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), Ni = g.dynCall_jiij = (a, b, c, d) => (Ni = g.dynCall_jiij = U.Dh)(a, b, c, d), Mb = 
(a) => (Mb = U.Eh)(a), Bb = () => (Bb = U.Fh)(), Lb = (a) => (Lb = U.Gh)(a), Nb = () => (Nb = U.Hh)(); + g.___start_em_js = 1275044; + g.___stop_em_js = 1275205; + function Dc(a, b, c, d) { + var e = Z(); + try { + return Ff(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Bc(a, b, c) { + var d = Z(); + try { + return dynCall_iii(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function Jd(a, b, c) { + var d = Z(); + try { + dynCall_vii(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function yc(a, b) { + var c = Z(); + try { + return Gf(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function Fd(a, b) { + var c = Z(); + try { + dynCall_vi(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function dd(a, b, c, d) { + var e = Z(); + try { + return Jf(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Dd(a) { + var b = Z(); + try { + dynCall_v(a); + } catch (c) { + Y(b); + if (c !== c + 0) + throw c; + X(1, 0); + } + } + function Ic(a, b, c, d, e, f, h) { + var k = Z(); + try { + return Hf(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Hc(a, b, c, d, e, f) { + var h = Z(); + try { + return If(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Fc(a, b, c, d, e) { + var f = Z(); + try { + return Kf(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Qd(a, b, c, d) { + var e = Z(); + try { + Lf(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Td(a, b, c, d, e) { + var f = Z(); + try { + Nf(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Vd(a, b, c, d, e, f) { + var h = Z(); + try { + 
Mf(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Zd(a, b, c, d, e, f, h) { + var k = Z(); + try { + Of(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function ke(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + Qf(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function zc(a, b, c) { + var d = Z(); + try { + return Sf(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function Ac(a, b, c) { + var d = Z(); + try { + return Tf(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function md(a, b, c) { + var d = Z(); + try { + return Uf(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function yd(a, b, c) { + var d = Z(); + try { + return Vf(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + return 0n; + } + } + function ee(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + Xf(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function xc(a) { + var b = Z(); + try { + return Wf(a); + } catch (c) { + Y(b); + if (c !== c + 0) + throw c; + X(1, 0); + } + } + function df(a, b, c) { + var d = Z(); + try { + fg(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function We(a, b, c, d, e) { + var f = Z(); + try { + Pf(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Ce(a, b, c, d, e, f, h) { + var k = Z(); + try { + Yf(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function cf(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + gg(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function Re(a, b, c, d, e, f, 
h, k, l, m, n, q) { + var r = Z(); + try { + hg(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function Jc(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + return ig(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function de(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + jg(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function Nc(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + return ag(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function fe(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + bg(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function me(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + lg(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function Yc(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + return ng(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function Kc(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + return mg(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function nf(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + og(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Ve(a, b, c, d) { + var e = Z(); + try { + $f(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function tc(a, b) { + var c = Z(); + try { + return pg(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function xd(a, b) { + var c = Z(); + try { + return Zf(a, b); + } catch (d) { + Y(c); + if (d !== d + 
0) + throw d; + X(1, 0); + return 0n; + } + } + function nc(a, b) { + var c = Z(); + try { + return rg(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function jf(a, b, c, d) { + var e = Z(); + try { + tg(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Oe(a, b, c, d, e, f, h) { + var k = Z(); + try { + Ii(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Ze(a, b, c, d, e) { + var f = Z(); + try { + zh(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Lc(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + return ug(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function Xe(a, b, c, d, e, f, h) { + var k = Z(); + try { + vg(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Kd(a, b, c, d) { + var e = Z(); + try { + wg(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Tc(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + return kg(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Id(a, b, c, d, e, f, h) { + var k = Z(); + try { + xg(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Ue(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + Rf(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Pd(a, b, c, d, e, f, h) { + var k = Z(); + try { + yg(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Wd(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + zg(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function zd(a, b, c, 
d) { + var e = Z(); + try { + return Bg(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + return 0n; + } + } + function uf(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + Cg(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function Rd(a, b, c, d, e) { + var f = Z(); + try { + Dg(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function ef(a, b, c, d) { + var e = Z(); + try { + Fg(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function rf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) { + var A = Z(); + try { + Gg(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y); + } catch (B) { + Y(A); + if (B !== B + 0) + throw B; + X(1, 0); + } + } + function Ne(a, b, c, d, e, f) { + var h = Z(); + try { + Hg(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function of(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + Ig(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function ed(a, b, c, d, e) { + var f = Z(); + try { + return Jg(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function kd(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + return Kg(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function sf(a, b) { + var c = Z(); + try { + Lg(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function Cd(a, b, c) { + var d = Z(); + try { + return Mg(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + return 0n; + } + } + function jd(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + return Ng(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function ie(a, b, c, d, e, 
f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + Sg(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function Ed(a, b, c, d, e) { + var f = Z(); + try { + Og(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Ud(a, b, c, d, e, f, h) { + var k = Z(); + try { + Pg(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Md(a, b, c, d, e) { + var f = Z(); + try { + Rg(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Xd(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + Qg(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function ue(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + Tg(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function Oc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) { + var B = Z(); + try { + return Ug(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A); + } catch (C) { + Y(B); + if (C !== C + 0) + throw C; + X(1, 0); + } + } + function le(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + Vg(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function sd(a, b) { + var c = Z(); + try { + return cg(a, b); + } catch (d) { + Y(c); + if (d !== d + 0) + throw d; + X(1, 0); + } + } + function wc(a, b, c, d, e) { + var f = Z(); + try { + return Ki(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Pc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) { + var F = Z(); + try { + return Wg(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D); + } catch (H) { + Y(F); + if (H !== H + 0) + throw H; + X(1, 0); + } + } + function mf(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); 
+ try { + Xg(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function cd(a, b, c, d, e, f, h) { + var k = Z(); + try { + return Yg(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function je(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + Zg(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function ld(a, b, c, d, e, f) { + var h = Z(); + try { + return $g(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Ie(a, b, c, d, e, f) { + var h = Z(); + try { + bh(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function ye(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + ch(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function zf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) { + var C = Z(); + try { + dh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B); + } catch (D) { + Y(C); + if (D !== D + 0) + throw D; + X(1, 0); + } + } + function ae(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) { + var B = Z(); + try { + eh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A); + } catch (C) { + Y(B); + if (C !== C + 0) + throw C; + X(1, 0); + } + } + function $d(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) { + var A = Z(); + try { + fh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y); + } catch (B) { + Y(A); + if (B !== B + 0) + throw B; + X(1, 0); + } + } + function be(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) { + var y = Z(); + try { + gh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u); + } catch (A) { + Y(y); + if (A !== A + 0) + throw A; + X(1, 0); + } + } + function Af(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) { + var H = Z(); + try { + hh(a, b, c, d, e, f, h, k, l, m, n, q, r, 
p, u, y, A, B, C, D, F); + } catch (K) { + Y(H); + if (K !== K + 0) + throw K; + X(1, 0); + } + } + function yf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) { + var D = Z(); + try { + ih(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C); + } catch (F) { + Y(D); + if (F !== F + 0) + throw F; + X(1, 0); + } + } + function xf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) { + var C = Z(); + try { + jh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B); + } catch (D) { + Y(C); + if (D !== D + 0) + throw D; + X(1, 0); + } + } + function Bf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) { + var H = Z(); + try { + kh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F); + } catch (K) { + Y(H); + if (K !== K + 0) + throw K; + X(1, 0); + } + } + function gf(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + lh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function kf(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + mh(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function ve(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + nh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function qf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) { + var y = Z(); + try { + oh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u); + } catch (A) { + Y(y); + if (A !== A + 0) + throw A; + X(1, 0); + } + } + function wf(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + qh(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function vf(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + rh(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function sc(a, b, c, d, e, f, h) { + var k = Z(); + try { + return 
ah(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Nd(a, b, c, d, e) { + var f = Z(); + try { + ph(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function wd(a, b, c) { + var d = Z(); + try { + return Li(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + return 0n; + } + } + function ze(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + sh(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function Ge(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + uh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function Xc(a, b, c, d, e, f, h) { + var k = Z(); + try { + return vh(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function ad(a, b, c, d, e, f) { + var h = Z(); + try { + return wh(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function De(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + xh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function lf(a, b, c, d, e, f) { + var h = Z(); + try { + th(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Qe(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + yh(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function Ke(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + Ah(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function af(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + Bh(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + 
function td(a, b, c, d) { + var e = Z(); + try { + return Ch(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function He(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + Dh(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function pf(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + Eh(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Fe(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + Fh(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Ae(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + Gh(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function ff(a, b, c, d, e, f) { + var h = Z(); + try { + Hh(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function he(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + Ih(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function hd(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + return Jh(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function ne(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) { + var y = Z(); + try { + Mi(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u); + } catch (A) { + Y(y); + if (A !== A + 0) + throw A; + X(1, 0); + } + } + function Be(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + Kh(a, b, c, d, e, f, h, k, l, m, n, q, r, p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function we(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) { + var y = Z(); + try { + Lh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u); + } catch (A) { + Y(y); + if (A !== A + 0) + throw A; + X(1, 0); + } + } + function Hd(a, 
b, c) { + var d = Z(); + try { + Mh(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function vc(a, b, c, d) { + var e = Z(); + try { + return Oh(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function pc(a, b, c, d) { + var e = Z(); + try { + return Ph(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function ce(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + Qh(a, b, c, d, e, f, h, k, l, m); + } catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function Ee(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa) { + var Qi = Z(); + try { + Rh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa); + } catch (Ya) { + Y(Qi); + if (Ya !== Ya + 0) + throw Ya; + X(1, 0); + } + } + function $e(a, b, c, d, e, f) { + var h = Z(); + try { + Sh(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Rc(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + return Th(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function Od(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + Uh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function se(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) { + var H = Z(); + try { + Vh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F); + } catch (K) { + Y(H); + if (K !== K + 0) + throw K; + X(1, 0); + } + } + function Yd(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + Wh(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function hf(a, b, c, d, e, f, h, k, l, m, n, q, r, p) { + var u = Z(); + try { + Xh(a, b, c, d, e, f, h, k, l, m, n, q, r, 
p); + } catch (y) { + Y(u); + if (y !== y + 0) + throw y; + X(1, 0); + } + } + function re(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) { + var F = Z(); + try { + Zh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D); + } catch (H) { + Y(F); + if (H !== H + 0) + throw H; + X(1, 0); + } + } + function pe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) { + var B = Z(); + try { + ai(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A); + } catch (C) { + Y(B); + if (C !== C + 0) + throw C; + X(1, 0); + } + } + function qe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) { + var D = Z(); + try { + Yh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C); + } catch (F) { + Y(D); + if (F !== F + 0) + throw F; + X(1, 0); + } + } + function Le(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + bi(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function xe(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + di(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function Mc(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + return ei(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function Qc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) { + var F = Z(); + try { + return fi(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D); + } catch (H) { + Y(F); + if (H !== H + 0) + throw H; + X(1, 0); + } + } + function Sd(a, b, c, d, e) { + var f = Z(); + try { + gi(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function ge(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + Ag(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function Pe(a, b, c, d, e, f, h, k, l, m) { + var n = Z(); + try { + hi(a, b, c, d, e, f, h, k, l, m); + } 
catch (q) { + Y(n); + if (q !== q + 0) + throw q; + X(1, 0); + } + } + function Me(a, b, c, d, e) { + var f = Z(); + try { + ii(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Ye(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + Ji(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function pd(a, b, c, d, e) { + var f = Z(); + try { + return ji(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function Ad(a, b, c, d) { + var e = Z(); + try { + return Ni(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + return 0n; + } + } + function Je(a, b, c, d, e, f, h) { + var k = Z(); + try { + ki(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function qd(a, b, c, d, e, f) { + var h = Z(); + try { + return li(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function ud(a, b, c, d, e, f) { + var h = Z(); + try { + return mi(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Se(a, b, c, d, e, f, h, k, l) { + var m = Z(); + try { + ni(a, b, c, d, e, f, h, k, l); + } catch (n) { + Y(m); + if (n !== n + 0) + throw n; + X(1, 0); + } + } + function Ld(a, b, c, d) { + var e = Z(); + try { + Nh(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Zc(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + return oi(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function Ec(a, b, c, d, e, f) { + var h = Z(); + try { + return pi(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function fd(a, b, c, d, e, f) { + var h = Z(); + try { + return qi(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw 
k; + X(1, 0); + } + } + function Sc(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + return ri(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function bd(a, b, c, d, e, f, h, k) { + var l = Z(); + try { + return si(a, b, c, d, e, f, h, k); + } catch (m) { + Y(l); + if (m !== m + 0) + throw m; + X(1, 0); + } + } + function Vc(a, b, c, d, e, f, h, k, l, m, n) { + var q = Z(); + try { + return ti(a, b, c, d, e, f, h, k, l, m, n); + } catch (r) { + Y(q); + if (r !== r + 0) + throw r; + X(1, 0); + } + } + function gd(a, b, c, d, e, f, h) { + var k = Z(); + try { + return ui(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Uc(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + return vi(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function od(a, b, c, d, e, f, h) { + var k = Z(); + try { + return wi(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function rd(a, b, c, d, e, f, h) { + var k = Z(); + try { + return xi(a, b, c, d, e, f, h); + } catch (l) { + Y(k); + if (l !== l + 0) + throw l; + X(1, 0); + } + } + function Bd(a, b, c) { + var d = Z(); + try { + return yi(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + return 0n; + } + } + function nd(a, b, c, d) { + var e = Z(); + try { + return zi(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Gd(a, b, c, d) { + var e = Z(); + try { + Bi(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function Cc(a, b, c, d) { + var e = Z(); + try { + return Ai(a, b, c, d); + } catch (f) { + Y(e); + if (f !== f + 0) + throw f; + X(1, 0); + } + } + function tf(a, b, c, d, e) { + var f = Z(); + try { + Ci(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 
0) + throw h; + X(1, 0); + } + } + function qc(a, b, c, d, e) { + var f = Z(); + try { + return Di(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function rc(a, b, c, d, e, f) { + var h = Z(); + try { + return Ei(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Te(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + Fi(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function bf(a, b, c, d, e, f, h, k, l, m, n, q, r) { + var p = Z(); + try { + Gi(a, b, c, d, e, f, h, k, l, m, n, q, r); + } catch (u) { + Y(p); + if (u !== u + 0) + throw u; + X(1, 0); + } + } + function te(a, b, c, d, e, f, h, k, l, m, n, q) { + var r = Z(); + try { + Eg(a, b, c, d, e, f, h, k, l, m, n, q); + } catch (p) { + Y(r); + if (p !== p + 0) + throw p; + X(1, 0); + } + } + function $c(a, b, c, d, e) { + var f = Z(); + try { + return Hi(a, b, c, d, e); + } catch (h) { + Y(f); + if (h !== h + 0) + throw h; + X(1, 0); + } + } + function vd(a) { + var b = Z(); + try { + return eg(a); + } catch (c) { + Y(b); + if (c !== c + 0) + throw c; + X(1, 0); + return 0n; + } + } + function Wc(a, b, c, d, e, f) { + var h = Z(); + try { + return dg(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function Gc(a, b, c, d, e, f) { + var h = Z(); + try { + return ci(a, b, c, d, e, f); + } catch (k) { + Y(h); + if (k !== k + 0) + throw k; + X(1, 0); + } + } + function oe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) { + var A = Z(); + try { + $h(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y); + } catch (B) { + Y(A); + if (B !== B + 0) + throw B; + X(1, 0); + } + } + function uc(a, b, c) { + var d = Z(); + try { + return qg(a, b, c); + } catch (e) { + Y(d); + if (e !== e + 0) + throw e; + X(1, 0); + } + } + function oc(a, b, c) { + var d = Z(); + try { + return sg(a, b, c); + } catch (e) { + Y(d); + 
if (e !== e + 0) + throw e; + X(1, 0); + } + } + function Df() { + var a = U; + a = Object.assign({}, a); + var b = (c) => (d) => c(d) >>> 0; + a.Fd = b(a.Fd); + a.he = b(a.he); + a.ne = b(a.ne); + a.oe = ((c) => () => c() >>> 0)(a.oe); + return a; + } + g.stackSave = () => Z(); + g.stackRestore = (a) => Y(a); + g.stackAlloc = (a) => Ef(a); + g.UTF8ToString = J; + g.stringToUTF8 = (a, b, c) => M(a, E, b, c); + g.lengthBytesUTF8 = bb; + var Oi; + Fa = function Pi() { + Oi || Ri(); + Oi || (Fa = Pi); + }; + function Ri() { + if (!(0 < Da)) { + if (g.preRun) + for ("function" == typeof g.preRun && (g.preRun = [g.preRun]); g.preRun.length; ) { + var a = g.preRun.shift(); + Aa.unshift(a); + } + for (; 0 < Aa.length; ) + Aa.shift()(g); + if (!(0 < Da || Oi || (Oi = true, g.calledRun = true, x))) { + for (; 0 < Ba.length; ) + Ba.shift()(g); + for (aa(g); 0 < Ca.length; ) + Ca.shift()(g); + } + } + } + Ri(); + return readyPromise; + }; + })(); + if (typeof exports === "object" && typeof module === "object") + module.exports = ortWasm; + else if (typeof define === "function" && define["amd"]) + define([], () => ortWasm); + } +}); + +// nodejs-ignore:worker_threads +var require_worker_threads = __commonJS({ + "nodejs-ignore:worker_threads"() { + } +}); + +// nodejs-ignore:perf_hooks +var require_perf_hooks = __commonJS({ + "nodejs-ignore:perf_hooks"() { + } +}); + +// nodejs-ignore:os +var os_exports = {}; +__export(os_exports, { + cpus: () => cpus2 +}); +var cpus2; +var init_os = __esm({ + "nodejs-ignore:os"() { + cpus2 = void 0; + } +}); + +// web/lib/wasm/binding/ort-wasm-simd-threaded.jsep.js +var require_ort_wasm_simd_threaded_jsep = __commonJS({ + "web/lib/wasm/binding/ort-wasm-simd-threaded.jsep.js"(exports, module) { + "use strict"; + var ortWasmThreaded = (() => { + var _scriptDir = typeof document !== "undefined" && document.currentScript ? 
document.currentScript.src : void 0; + if (typeof __filename !== "undefined") + _scriptDir = _scriptDir || __filename; + return function(moduleArg = {}) { + function d() { + l.buffer != p.buffer && t(); + return p; + } + function v() { + l.buffer != p.buffer && t(); + return aa; + } + function z() { + l.buffer != p.buffer && t(); + return ba; + } + function A() { + l.buffer != p.buffer && t(); + return ca; + } + function da() { + l.buffer != p.buffer && t(); + return ea; + } + var B = moduleArg, fa, C; + B.ready = new Promise((a, b) => { + fa = a; + C = b; + }); + "use strict"; + B.jsepInit = (a, b, c, e, f, h, k, q) => { + B.Qb = a; + B.wb = b; + B.yb = c; + B.jb = e; + B.xb = f; + B.Ea = h; + B.zb = k; + B.Ab = q; + b = (n, m, r) => (...w) => { + const x = D, g = m?.(); + w = n(...w); + const u = m?.(); + g !== u && (n = u, r(g), m = r = null); + return D != x ? ha() : w; + }; + c = (n) => async (...m) => { + try { + if (B.bb) + throw Error("Session already started"); + const r = B.bb = { Fb: m[0], errors: [] }, w = await n(...m); + if (B.bb !== r) + throw Error("Session mismatch"); + a.flush(); + const x = r.errors; + if (0 < x.length) { + let g = await Promise.all(x); + g = g.filter((u) => u); + if (0 < g.length) + throw Error(g.join("\n")); + } + return w; + } finally { + B.bb = null; + } + }; + B._OrtRun = c(b(B._OrtRun, () => B._OrtRun, (n) => B._OrtRun = n)); + B._OrtRunWithBinding = c(b(B._OrtRunWithBinding, () => B._OrtRunWithBinding, (n) => B._OrtRunWithBinding = n)); + B._OrtBindInput = b(B._OrtBindInput, () => B._OrtBindInput, (n) => B._OrtBindInput = n); + B.jsepRegisterBuffer = (n, m, r, w) => a.registerBuffer(n, m, r, w); + B.jsepUnregisterBuffers = (n) => { + a.unregisterBuffers(n); + }; + B.jsepGetBuffer = (n) => a.getBuffer(n); + B.jsepCreateDownloader = (n, m, r) => a.createDownloader(n, m, r); + }; + var ia = Object.assign({}, B), ja = "./this.program", E = (a, b) => { + throw b; + }, ka = "object" == typeof window, F = "function" == typeof 
importScripts, G = "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node, H = B.ENVIRONMENT_IS_PTHREAD || false, I = ""; + function la(a) { + return B.locateFile ? B.locateFile(a, I) : I + a; + } + var ma, J, na; + if (G) { + var fs = (init_fs(), __toCommonJS(fs_exports)), oa = (init_path(), __toCommonJS(path_exports)); + I = F ? oa.dirname(I) + "/" : __dirname + "/"; + ma = (b, c) => { + b = b.startsWith("file://") ? new URL(b) : oa.normalize(b); + return fs.readFileSync(b, c ? void 0 : "utf8"); + }; + na = (b) => { + b = ma(b, true); + b.buffer || (b = new Uint8Array(b)); + return b; + }; + J = (b, c, e, f = true) => { + b = b.startsWith("file://") ? new URL(b) : oa.normalize(b); + fs.readFile(b, f ? void 0 : "utf8", (h, k) => { + h ? e(h) : c(f ? k.buffer : k); + }); + }; + !B.thisProgram && 1 < process.argv.length && (ja = process.argv[1].replace(/\\/g, "/")); + process.argv.slice(2); + E = (b, c) => { + process.exitCode = b; + throw c; + }; + B.inspect = () => "[Emscripten Module object]"; + let a; + try { + a = require_worker_threads(); + } catch (b) { + throw console.error('The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?'), b; + } + global.Worker = a.Worker; + } else if (ka || F) + F ? I = self.location.href : "undefined" != typeof document && document.currentScript && (I = document.currentScript.src), typeof _scriptDir !== "undefined" && _scriptDir && (I = _scriptDir), 0 !== I.indexOf("blob:") ? 
I = I.substr(0, I.replace(/[?#].*/, "").lastIndexOf("/") + 1) : I = "", G || (ma = (a) => { + var b = new XMLHttpRequest(); + b.open("GET", a, false); + b.send(null); + return b.responseText; + }, F && (na = (a) => { + var b = new XMLHttpRequest(); + b.open("GET", a, false); + b.responseType = "arraybuffer"; + b.send(null); + return new Uint8Array(b.response); + }), J = (a, b, c) => { + var e = new XMLHttpRequest(); + e.open("GET", a, true); + e.responseType = "arraybuffer"; + e.onload = () => { + 200 == e.status || 0 == e.status && e.response ? b(e.response) : c(); + }; + e.onerror = c; + e.send(null); + }); + G && "undefined" == typeof performance && (global.performance = require_perf_hooks().performance); + var pa = console.log.bind(console), qa = console.error.bind(console); + G && (pa = (...a) => fs.writeSync(1, a.join(" ") + "\n"), qa = (...a) => fs.writeSync(2, a.join(" ") + "\n")); + var ra = B.print || pa, K = B.printErr || qa; + Object.assign(B, ia); + ia = null; + B.thisProgram && (ja = B.thisProgram); + B.quit && (E = B.quit); + var L; + B.wasmBinary && (L = B.wasmBinary); + var noExitRuntime = B.noExitRuntime || true; + "object" != typeof WebAssembly && M("no native wasm support detected"); + var l, N, sa, P = false, Q, p, aa, ba, ca, ea; + function t() { + var a = l.buffer; + B.HEAP8 = p = new Int8Array(a); + B.HEAP16 = new Int16Array(a); + B.HEAP32 = ba = new Int32Array(a); + B.HEAPU8 = aa = new Uint8Array(a); + B.HEAPU16 = new Uint16Array(a); + B.HEAPU32 = ca = new Uint32Array(a); + B.HEAPF32 = new Float32Array(a); + B.HEAPF64 = ea = new Float64Array(a); + } + var ta = B.INITIAL_MEMORY || 16777216; + 5242880 <= ta || M("INITIAL_MEMORY should be larger than STACK_SIZE, was " + ta + "! 
(STACK_SIZE=5242880)"); + if (H) + l = B.wasmMemory; + else if (B.wasmMemory) + l = B.wasmMemory; + else if (l = new WebAssembly.Memory({ initial: ta / 65536, maximum: 65536, shared: true }), !(l.buffer instanceof SharedArrayBuffer)) + throw K("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"), G && K("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)"), Error("bad memory"); + t(); + ta = l.buffer.byteLength; + var ua = [], va = [], wa = [], xa = 0; + function ya() { + return noExitRuntime || 0 < xa; + } + var R = 0, za = null, S = null; + function Aa() { + R++; + B.monitorRunDependencies && B.monitorRunDependencies(R); + } + function Ba() { + R--; + B.monitorRunDependencies && B.monitorRunDependencies(R); + if (0 == R && (null !== za && (clearInterval(za), za = null), S)) { + var a = S; + S = null; + a(); + } + } + function M(a) { + if (B.onAbort) + B.onAbort(a); + a = "Aborted(" + a + ")"; + K(a); + P = true; + Q = 1; + a = new WebAssembly.RuntimeError(a + ". 
Build with -sASSERTIONS for more info."); + C(a); + throw a; + } + function Ca(a) { + return a.startsWith("data:application/octet-stream;base64,"); + } + var T; + T = "ort-wasm-simd-threaded.wasm"; + Ca(T) || (T = la(T)); + function Da(a) { + if (a == T && L) + return new Uint8Array(L); + if (na) + return na(a); + throw "both async and sync fetching of the wasm failed"; + } + function Ea(a) { + if (!L && (ka || F)) { + if ("function" == typeof fetch && !a.startsWith("file://")) + return fetch(a, { credentials: "same-origin" }).then((b) => { + if (!b.ok) + throw "failed to load wasm binary file at '" + a + "'"; + return b.arrayBuffer(); + }).catch(() => Da(a)); + if (J) + return new Promise((b, c) => { + J(a, (e) => b(new Uint8Array(e)), c); + }); + } + return Promise.resolve().then(() => Da(a)); + } + function Fa(a, b, c) { + return Ea(a).then((e) => WebAssembly.instantiate(e, b)).then((e) => e).then(c, (e) => { + K("failed to asynchronously prepare wasm: " + e); + M(e); + }); + } + function Ga(a, b) { + var c = T; + return L || "function" != typeof WebAssembly.instantiateStreaming || Ca(c) || c.startsWith("file://") || G || "function" != typeof fetch ? 
Fa(c, a, b) : fetch(c, { credentials: "same-origin" }).then((e) => WebAssembly.instantiateStreaming(e, a).then(b, function(f) { + K("wasm streaming compile failed: " + f); + K("falling back to ArrayBuffer instantiation"); + return Fa(c, a, b); + })); + } + var U, Ha = { + 906828: (a) => { + B.Ea("Abs", a, void 0); + }, + 906879: (a) => { + B.Ea("Neg", a, void 0); + }, + 906930: (a) => { + B.Ea("Floor", a, void 0); + }, + 906983: (a) => { + B.Ea("Ceil", a, void 0); + }, + 907035: (a) => { + B.Ea("Reciprocal", a, void 0); + }, + 907093: (a) => { + B.Ea("Sqrt", a, void 0); + }, + 907145: (a) => { + B.Ea("Exp", a, void 0); + }, + 907196: (a) => { + B.Ea("Erf", a, void 0); + }, + 907247: (a) => { + B.Ea("Sigmoid", a, void 0); + }, + 907302: (a) => { + B.Ea("Log", a, void 0); + }, + 907353: (a) => { + B.Ea("Sin", a, void 0); + }, + 907404: (a) => { + B.Ea("Cos", a, void 0); + }, + 907455: (a) => { + B.Ea("Tan", a, void 0); + }, + 907506: (a) => { + B.Ea("Asin", a, void 0); + }, + 907558: (a) => { + B.Ea( + "Acos", + a, + void 0 + ); + }, + 907610: (a) => { + B.Ea("Atan", a, void 0); + }, + 907662: (a) => { + B.Ea("Sinh", a, void 0); + }, + 907714: (a) => { + B.Ea("Cosh", a, void 0); + }, + 907766: (a) => { + B.Ea("Asinh", a, void 0); + }, + 907819: (a) => { + B.Ea("Acosh", a, void 0); + }, + 907872: (a) => { + B.Ea("Atanh", a, void 0); + }, + 907925: (a) => { + B.Ea("Tanh", a, void 0); + }, + 907977: (a) => { + B.Ea("Not", a, void 0); + }, + 908028: (a, b, c) => { + B.Ea("ClipV10", a, { min: b, max: c }); + }, + 908100: (a) => { + B.Ea("Clip", a, void 0); + }, + 908152: (a, b) => { + B.Ea("Elu", a, { alpha: b }); + }, + 908210: (a) => { + B.Ea("Relu", a, void 0); + }, + 908262: (a, b) => { + B.Ea("LeakyRelu", a, { alpha: b }); + }, + 908326: (a, b) => { + B.Ea("ThresholdedRelu", a, { alpha: b }); + }, + 908396: (a, b) => { + B.Ea("Cast", a, { to: b }); + }, + 908454: (a) => { + B.Ea("Add", a, void 0); + }, + 908505: (a) => { + B.Ea("Sub", a, void 0); + }, + 908556: (a) 
=> { + B.Ea("Mul", a, void 0); + }, + 908607: (a) => { + B.Ea("Div", a, void 0); + }, + 908658: (a) => { + B.Ea("Pow", a, void 0); + }, + 908709: (a) => { + B.Ea("Equal", a, void 0); + }, + 908762: (a) => { + B.Ea("Greater", a, void 0); + }, + 908817: (a) => { + B.Ea("GreaterOrEqual", a, void 0); + }, + 908879: (a) => { + B.Ea("Less", a, void 0); + }, + 908931: (a) => { + B.Ea("LessOrEqual", a, void 0); + }, + 908990: (a, b, c, e, f) => { + B.Ea("ReduceMean", a, { + keepDims: !!b, + noopWithEmptyAxes: !!c, + axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] + }); + }, + 909154: (a, b, c, e, f) => { + B.Ea("ReduceMax", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 909317: (a, b, c, e, f) => { + B.Ea("ReduceMin", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 909480: (a, b, c, e, f) => { + B.Ea("ReduceProd", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 909644: (a, b, c, e, f) => { + B.Ea("ReduceSum", a, { + keepDims: !!b, + noopWithEmptyAxes: !!c, + axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] + }); + }, + 909807: (a, b, c, e, f) => { + B.Ea("ReduceL1", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 909969: (a, b, c, e, f) => { + B.Ea("ReduceL2", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 910131: (a, b, c, e, f) => { + B.Ea("ReduceLogSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 910297: (a, b, c, e, f) => { + B.Ea("ReduceSumSquare", a, { + keepDims: !!b, + noopWithEmptyAxes: !!c, + axes: e ? 
Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] + }); + }, + 910466: (a, b, c, e, f) => { + B.Ea("ReduceLogSumExp", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 910635: (a) => { + B.Ea("Where", a, void 0); + }, + 910688: (a, b, c) => { + B.Ea("Transpose", a, { perm: b ? Array.from(z().subarray(c >>> 0, c + b >>> 0)) : [] }); + }, + 910801: (a, b, c, e, f, h, k, q, n, m) => { + B.Ea("Conv", a, { format: n ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], w_is_const: () => !!d()[m >>> 0] }); + }, + 911029: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u) => { + B.Ea("Conv", a, { format: g ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, e], group: f, kernel_shape: [h, k], pads: [q, n, m, r], strides: [w, x], w_is_const: () => !!d()[u >>> 0] }); + }, + 911288: (a, b, c, e, f, h, k, q, n, m) => { + B.Ea("Conv", a, { format: n ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], w_is_const: () => !!d()[m >>> 0] }); + }, + 911516: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u) => { + B.Ea("Conv", a, { format: g ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, e], group: f, kernel_shape: [ + h, + k + ], pads: [q, n, m, r], strides: [w, x], w_is_const: () => !!d()[u >>> 0] }); + }, + 911775: (a, b, c, e, f, h, k, q, n, m, r, w, x, g) => { + B.Ea("ConvTranspose", a, { format: n ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], wIsConst: () => !!d()[m >>> 0], outputPadding: r ? Array.from(z().subarray(w >>> 0, w + r >>> 0)) : [], outputShape: x ? Array.from(z().subarray(g >>> 0, g + x >>> 0)) : [] }); + }, + 912155: (a, b, c, e, f, h, k, q, n, m, r, w, x) => { + B.Ea("ConvTranspose", a, { format: q ? 
"NHWC" : "NCHW", autoPad: b, dilations: Array.from(z().subarray(c >>> 0, c + 2 >>> 0)), group: e, kernelShape: Array.from(z().subarray(f >>> 0, f + 2 >>> 0)), pads: Array.from(z().subarray(h >>> 0, h + 4 >>> 0)), strides: Array.from(z().subarray(k >>> 0, k + 2 >>> 0)), wIsConst: () => !!d()[n >>> 0], outputPadding: 0 < m ? Array.from(z().subarray(r >>> 0, r + m >>> 0)) : [], outputShape: 0 < w ? Array.from(z().subarray(x >>> 0, x + w >>> 0)) : [] }); + }, + 912678: (a, b, c, e, f, h, k, q, n, m, r, w, x, g) => { + B.Ea("ConvTranspose", a, { format: n ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], wIsConst: () => !!d()[m >>> 0], outputPadding: r ? Array.from(z().subarray(w >>> 0, w + r >>> 0)) : [], outputShape: x ? Array.from(z().subarray(g >>> 0, g + x >>> 0)) : [] }); + }, + 913058: (a, b, c, e, f, h, k, q, n, m, r, w, x) => { + B.Ea("ConvTranspose", a, { format: q ? "NHWC" : "NCHW", autoPad: b, dilations: Array.from(z().subarray(c >>> 0, c + 2 >>> 0)), group: e, kernelShape: Array.from(z().subarray(f >>> 0, f + 2 >>> 0)), pads: Array.from(z().subarray(h >>> 0, h + 4 >>> 0)), strides: Array.from(z().subarray(k >>> 0, k + 2 >>> 0)), wIsConst: () => !!d()[n >>> 0], outputPadding: 0 < m ? Array.from(z().subarray(r >>> 0, r + m >>> 0)) : [], outputShape: 0 < w ? Array.from(z().subarray(x >>> 0, x + w >>> 0)) : [] }); + }, + 913581: (a, b) => { + B.Ea("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 913672: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => { + B.Ea("AveragePool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] }); + }, + 913956: (a, b) => { + B.Ea("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 914047: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => { + B.Ea("AveragePool", a, { + format: y ? 
"NHWC" : "NCHW", + auto_pad: b, + ceil_mode: c, + count_include_pad: e, + storage_order: f, + dilations: [h, k], + kernel_shape: [q, n], + pads: [m, r, w, x], + strides: [g, u] + }); + }, + 914331: (a, b) => { + B.Ea("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 914418: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => { + B.Ea("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] }); + }, + 914698: (a, b) => { + B.Ea("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" }); + }, + 914785: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => { + B.Ea( + "MaxPool", + a, + { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] } + ); + }, + 915065: (a, b, c, e, f) => { + B.Ea("Gemm", a, { alpha: b, beta: c, transA: e, transB: f }); + }, + 915169: (a) => { + B.Ea("MatMul", a, void 0); + }, + 915223: (a, b, c, e) => { + B.Ea("ArgMax", a, { keepDims: !!b, selectLastIndex: !!c, axis: e }); + }, + 915331: (a, b, c, e) => { + B.Ea("ArgMin", a, { keepDims: !!b, selectLastIndex: !!c, axis: e }); + }, + 915439: (a, b) => { + B.Ea("Softmax", a, { axis: b }); + }, + 915502: (a, b) => { + B.Ea("Concat", a, { axis: b }); + }, + 915562: (a, b, c, e, f) => { + B.Ea("Split", a, { axis: b, numOutputs: c, splitSizes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 915707: (a) => { + B.Ea("Expand", a, void 0); + }, + 915761: (a, b) => { + B.Ea("Gather", a, { axis: Number(b) }); + }, + 915832: (a, b) => { + B.Ea("GatherElements", a, { axis: Number(b) }); + }, + 915911: (a, b, c, e, f, h, k, q, n, m, r) => { + B.Ea("Resize", a, { antialias: b, axes: c ? 
Array.from(z().subarray(e >>> 0, e + c >>> 0)) : [], coordinateTransformMode: V(f), cubicCoeffA: h, excludeOutside: k, extrapolationValue: q, keepAspectRatioPolicy: V(n), mode: V(m), nearestMode: V(r) }); + }, + 916262: (a, b, c, e, f, h, k) => { + B.Ea("Slice", a, { starts: b ? Array.from(z().subarray(c >>> 0, c + b >>> 0)) : [], ends: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [], axes: h ? Array.from(z().subarray(k >>> 0, k + h >>> 0)) : [] }); + }, + 916493: (a) => { + B.Ea("Tile", a, void 0); + }, + 916545: (a, b, c) => { + B.Ea("LayerNormalization", a, { axis: Number(b), epsilon: Number(c) }); + }, + 916652: (a, b, c) => { + B.Ea("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" }); + }, + 916766: (a, b, c) => { + B.Ea("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" }); + }, + 916880: (a) => { + B.Ea( + "Range", + a, + void 0 + ); + }, + 916933: (a, b) => { + B.Ea("Einsum", a, { equation: V(b) }); + }, + 917014: (a, b, c, e, f) => { + B.Ea("Pad", a, { mode: b, value: c, pads: e ? 
Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] }); + }, + 917146: (a) => { + B.Ea("Gelu", a, void 0); + }, + 917198: (a) => { + B.Ea("BiasAdd", a, void 0); + }, + 917253: (a) => { + B.Ea("BiasSplitGelu", a, void 0); + }, + 917314: (a, b) => { + B.Ea("SkipLayerNormalization", a, { epsilon: b }); + }, + 917395: (a) => { + B.zb(a); + }, + 917429: (a, b) => B.Ab(a, b, B.bb.Fb, B.bb.errors), + 917541: (a) => B.wb(a), + 917574: (a) => B.yb(a), + 917606: (a, b, c) => { + B.jb(a, b, c, true); + }, + 917645: (a, b, c) => { + B.jb(a, b, c); + } + }; + function Ia(a) { + this.name = "ExitStatus"; + this.message = `Program terminated with exit(${a})`; + this.status = a; + } + function Ja(a) { + a.terminate(); + a.onmessage = () => { + }; + } + function Ka(a) { + (a = W.Qa[a]) || M(); + W.Eb(a); + } + function La(a) { + var b = W.tb(); + if (!b) + return 6; + W.Ya.push(b); + W.Qa[a.Xa] = b; + b.Xa = a.Xa; + var c = { cmd: "run", start_routine: a.Gb, arg: a.rb, pthread_ptr: a.Xa }; + G && b.unref(); + b.postMessage(c, a.Mb); + return 0; + } + var Ma = "undefined" != typeof TextDecoder ? new TextDecoder("utf8") : void 0, Na = (a, b, c) => { + b >>>= 0; + var e = b + c; + for (c = b; a[c] && !(c >= e); ) + ++c; + if (16 < c - b && a.buffer && Ma) + return Ma.decode(a.buffer instanceof SharedArrayBuffer ? a.slice(b, c) : a.subarray(b, c)); + for (e = ""; b < c; ) { + var f = a[b++]; + if (f & 128) { + var h = a[b++] & 63; + if (192 == (f & 224)) + e += String.fromCharCode((f & 31) << 6 | h); + else { + var k = a[b++] & 63; + f = 224 == (f & 240) ? (f & 15) << 12 | h << 6 | k : (f & 7) << 18 | h << 12 | k << 6 | a[b++] & 63; + 65536 > f ? e += String.fromCharCode(f) : (f -= 65536, e += String.fromCharCode(55296 | f >> 10, 56320 | f & 1023)); + } + } else + e += String.fromCharCode(f); + } + return e; + }, V = (a, b) => (a >>>= 0) ? 
Na(v(), a, b) : ""; + function Oa(a) { + if (H) + return X(1, 1, a); + Q = a; + if (!ya()) { + W.Hb(); + if (B.onExit) + B.onExit(a); + P = true; + } + E(a, new Ia(a)); + } + var Qa = (a) => { + Q = a; + if (H) + throw Pa(a), "unwind"; + Oa(a); + }, W = { + ab: [], + Ya: [], + mb: [], + Qa: {}, + gb: function() { + H ? W.vb() : W.ub(); + }, + ub: function() { + ua.unshift(() => { + Aa(); + W.Bb(() => Ba()); + }); + }, + vb: function() { + W.receiveObjectTransfer = W.Db; + W.threadInitTLS = W.lb; + W.setExitStatus = W.kb; + noExitRuntime = false; + }, + kb: function(a) { + Q = a; + }, + Sb: ["$terminateWorker"], + Hb: function() { + for (var a of W.Ya) + Ja(a); + for (a of W.ab) + Ja(a); + W.ab = []; + W.Ya = []; + W.Qa = []; + }, + Eb: function(a) { + var b = a.Xa; + delete W.Qa[b]; + W.ab.push(a); + W.Ya.splice(W.Ya.indexOf(a), 1); + a.Xa = 0; + Ra(b); + }, + Db: function() { + }, + lb: function() { + W.mb.forEach((a) => a()); + }, + Cb: (a) => new Promise((b) => { + a.onmessage = (h) => { + h = h.data; + var k = h.cmd; + if (h.targetThread && h.targetThread != Sa()) { + var q = W.Qa[h.Rb]; + q ? q.postMessage(h, h.transferList) : K('Internal error! Worker sent a message "' + k + '" to target pthread ' + h.targetThread + ", but that thread no longer exists!"); + } else if ("checkMailbox" === k) + Ta(); + else if ("spawnThread" === k) + La(h); + else if ("cleanupThread" === k) + Ka(h.thread); + else if ("killThread" === k) + h = h.thread, k = W.Qa[h], delete W.Qa[h], Ja(k), Ra(h), W.Ya.splice( + W.Ya.indexOf(k), + 1 + ), k.Xa = 0; + else if ("cancelThread" === k) + W.Qa[h.thread].postMessage({ cmd: "cancel" }); + else if ("loaded" === k) + a.loaded = true, b(a); + else if ("alert" === k) + alert("Thread " + h.threadId + ": " + h.text); + else if ("setimmediate" === h.target) + a.postMessage(h); + else if ("callHandler" === k) + B[h.handler](...h.args); + else + k && K("worker sent an unknown command " + k); + }; + a.onerror = (h) => { + K("worker sent an error! 
" + h.filename + ":" + h.lineno + ": " + h.message); + throw h; + }; + G && (a.on("message", function(h) { + a.onmessage({ data: h }); + }), a.on("error", function(h) { + a.onerror(h); + })); + var c = [], e = ["onExit", "onAbort", "print", "printErr"], f; + for (f of e) + B.hasOwnProperty(f) && c.push(f); + a.postMessage({ cmd: "load", handlers: c, urlOrBlob: B.mainScriptUrlOrBlob || _scriptDir, wasmMemory: l, wasmModule: sa }); + }), + Bb: function(a) { + a(); + }, + qb: function() { + var a = la("ort-wasm-simd-threaded.worker.js"); + a = new Worker(a); + W.ab.push(a); + }, + tb: function() { + 0 == W.ab.length && (W.qb(), W.Cb(W.ab[0])); + return W.ab.pop(); + } + }; + B.PThread = W; + var Ua = (a) => { + for (; 0 < a.length; ) + a.shift()(B); + }; + B.establishStackSpace = function() { + var a = Sa(), b = z()[a + 52 >> 2 >>> 0]; + a = z()[a + 56 >> 2 >>> 0]; + Va(b, b - a); + Wa(b); + }; + function Pa(a) { + if (H) + return X(2, 0, a); + Qa(a); + } + B.invokeEntryPoint = function(a, b) { + a = Xa.apply(null, [a, b]); + ya() ? W.kb(a) : Ya(a); + }; + function Za(a) { + this.fb = a - 24; + this.pb = function(b) { + A()[this.fb + 4 >> 2 >>> 0] = b; + }; + this.ob = function(b) { + A()[this.fb + 8 >> 2 >>> 0] = b; + }; + this.gb = function(b, c) { + this.nb(); + this.pb(b); + this.ob(c); + }; + this.nb = function() { + A()[this.fb + 16 >> 2 >>> 0] = 0; + }; + } + var $a = 0, ab = 0; + function bb(a, b, c, e) { + return H ? X(3, 1, a, b, c, e) : cb(a, b, c, e); + } + function cb(a, b, c, e) { + a >>>= 0; + b >>>= 0; + c >>>= 0; + e >>>= 0; + if ("undefined" == typeof SharedArrayBuffer) + return K("Current environment does not support SharedArrayBuffer, pthreads are not available!"), 6; + var f = []; + if (H && 0 === f.length) + return bb(a, b, c, e); + a = { Gb: c, Xa: a, rb: e, Mb: f }; + return H ? (a.Ob = "spawnThread", postMessage(a, f), 0) : La(a); + } + function db(a, b, c) { + return H ? 
X(4, 1, a, b, c) : 0; + } + function eb(a, b) { + if (H) + return X(5, 1, a, b); + } + var fb = (a) => { + for (var b = 0, c = 0; c < a.length; ++c) { + var e = a.charCodeAt(c); + 127 >= e ? b++ : 2047 >= e ? b += 2 : 55296 <= e && 57343 >= e ? (b += 4, ++c) : b += 3; + } + return b; + }, gb = (a, b, c, e) => { + c >>>= 0; + if (!(0 < e)) + return 0; + var f = c; + e = c + e - 1; + for (var h = 0; h < a.length; ++h) { + var k = a.charCodeAt(h); + if (55296 <= k && 57343 >= k) { + var q = a.charCodeAt(++h); + k = 65536 + ((k & 1023) << 10) | q & 1023; + } + if (127 >= k) { + if (c >= e) + break; + b[c++ >>> 0] = k; + } else { + if (2047 >= k) { + if (c + 1 >= e) + break; + b[c++ >>> 0] = 192 | k >> 6; + } else { + if (65535 >= k) { + if (c + 2 >= e) + break; + b[c++ >>> 0] = 224 | k >> 12; + } else { + if (c + 3 >= e) + break; + b[c++ >>> 0] = 240 | k >> 18; + b[c++ >>> 0] = 128 | k >> 12 & 63; + } + b[c++ >>> 0] = 128 | k >> 6 & 63; + } + b[c++ >>> 0] = 128 | k & 63; + } + } + b[c >>> 0] = 0; + return c - f; + }, hb = (a, b, c) => gb(a, v(), b, c); + function ib(a, b) { + if (H) + return X(6, 1, a, b); + } + function jb(a, b, c) { + if (H) + return X(7, 1, a, b, c); + } + function kb(a, b, c) { + return H ? X(8, 1, a, b, c) : 0; + } + function lb(a, b) { + if (H) + return X(9, 1, a, b); + } + function mb(a, b, c) { + if (H) + return X(10, 1, a, b, c); + } + function nb(a, b, c, e) { + if (H) + return X(11, 1, a, b, c, e); + } + function ob(a, b, c, e) { + if (H) + return X(12, 1, a, b, c, e); + } + function pb(a, b, c, e) { + if (H) + return X(13, 1, a, b, c, e); + } + function qb(a) { + if (H) + return X(14, 1, a); + } + function rb(a, b) { + if (H) + return X(15, 1, a, b); + } + function sb(a, b, c) { + if (H) + return X(16, 1, a, b, c); + } + var tb = (a) => { + if (!P) + try { + if (a(), !ya()) + try { + H ? 
Ya(Q) : Qa(Q); + } catch (b) { + b instanceof Ia || "unwind" == b || E(1, b); + } + } catch (b) { + b instanceof Ia || "unwind" == b || E(1, b); + } + }; + function ub(a) { + a >>>= 0; + "function" === typeof Atomics.Nb && (Atomics.Nb(z(), a >> 2, a).value.then(Ta), a += 128, Atomics.store(z(), a >> 2, 1)); + } + B.__emscripten_thread_mailbox_await = ub; + function Ta() { + var a = Sa(); + a && (ub(a), tb(() => vb())); + } + B.checkMailbox = Ta; + var Y = (a) => 0 === a % 4 && (0 !== a % 100 || 0 === a % 400), wb = [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335], xb = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]; + function yb(a, b, c, e, f, h, k, q) { + return H ? X(17, 1, a, b, c, e, f, h, k, q) : -52; + } + function zb(a, b, c, e, f, h, k) { + if (H) + return X(18, 1, a, b, c, e, f, h, k); + } + var Bb = (a) => { + var b = fb(a) + 1, c = Ab(b); + c && hb(a, c, b); + return c; + }, Cb = [], Db = (a, b) => { + Cb.length = 0; + var c; + for (b >>= 2; c = v()[a++ >>> 0]; ) + b += 105 != c & b, Cb.push(105 == c ? z()[b >>> 0] : da()[b++ >>> 1]), ++b; + return Cb; + }, Fb = (a) => { + var b = Eb(); + a = a(); + Wa(b); + return a; + }; + function X(a, b) { + var c = arguments.length - 2, e = arguments; + return Fb(() => { + for (var f = Gb(8 * c), h = f >> 3, k = 0; k < c; k++) { + var q = e[2 + k]; + da()[h + k >>> 0] = q; + } + return Hb(a, c, f, b); + }); + } + var Ib = [], Jb = {}, Lb = () => { + if (!Kb) { + var a = { USER: "web_user", LOGNAME: "web_user", PATH: "/", PWD: "/", HOME: "/home/web_user", LANG: ("object" == typeof navigator && navigator.languages && navigator.languages[0] || "C").replace("-", "_") + ".UTF-8", _: ja || "./this.program" }, b; + for (b in Jb) + void 0 === Jb[b] ? 
delete a[b] : a[b] = Jb[b]; + var c = []; + for (b in a) + c.push(`${b}=${a[b]}`); + Kb = c; + } + return Kb; + }, Kb; + function Mb(a, b) { + if (H) + return X(19, 1, a, b); + a >>>= 0; + b >>>= 0; + var c = 0; + Lb().forEach(function(e, f) { + var h = b + c; + f = A()[a + 4 * f >> 2 >>> 0] = h; + for (h = 0; h < e.length; ++h) + d()[f++ >> 0 >>> 0] = e.charCodeAt(h); + d()[f >> 0 >>> 0] = 0; + c += e.length + 1; + }); + return 0; + } + function Nb(a, b) { + if (H) + return X(20, 1, a, b); + a >>>= 0; + b >>>= 0; + var c = Lb(); + A()[a >> 2 >>> 0] = c.length; + var e = 0; + c.forEach(function(f) { + e += f.length + 1; + }); + A()[b >> 2 >>> 0] = e; + return 0; + } + function Ob(a) { + return H ? X(21, 1, a) : 52; + } + function Pb(a, b, c, e) { + return H ? X(22, 1, a, b, c, e) : 52; + } + function Qb(a, b, c, e, f) { + return H ? X(23, 1, a, b, c, e, f) : 70; + } + var Rb = [null, [], []]; + function Tb(a, b, c, e) { + if (H) + return X(24, 1, a, b, c, e); + b >>>= 0; + c >>>= 0; + e >>>= 0; + for (var f = 0, h = 0; h < c; h++) { + var k = A()[b >> 2 >>> 0], q = A()[b + 4 >> 2 >>> 0]; + b += 8; + for (var n = 0; n < q; n++) { + var m = v()[k + n >>> 0], r = Rb[a]; + 0 === m || 10 === m ? ((1 === a ? ra : K)(Na(r, 0)), r.length = 0) : r.push(m); + } + f += q; + } + A()[e >> 2 >>> 0] = f; + return 0; + } + var Ub = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], Vb = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; + function Wb(a) { + var b = Array(fb(a) + 1); + gb(a, b, 0, b.length); + return b; + } + var Xb = (a, b) => { + d().set(a, b >>> 0); + }; + function Yb(a, b, c, e) { + function f(g, u, y) { + for (g = "number" == typeof g ? g.toString() : g || ""; g.length < u; ) + g = y[0] + g; + return g; + } + function h(g, u) { + return f(g, u, "0"); + } + function k(g, u) { + function y(Sb) { + return 0 > Sb ? -1 : 0 < Sb ? 
1 : 0; + } + var O; + 0 === (O = y(g.getFullYear() - u.getFullYear())) && 0 === (O = y(g.getMonth() - u.getMonth())) && (O = y(g.getDate() - u.getDate())); + return O; + } + function q(g) { + switch (g.getDay()) { + case 0: + return new Date(g.getFullYear() - 1, 11, 29); + case 1: + return g; + case 2: + return new Date(g.getFullYear(), 0, 3); + case 3: + return new Date( + g.getFullYear(), + 0, + 2 + ); + case 4: + return new Date(g.getFullYear(), 0, 1); + case 5: + return new Date(g.getFullYear() - 1, 11, 31); + case 6: + return new Date(g.getFullYear() - 1, 11, 30); + } + } + function n(g) { + var u = g.Za; + for (g = new Date(new Date(g.$a + 1900, 0, 1).getTime()); 0 < u; ) { + var y = g.getMonth(), O = (Y(g.getFullYear()) ? Ub : Vb)[y]; + if (u > O - g.getDate()) + u -= O - g.getDate() + 1, g.setDate(1), 11 > y ? g.setMonth(y + 1) : (g.setMonth(0), g.setFullYear(g.getFullYear() + 1)); + else { + g.setDate(g.getDate() + u); + break; + } + } + y = new Date(g.getFullYear() + 1, 0, 4); + u = q(new Date( + g.getFullYear(), + 0, + 4 + )); + y = q(y); + return 0 >= k(u, g) ? 0 >= k(y, g) ? g.getFullYear() + 1 : g.getFullYear() : g.getFullYear() - 1; + } + a >>>= 0; + b >>>= 0; + c >>>= 0; + e >>>= 0; + var m = z()[e + 40 >> 2 >>> 0]; + e = { Kb: z()[e >> 2 >>> 0], Jb: z()[e + 4 >> 2 >>> 0], cb: z()[e + 8 >> 2 >>> 0], ib: z()[e + 12 >> 2 >>> 0], eb: z()[e + 16 >> 2 >>> 0], $a: z()[e + 20 >> 2 >>> 0], Wa: z()[e + 24 >> 2 >>> 0], Za: z()[e + 28 >> 2 >>> 0], Tb: z()[e + 32 >> 2 >>> 0], Ib: z()[e + 36 >> 2 >>> 0], Lb: m ? 
V(m) : "" }; + c = V(c); + m = { + "%c": "%a %b %d %H:%M:%S %Y", + "%D": "%m/%d/%y", + "%F": "%Y-%m-%d", + "%h": "%b", + "%r": "%I:%M:%S %p", + "%R": "%H:%M", + "%T": "%H:%M:%S", + "%x": "%m/%d/%y", + "%X": "%H:%M:%S", + "%Ec": "%c", + "%EC": "%C", + "%Ex": "%m/%d/%y", + "%EX": "%H:%M:%S", + "%Ey": "%y", + "%EY": "%Y", + "%Od": "%d", + "%Oe": "%e", + "%OH": "%H", + "%OI": "%I", + "%Om": "%m", + "%OM": "%M", + "%OS": "%S", + "%Ou": "%u", + "%OU": "%U", + "%OV": "%V", + "%Ow": "%w", + "%OW": "%W", + "%Oy": "%y" + }; + for (var r in m) + c = c.replace(new RegExp(r, "g"), m[r]); + var w = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "), x = "January February March April May June July August September October November December".split(" "); + m = { "%a": (g) => w[g.Wa].substring(0, 3), "%A": (g) => w[g.Wa], "%b": (g) => x[g.eb].substring(0, 3), "%B": (g) => x[g.eb], "%C": (g) => h((g.$a + 1900) / 100 | 0, 2), "%d": (g) => h(g.ib, 2), "%e": (g) => f(g.ib, 2, " "), "%g": (g) => n(g).toString().substring(2), "%G": (g) => n(g), "%H": (g) => h(g.cb, 2), "%I": (g) => { + g = g.cb; + 0 == g ? g = 12 : 12 < g && (g -= 12); + return h(g, 2); + }, "%j": (g) => { + for (var u = 0, y = 0; y <= g.eb - 1; u += (Y(g.$a + 1900) ? Ub : Vb)[y++]) + ; + return h(g.ib + u, 3); + }, "%m": (g) => h(g.eb + 1, 2), "%M": (g) => h(g.Jb, 2), "%n": () => "\n", "%p": (g) => 0 <= g.cb && 12 > g.cb ? 
"AM" : "PM", "%S": (g) => h(g.Kb, 2), "%t": () => " ", "%u": (g) => g.Wa || 7, "%U": (g) => h(Math.floor((g.Za + 7 - g.Wa) / 7), 2), "%V": (g) => { + var u = Math.floor((g.Za + 7 - (g.Wa + 6) % 7) / 7); + 2 >= (g.Wa + 371 - g.Za - 2) % 7 && u++; + if (u) + 53 == u && (y = (g.Wa + 371 - g.Za) % 7, 4 == y || 3 == y && Y(g.$a) || (u = 1)); + else { + u = 52; + var y = (g.Wa + 7 - g.Za - 1) % 7; + (4 == y || 5 == y && Y(g.$a % 400 - 1)) && u++; + } + return h(u, 2); + }, "%w": (g) => g.Wa, "%W": (g) => h(Math.floor((g.Za + 7 - (g.Wa + 6) % 7) / 7), 2), "%y": (g) => (g.$a + 1900).toString().substring(2), "%Y": (g) => g.$a + 1900, "%z": (g) => { + g = g.Ib; + var u = 0 <= g; + g = Math.abs(g) / 60; + return (u ? "+" : "-") + String("0000" + (g / 60 * 100 + g % 60)).slice(-4); + }, "%Z": (g) => g.Lb, "%%": () => "%" }; + c = c.replace(/%%/g, "\0\0"); + for (r in m) + c.includes(r) && (c = c.replace(new RegExp(r, "g"), m[r](e))); + c = c.replace(/\0\0/g, "%"); + r = Wb(c); + if (r.length > b) + return 0; + Xb(r, a); + return r.length - 1; + } + function Zb(a) { + try { + a(); + } catch (b) { + M(b); + } + } + function $b(a) { + var b = {}, c; + for (c in a) + (function(e) { + var f = a[e]; + b[e] = "function" == typeof f ? 
function() { + ac.push(e); + try { + return f.apply(null, arguments); + } finally { + P || (ac.pop() === e || M(), D && 1 === Z && 0 === ac.length && (Z = 0, xa += 1, Zb(bc), "undefined" != typeof Fibers && Fibers.Ub())); + } + } : f; + })(c); + return b; + } + var Z = 0, D = null, cc = 0, ac = [], dc = {}, ec = {}, fc = 0, gc = null, hc = []; + function ha() { + return new Promise((a, b) => { + gc = { resolve: a, reject: b }; + }); + } + function ic() { + var a = Ab(65548), b = a + 12; + A()[a >> 2 >>> 0] = b; + A()[a + 4 >> 2 >>> 0] = b + 65536; + b = ac[0]; + var c = dc[b]; + void 0 === c && (c = fc++, dc[b] = c, ec[c] = b); + b = c; + z()[a + 8 >> 2 >>> 0] = b; + return a; + } + function jc() { + var a = z()[D + 8 >> 2 >>> 0]; + a = N[ec[a]]; + --xa; + return a(); + } + function kc(a) { + if (!P) { + if (0 === Z) { + var b = false, c = false; + a((e = 0) => { + if (!P && (cc = e, b = true, c)) { + Z = 2; + Zb(() => lc(D)); + "undefined" != typeof Browser && Browser.hb.sb && Browser.hb.resume(); + e = false; + try { + var f = jc(); + } catch (q) { + f = q, e = true; + } + var h = false; + if (!D) { + var k = gc; + k && (gc = null, (e ? k.reject : k.resolve)(f), h = true); + } + if (e && !h) + throw f; + } + }); + c = true; + b || (Z = 1, D = ic(), "undefined" != typeof Browser && Browser.hb.sb && Browser.hb.pause(), Zb(() => mc(D))); + } else + 2 === Z ? (Z = 0, Zb(nc), oc(D), D = null, hc.forEach((e) => tb(e))) : M(`invalid state: ${Z}`); + return cc; + } + } + function pc(a) { + return kc((b) => { + a().then(b); + }); + } + W.gb(); + var qc = [null, Oa, Pa, bb, db, eb, ib, jb, kb, lb, mb, nb, ob, pb, qb, rb, sb, yb, zb, Mb, Nb, Ob, Pb, Qb, Tb], tc = { + r: function(a, b, c) { + return pc(async () => { + await B.xb(a, b, c); + }); + }, + b: function(a, b, c) { + a >>>= 0; + new Za(a).gb(b >>> 0, c >>> 0); + $a = a; + ab++; + throw $a; + }, + O: function(a) { + rc(a >>> 0, !F, 1, !ka, 131072, false); + W.lb(); + }, + l: function(a) { + a >>>= 0; + H ? 
postMessage({ cmd: "cleanupThread", thread: a }) : Ka(a); + }, + I: cb, + i: db, + U: eb, + E: ib, + G: jb, + V: kb, + S: lb, + K: mb, + R: nb, + p: ob, + F: pb, + C: qb, + T: rb, + D: sb, + q: () => true, + A: function(a, b) { + a >>>= 0; + a == b >>> 0 ? setTimeout(() => Ta()) : H ? postMessage({ + targetThread: a, + cmd: "checkMailbox" + }) : (a = W.Qa[a]) && a.postMessage({ cmd: "checkMailbox" }); + }, + M: function() { + return -1; + }, + N: ub, + X: function(a) { + G && W.Qa[a >>> 0].ref(); + }, + u: function(a, b, c) { + a = b + 2097152 >>> 0 < 4194305 - !!a ? (a >>> 0) + 4294967296 * b : NaN; + c >>>= 0; + a = new Date(1e3 * a); + z()[c >> 2 >>> 0] = a.getUTCSeconds(); + z()[c + 4 >> 2 >>> 0] = a.getUTCMinutes(); + z()[c + 8 >> 2 >>> 0] = a.getUTCHours(); + z()[c + 12 >> 2 >>> 0] = a.getUTCDate(); + z()[c + 16 >> 2 >>> 0] = a.getUTCMonth(); + z()[c + 20 >> 2 >>> 0] = a.getUTCFullYear() - 1900; + z()[c + 24 >> 2 >>> 0] = a.getUTCDay(); + a = (a.getTime() - Date.UTC( + a.getUTCFullYear(), + 0, + 1, + 0, + 0, + 0, + 0 + )) / 864e5 | 0; + z()[c + 28 >> 2 >>> 0] = a; + }, + v: function(a, b, c) { + a = b + 2097152 >>> 0 < 4194305 - !!a ? (a >>> 0) + 4294967296 * b : NaN; + c >>>= 0; + a = new Date(1e3 * a); + z()[c >> 2 >>> 0] = a.getSeconds(); + z()[c + 4 >> 2 >>> 0] = a.getMinutes(); + z()[c + 8 >> 2 >>> 0] = a.getHours(); + z()[c + 12 >> 2 >>> 0] = a.getDate(); + z()[c + 16 >> 2 >>> 0] = a.getMonth(); + z()[c + 20 >> 2 >>> 0] = a.getFullYear() - 1900; + z()[c + 24 >> 2 >>> 0] = a.getDay(); + b = (Y(a.getFullYear()) ? 
wb : xb)[a.getMonth()] + a.getDate() - 1 | 0; + z()[c + 28 >> 2 >>> 0] = b; + z()[c + 36 >> 2 >>> 0] = -(60 * a.getTimezoneOffset()); + b = new Date( + a.getFullYear(), + 6, + 1 + ).getTimezoneOffset(); + var e = new Date(a.getFullYear(), 0, 1).getTimezoneOffset(); + a = (b != e && a.getTimezoneOffset() == Math.min(e, b)) | 0; + z()[c + 32 >> 2 >>> 0] = a; + }, + w: function(a) { + a >>>= 0; + var b = new Date(z()[a + 20 >> 2 >>> 0] + 1900, z()[a + 16 >> 2 >>> 0], z()[a + 12 >> 2 >>> 0], z()[a + 8 >> 2 >>> 0], z()[a + 4 >> 2 >>> 0], z()[a >> 2 >>> 0], 0), c = z()[a + 32 >> 2 >>> 0], e = b.getTimezoneOffset(), f = new Date(b.getFullYear(), 6, 1).getTimezoneOffset(), h = new Date(b.getFullYear(), 0, 1).getTimezoneOffset(), k = Math.min(h, f); + 0 > c ? z()[a + 32 >> 2 >>> 0] = Number(f != h && k == e) : 0 < c != (k == e) && (f = Math.max(h, f), b.setTime(b.getTime() + 6e4 * ((0 < c ? k : f) - e))); + z()[a + 24 >> 2 >>> 0] = b.getDay(); + c = (Y(b.getFullYear()) ? wb : xb)[b.getMonth()] + b.getDate() - 1 | 0; + z()[a + 28 >> 2 >>> 0] = c; + z()[a >> 2 >>> 0] = b.getSeconds(); + z()[a + 4 >> 2 >>> 0] = b.getMinutes(); + z()[a + 8 >> 2 >>> 0] = b.getHours(); + z()[a + 12 >> 2 >>> 0] = b.getDate(); + z()[a + 16 >> 2 >>> 0] = b.getMonth(); + z()[a + 20 >> 2 >>> 0] = b.getYear(); + a = b.getTime() / 1e3; + return sc((U = a, 1 <= +Math.abs(U) ? 0 < U ? +Math.floor(U / 4294967296) >>> 0 : ~~+Math.ceil((U - +(~~U >>> 0)) / 4294967296) >>> 0 : 0)), a >>> 0; + }, + s: yb, + t: zb, + z: function(a, b, c) { + function e(m) { + return (m = m.toTimeString().match(/\(([A-Za-z ]+)\)$/)) ? m[1] : "GMT"; + } + a >>>= 0; + b >>>= 0; + c >>>= 0; + var f = (/* @__PURE__ */ new Date()).getFullYear(), h = new Date(f, 0, 1), k = new Date(f, 6, 1); + f = h.getTimezoneOffset(); + var q = k.getTimezoneOffset(), n = Math.max(f, q); + A()[a >> 2 >>> 0] = 60 * n; + z()[b >> 2 >>> 0] = Number(f != q); + a = e(h); + b = e(k); + a = Bb(a); + b = Bb(b); + q < f ? 
(A()[c >> 2 >>> 0] = a, A()[c + 4 >> 2 >>> 0] = b) : (A()[c >> 2 >>> 0] = b, A()[c + 4 >> 2 >>> 0] = a); + }, + d: () => { + M(""); + }, + c: function(a, b, c) { + a >>>= 0; + b = Db(b >>> 0, c >>> 0); + return Ha[a].apply(null, b); + }, + k: function(a, b, c) { + a >>>= 0; + b = Db(b >>> 0, c >>> 0); + return Ha[a].apply(null, b); + }, + m: function() { + }, + j: function() { + return Date.now(); + }, + W: () => { + xa += 1; + throw "unwind"; + }, + B: function() { + return 4294901760; + }, + f: () => performance.timeOrigin + performance.now(), + g: function() { + return G ? (init_os(), __toCommonJS(os_exports)).cpus().length : navigator.hardwareConcurrency; + }, + L: function(a, b, c, e) { + W.Pb = b >>> 0; + Ib.length = c; + b = e >>> 0 >> 3; + for (e = 0; e < c; e++) + Ib[e] = da()[b + e >>> 0]; + return (0 > a ? Ha[-a - 1] : qc[a]).apply(null, Ib); + }, + y: function(a) { + a >>>= 0; + var b = v().length; + if (a <= b || 4294901760 < a) + return false; + for (var c = 1; 4 >= c; c *= 2) { + var e = b * (1 + 0.2 / c); + e = Math.min(e, a + 100663296); + var f = Math; + e = Math.max(a, e); + a: { + f = f.min.call(f, 4294901760, e + (65536 - e % 65536) % 65536) - l.buffer.byteLength + 65535 >>> 16; + try { + l.grow(f); + t(); + var h = 1; + break a; + } catch (k) { + } + h = void 0; + } + if (h) + return true; + } + return false; + }, + P: Mb, + Q: Nb, + H: Qa, + h: Ob, + o: Pb, + x: Qb, + n: Tb, + a: l || B.wasmMemory, + J: Yb, + e: function(a, b, c, e) { + return Yb(a >>> 0, b >>> 0, c >>> 0, e >>> 0); + } + }; + (function() { + function a(c, e) { + c = c.exports; + c = $b(c); + N = c = uc(c); + W.mb.push(N.Da); + va.unshift(N.Y); + sa = e; + Ba(); + return c; + } + var b = { a: tc }; + Aa(); + if (B.instantiateWasm) + try { + return B.instantiateWasm(b, a); + } catch (c) { + K("Module.instantiateWasm callback failed with error: " + c), C(c); + } + Ga(b, function(c) { + a(c.instance, c.module); + }).catch(C); + return {}; + })(); + B._OrtInit = (a, b) => 
(B._OrtInit = N.Z)(a, b); + B._OrtGetLastError = (a, b) => (B._OrtGetLastError = N._)(a, b); + B._OrtCreateSessionOptions = (a, b, c, e, f, h, k, q, n, m) => (B._OrtCreateSessionOptions = N.$)(a, b, c, e, f, h, k, q, n, m); + B._OrtAppendExecutionProvider = (a, b) => (B._OrtAppendExecutionProvider = N.aa)(a, b); + B._OrtAddFreeDimensionOverride = (a, b, c) => (B._OrtAddFreeDimensionOverride = N.ba)(a, b, c); + B._OrtAddSessionConfigEntry = (a, b, c) => (B._OrtAddSessionConfigEntry = N.ca)(a, b, c); + B._OrtReleaseSessionOptions = (a) => (B._OrtReleaseSessionOptions = N.da)(a); + B._OrtCreateSession = (a, b, c) => (B._OrtCreateSession = N.ea)(a, b, c); + B._OrtReleaseSession = (a) => (B._OrtReleaseSession = N.fa)(a); + B._OrtGetInputOutputCount = (a, b, c) => (B._OrtGetInputOutputCount = N.ga)(a, b, c); + B._OrtGetInputName = (a, b) => (B._OrtGetInputName = N.ha)(a, b); + B._OrtGetOutputName = (a, b) => (B._OrtGetOutputName = N.ia)(a, b); + B._OrtFree = (a) => (B._OrtFree = N.ja)(a); + B._OrtCreateTensor = (a, b, c, e, f, h) => (B._OrtCreateTensor = N.ka)(a, b, c, e, f, h); + B._OrtGetTensorData = (a, b, c, e, f) => (B._OrtGetTensorData = N.la)(a, b, c, e, f); + B._OrtReleaseTensor = (a) => (B._OrtReleaseTensor = N.ma)(a); + B._OrtCreateRunOptions = (a, b, c, e) => (B._OrtCreateRunOptions = N.na)(a, b, c, e); + B._OrtAddRunConfigEntry = (a, b, c) => (B._OrtAddRunConfigEntry = N.oa)(a, b, c); + B._OrtReleaseRunOptions = (a) => (B._OrtReleaseRunOptions = N.pa)(a); + B._OrtCreateBinding = (a) => (B._OrtCreateBinding = N.qa)(a); + B._OrtBindInput = (a, b, c) => (B._OrtBindInput = N.ra)(a, b, c); + B._OrtBindOutput = (a, b, c, e) => (B._OrtBindOutput = N.sa)(a, b, c, e); + B._OrtClearBoundOutputs = (a) => (B._OrtClearBoundOutputs = N.ta)(a); + B._OrtReleaseBinding = (a) => (B._OrtReleaseBinding = N.ua)(a); + B._OrtRunWithBinding = (a, b, c, e, f) => (B._OrtRunWithBinding = N.va)(a, b, c, e, f); + B._OrtRun = (a, b, c, e, f, h, k, q) => (B._OrtRun = N.wa)(a, b, c, e, f, 
h, k, q); + B._OrtEndProfiling = (a) => (B._OrtEndProfiling = N.xa)(a); + B._JsepOutput = (a, b, c) => (B._JsepOutput = N.ya)(a, b, c); + B._JsepGetNodeName = (a) => (B._JsepGetNodeName = N.za)(a); + var Sa = B._pthread_self = () => (Sa = B._pthread_self = N.Aa)(), Ab = B._malloc = (a) => (Ab = B._malloc = N.Ba)(a), oc = B._free = (a) => (oc = B._free = N.Ca)(a); + B.__emscripten_tls_init = () => (B.__emscripten_tls_init = N.Da)(); + var rc = B.__emscripten_thread_init = (a, b, c, e, f, h) => (rc = B.__emscripten_thread_init = N.Fa)(a, b, c, e, f, h); + B.__emscripten_thread_crashed = () => (B.__emscripten_thread_crashed = N.Ga)(); + var Hb = (a, b, c, e) => (Hb = N.Ha)(a, b, c, e), Ra = (a) => (Ra = N.Ia)(a), Ya = B.__emscripten_thread_exit = (a) => (Ya = B.__emscripten_thread_exit = N.Ja)(a), vb = B.__emscripten_check_mailbox = () => (vb = B.__emscripten_check_mailbox = N.Ka)(), sc = (a) => (sc = N.La)(a), Va = (a, b) => (Va = N.Ma)(a, b), Eb = () => (Eb = N.Na)(), Wa = (a) => (Wa = N.Oa)(a), Gb = (a) => (Gb = N.Pa)(a), Xa = B.dynCall_ii = (a, b) => (Xa = B.dynCall_ii = N.Ra)(a, b), mc = (a) => (mc = N.Sa)(a), bc = () => (bc = N.Ta)(), lc = (a) => (lc = N.Ua)(a), nc = () => (nc = N.Va)(); + B.___start_em_js = 917678; + B.___stop_em_js = 917839; + function uc(a) { + a = Object.assign({}, a); + var b = (e) => () => e() >>> 0, c = (e) => (f) => e(f) >>> 0; + a.__errno_location = b(a.__errno_location); + a.pthread_self = b(a.pthread_self); + a.malloc = c(a.malloc); + a.stackSave = b(a.stackSave); + a.stackAlloc = c(a.stackAlloc); + return a; + } + B.keepRuntimeAlive = ya; + B.wasmMemory = l; + B.stackAlloc = Gb; + B.stackSave = Eb; + B.stackRestore = Wa; + B.UTF8ToString = V; + B.stringToUTF8 = hb; + B.lengthBytesUTF8 = fb; + B.ExitStatus = Ia; + B.PThread = W; + var vc; + S = function wc() { + vc || xc(); + vc || (S = wc); + }; + function xc() { + function a() { + if (!vc && (vc = true, B.calledRun = true, !P)) { + H || Ua(va); + fa(B); + if (B.onRuntimeInitialized) 
+ B.onRuntimeInitialized(); + if (!H) { + if (B.postRun) + for ("function" == typeof B.postRun && (B.postRun = [B.postRun]); B.postRun.length; ) { + var b = B.postRun.shift(); + wa.unshift(b); + } + Ua(wa); + } + } + } + if (!(0 < R)) + if (H) + fa(B), H || Ua(va), startWorker(B); + else { + if (B.preRun) + for ("function" == typeof B.preRun && (B.preRun = [B.preRun]); B.preRun.length; ) + ua.unshift(B.preRun.shift()); + Ua(ua); + 0 < R || (B.setStatus ? (B.setStatus("Running..."), setTimeout(function() { + setTimeout( + function() { + B.setStatus(""); + }, + 1 + ); + a(); + }, 1)) : a()); + } + } + if (B.preInit) + for ("function" == typeof B.preInit && (B.preInit = [B.preInit]); 0 < B.preInit.length; ) + B.preInit.pop()(); + xc(); + return moduleArg.ready; + }; + })(); + if (typeof exports === "object" && typeof module === "object") + module.exports = ortWasmThreaded; + else if (typeof define === "function" && define["amd"]) + define([], () => ortWasmThreaded); + } +}); + +// web/lib/wasm/binding/ort-wasm-threaded.worker.js +var require_ort_wasm_threaded_worker = __commonJS({ + "web/lib/wasm/binding/ort-wasm-threaded.worker.js"(exports, module) { + module.exports = '"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" 
");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason??e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}ortWasmThreaded(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,/*isMainBrowserThread=*/0,/*isMainRuntimeThread=*/0,/*canBlock=*/1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else 
/**
 * Decide whether the multi-threaded WebAssembly build can be used.
 *
 * @param numThreads The requested thread count (`env.wasm.numThreads`).
 * @returns `true` only when more than one thread is requested, `SharedArrayBuffer`
 *          is available and the engine validates the threads probe module.
 */
isMultiThreadSupported = (numThreads) => {
  // A single thread never needs the threaded build.
  if (numThreads === 1) {
    return false;
  }

  if (typeof SharedArrayBuffer === "undefined") {
    // In a browser the usual reason is a page that is not cross-origin isolated.
    const inNonIsolatedBrowser = typeof self !== "undefined" && !self.crossOriginIsolated;
    if (inNonIsolatedBrowser) {
      console.warn(
        "env.wasm.numThreads is set to " + numThreads + ", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."
      );
    }
    return false;
  }

  const inNode = typeof process !== "undefined" && process.versions && process.versions.node;
  if (inNode) {
    // Only a warning: the probe below still decides the result.
    console.warn(
      "env.wasm.numThreads is set to " + numThreads + ", however, currently onnxruntime-web does not support multi-threads in Node.js. Please consider using onnxruntime-node for performance critical scenarios."
    );
  }

  try {
    // Posting a SharedArrayBuffer throws where shared memory cannot be transferred.
    if (typeof MessageChannel !== "undefined") {
      new MessageChannel().port1.postMessage(new SharedArrayBuffer(1));
    }
    // Tiny probe module (shared memory + one 0xFE-prefixed instruction).
    const threadsProbe = new Uint8Array([
      0, 97, 115, 109, 1, 0, 0, 0,
      1, 4, 1, 96, 0, 0,
      3, 2, 1, 0,
      5, 4, 1, 3, 1, 1,
      10, 11, 1, 9, 0, 65, 0, 254, 16, 2, 0, 26, 11
    ]);
    return WebAssembly.validate(threadsProbe);
  } catch (e) {
    return false;
  }
};

/**
 * Probe the engine for WebAssembly SIMD support.
 *
 * @returns `true` when the SIMD probe module validates, `false` otherwise
 *          (including when validation itself throws).
 */
isSimdSupported = () => {
  try {
    // Tiny probe module whose body uses 0xFD-prefixed (vector) instructions.
    const simdProbe = new Uint8Array([
      0, 97, 115, 109, 1, 0, 0, 0,
      1, 4, 1, 96, 0, 0,
      3, 2, 1, 0,
      10, 30, 1, 28, 0, 65, 0,
      253, 15,
      253, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      253, 186, 1,
      26, 11
    ]);
    return WebAssembly.validate(simdProbe);
  } catch (e) {
    return false;
  }
};

/**
 * Pick the .wasm artifact name for the given feature combination.
 *
 * @param useSimd Whether the SIMD build should be used.
 * @param useThreads Whether the threaded build should be used.
 * @returns The file name of the matching WebAssembly binary.
 */
getWasmFileName = (useSimd, useThreads) => {
  if (!useSimd) {
    return useThreads ? "ort-wasm-threaded.wasm" : "ort-wasm.wasm";
  }
  // Build-time flag that the bundler folded to `false`; kept so the branch stays visible.
  if (false) {
    return "ort-training-wasm-simd.wasm";
  }
  return useThreads ? "ort-wasm-simd-threaded.wasm" : "ort-wasm-simd.wasm";
};
wasmPaths[wasmFileName] : void 0; + let isTimeout = false; + const tasks = []; + if (timeout > 0) { + tasks.push(new Promise((resolve) => { + setTimeout(() => { + isTimeout = true; + resolve(); + }, timeout); + })); + } + tasks.push(new Promise((resolve, reject) => { + const factory = useThreads ? ortWasmFactoryThreaded : ortWasmFactory; + const config = { + locateFile: (fileName, scriptDirectory) => { + if (useThreads && fileName.endsWith(".worker.js") && typeof Blob !== "undefined") { + return URL.createObjectURL(new Blob( + [ + // This require() function is handled by esbuild plugin to load file content as string. + // eslint-disable-next-line @typescript-eslint/no-require-imports + require_ort_wasm_threaded_worker() + ], + { type: "text/javascript" } + )); + } + if (fileName.endsWith(".wasm")) { + if (wasmPathOverride) { + return wasmPathOverride; + } + const prefix = wasmPrefixOverride ?? scriptDirectory; + if (true) { + if (wasmFileName === "ort-wasm-simd.wasm") { + return prefix + "ort-wasm-simd.jsep.wasm"; + } else if (wasmFileName === "ort-wasm-simd-threaded.wasm") { + return prefix + "ort-wasm-simd-threaded.jsep.wasm"; + } + } + return prefix + wasmFileName; + } + return scriptDirectory + fileName; + } + }; + if (useThreads) { + config.numThreads = numThreads; + if (typeof Blob === "undefined") { + config.mainScriptUrlOrBlob = join(__dirname, "ort-wasm-threaded.js"); + } else { + const scriptSourceCode = `var ortWasmThreaded=${factory.toString()};`; + config.mainScriptUrlOrBlob = new Blob([scriptSourceCode], { type: "text/javascript" }); + } + } + factory(config).then( + // wasm module initialized successfully + (module) => { + initializing = false; + initialized = true; + wasm = module; + resolve(); + }, + // wasm module failed to initialize + (what) => { + initializing = false; + aborted = true; + reject(what); + } + ); + })); + await Promise.race(tasks); + if (isTimeout) { + throw new Error(`WebAssembly backend initializing failed due to timeout: 
/**
 * Return the initialized WebAssembly module instance.
 *
 * @returns The module stored by the initialization routine.
 * @throws Error when initialization has not completed yet.
 */
getInstance = () => {
  if (initialized && wasm) {
    return wasm;
  }
  throw new Error("WebAssembly is not initialized yet.");
};

/**
 * Copy a JS string into the wasm heap as a NUL-terminated UTF-8 string.
 *
 * @param data The string to copy.
 * @param allocs Array that collects every allocated pointer so the caller can free them.
 * @returns The pointer returned by `_malloc`.
 */
allocWasmString = (data, allocs) => {
  const wasm2 = getInstance();
  // +1 leaves room for the terminating NUL byte.
  const dataLength = wasm2.lengthBytesUTF8(data) + 1;
  const dataOffset = wasm2._malloc(dataLength);
  wasm2.stringToUTF8(data, dataOffset, dataLength);
  allocs.push(dataOffset);
  return dataOffset;
};

/**
 * Walk a nested options object and report every leaf as a flat `a.b.c` key with a string value.
 *
 * Strings and numbers are passed through `toString()`, booleans become "1" / "0".
 *
 * @param options The (possibly nested) options object.
 * @param prefix Key prefix for this level ("" at the top level, "parent." below it).
 * @param seen Set-like object (`has` / `add` / `delete`) holding the objects on the current path.
 * @param handler Called as `handler(name, value)` for every leaf.
 * @throws Error on a real circular reference, on a `null` value, or on an unsupported value type.
 */
iterateExtraOptions = (options, prefix, seen, handler) => {
  const isTrackable = typeof options === "object" && options !== null;
  if (isTrackable) {
    if (seen.has(options)) {
      throw new Error("Circular reference in options");
    }
    seen.add(options);
  }
  try {
    for (const [key, value] of Object.entries(options)) {
      const name = prefix ? prefix + key : key;
      if (value === null) {
        // `typeof null` is "object"; reject it explicitly instead of failing inside Object.entries.
        throw new Error("Can't handle extra config type: null");
      }
      if (typeof value === "object") {
        iterateExtraOptions(value, name + ".", seen, handler);
      } else if (typeof value === "string" || typeof value === "number") {
        handler(name, value.toString());
      } else if (typeof value === "boolean") {
        handler(name, value ? "1" : "0");
      } else {
        throw new Error(`Can't handle extra config type: ${typeof value}`);
      }
    }
  } finally {
    // Only objects on the current path count as circular; unwind so that an
    // object shared by two sibling keys is not reported as a cycle.
    if (isTrackable) {
      seen.delete(options);
    }
  }
};
/**
 * Create a native run-options object from the user supplied run options.
 *
 * Defaults (log severity 2, log verbosity 0, terminate false) are applied to local
 * values only; the caller's `options` object is never modified.
 *
 * @param options Optional run options (`logSeverityLevel`, `logVerbosityLevel`, `terminate`, `tag`, `extra`).
 * @returns A tuple `[runOptionsHandle, allocs]`; `allocs` lists the wasm allocations the caller must free.
 * @throws Error when a log level is invalid or when a native call reports failure. On failure
 *         the handle (if created) is released and every allocation made here is freed.
 */
setRunOptions = (options) => {
  const wasm2 = getInstance();
  let runOptionsHandle = 0;
  const allocs = [];
  try {
    let logSeverityLevel = options?.logSeverityLevel;
    if (logSeverityLevel === void 0) {
      logSeverityLevel = 2;
    } else if (typeof logSeverityLevel !== "number" || !Number.isInteger(logSeverityLevel) || logSeverityLevel < 0 || logSeverityLevel > 4) {
      throw new Error(`log serverity level is not valid: ${logSeverityLevel}`);
    }

    let logVerbosityLevel = options?.logVerbosityLevel;
    if (logVerbosityLevel === void 0) {
      logVerbosityLevel = 0;
    } else if (typeof logVerbosityLevel !== "number" || !Number.isInteger(logVerbosityLevel)) {
      throw new Error(`log verbosity level is not valid: ${logVerbosityLevel}`);
    }

    // A missing `terminate` is treated as false.
    const terminate = !!options?.terminate;

    let tagDataOffset = 0;
    if (options?.tag !== void 0) {
      tagDataOffset = allocWasmString(options.tag, allocs);
    }

    runOptionsHandle = wasm2._OrtCreateRunOptions(logSeverityLevel, logVerbosityLevel, terminate, tagDataOffset);
    if (runOptionsHandle === 0) {
      checkLastError("Can't create run options.");
    }

    if (options?.extra !== void 0) {
      iterateExtraOptions(options.extra, "", /* @__PURE__ */ new WeakSet(), (key, value) => {
        const keyDataOffset = allocWasmString(key, allocs);
        const valueDataOffset = allocWasmString(value, allocs);
        if (wasm2._OrtAddRunConfigEntry(runOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {
          checkLastError(`Can't set a run config entry: ${key} - ${value}.`);
        }
      });
    }
    return [runOptionsHandle, allocs];
  } catch (e) {
    // Undo everything created so far before propagating the error.
    if (runOptionsHandle !== 0) {
      wasm2._OrtReleaseRunOptions(runOptionsHandle);
    }
    allocs.forEach((alloc) => wasm2._free(alloc));
    throw e;
  }
};

/**
 * Map a graph optimization level name to the numeric value passed to the native API.
 *
 * @param graphOptimizationLevel One of "disabled", "basic", "extended", "all".
 * @returns 0, 1, 2 or 99 respectively.
 * @throws Error for any other value.
 */
getGraphOptimzationLevel = (graphOptimizationLevel) => {
  switch (graphOptimizationLevel) {
    case "disabled":
      return 0;
    case "basic":
      return 1;
    case "extended":
      return 2;
    case "all":
      return 99;
    default:
      throw new Error(`unsupported graph optimization level: ${graphOptimizationLevel}`);
  }
};

/**
 * Map an execution mode name to the numeric value passed to the native API.
 *
 * @param executionMode "sequential" or "parallel".
 * @returns 0 or 1 respectively.
 * @throws Error for any other value.
 */
getExecutionMode = (executionMode) => {
  switch (executionMode) {
    case "sequential":
      return 0;
    case "parallel":
      return 1;
    default:
      throw new Error(`unsupported execution mode: ${executionMode}`);
  }
};

/**
 * Fill in default session options in place.
 *
 * Ensures `options.extra.session.use_ort_model_bytes_directly` is set ("1" when absent)
 * and turns `enableMemPattern` off when the "webgpu" execution provider is requested.
 *
 * @param options The session options object; it is modified by this call.
 */
appendDefaultOptions = (options) => {
  if (!options.extra) {
    options.extra = {};
  }
  if (!options.extra.session) {
    options.extra.session = {};
  }
  const session = options.extra.session;
  if (!session.use_ort_model_bytes_directly) {
    session.use_ort_model_bytes_directly = "1";
  }
  const usesWebGpu = options.executionProviders && options.executionProviders.some((ep) => (typeof ep === "string" ? ep : ep.name) === "webgpu");
  if (usesWebGpu) {
    options.enableMemPattern = false;
  }
};
ep : ep.name; + switch (epName) { + case "webnn": + epName = "WEBNN"; + if (typeof ep !== "string") { + const webnnOptions = ep; + if (webnnOptions?.deviceType) { + const keyDataOffset = allocWasmString("deviceType", allocs); + const valueDataOffset = allocWasmString(webnnOptions.deviceType, allocs); + if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { + checkLastError(`Can't set a session config entry: 'deviceType' - ${webnnOptions.deviceType}.`); + } + } + if (webnnOptions?.numThreads) { + let numThreads = webnnOptions.numThreads; + if (typeof numThreads != "number" || !Number.isInteger(numThreads) || numThreads < 0) { + numThreads = 0; + } + const keyDataOffset = allocWasmString("numThreads", allocs); + const valueDataOffset = allocWasmString(numThreads.toString(), allocs); + if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { + checkLastError(`Can't set a session config entry: 'numThreads' - ${webnnOptions.numThreads}.`); + } + } + if (webnnOptions?.powerPreference) { + const keyDataOffset = allocWasmString("powerPreference", allocs); + const valueDataOffset = allocWasmString(webnnOptions.powerPreference, allocs); + if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { + checkLastError( + `Can't set a session config entry: 'powerPreference' - ${webnnOptions.powerPreference}.` + ); + } + } + } + break; + case "webgpu": + epName = "JS"; + if (typeof ep !== "string") { + const webgpuOptions = ep; + if (webgpuOptions?.preferredLayout) { + if (webgpuOptions.preferredLayout !== "NCHW" && webgpuOptions.preferredLayout !== "NHWC") { + throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${webgpuOptions.preferredLayout}`); + } + const keyDataOffset = allocWasmString("preferredLayout", allocs); + const valueDataOffset = allocWasmString(webgpuOptions.preferredLayout, allocs); + if 
/**
 * Create a native session-options object from the user supplied session options.
 *
 * Default options are appended to `options` first (via `appendDefaultOptions`), then the
 * handle is created and execution providers, graph capture, free dimension overrides and
 * extra config entries are applied in that order.
 *
 * @param options Optional session options.
 * @returns A tuple `[sessionOptionsHandle, allocs]`; `allocs` lists the wasm allocations the caller must free.
 * @throws Error when an option is invalid or a native call reports failure. On failure the
 *         handle (if created) is released and every allocation made here is freed.
 */
setSessionOptions = (options) => {
  const wasm2 = getInstance();
  const allocs = [];
  let sessionOptionsHandle = 0;
  const sessionOptions = options || {};
  appendDefaultOptions(sessionOptions);

  // Release whatever has been created so far; used on every failure path.
  const cleanUp = () => {
    if (sessionOptionsHandle !== 0) {
      wasm2._OrtReleaseSessionOptions(sessionOptionsHandle);
    }
    allocs.forEach((alloc) => wasm2._free(alloc));
  };

  try {
    const graphOptimizationLevel = getGraphOptimzationLevel(sessionOptions.graphOptimizationLevel ?? "all");
    const executionMode = getExecutionMode(sessionOptions.executionMode ?? "sequential");

    let logIdDataOffset = 0;
    if (typeof sessionOptions.logId === "string") {
      logIdDataOffset = allocWasmString(sessionOptions.logId, allocs);
    }

    const logSeverityLevel = sessionOptions.logSeverityLevel ?? 2;
    const severityIsValid = Number.isInteger(logSeverityLevel) && logSeverityLevel >= 0 && logSeverityLevel <= 4;
    if (!severityIsValid) {
      throw new Error(`log serverity level is not valid: ${logSeverityLevel}`);
    }

    const logVerbosityLevel = sessionOptions.logVerbosityLevel ?? 0;
    const verbosityIsValid = Number.isInteger(logVerbosityLevel) && logVerbosityLevel >= 0 && logVerbosityLevel <= 4;
    if (!verbosityIsValid) {
      throw new Error(`log verbosity level is not valid: ${logVerbosityLevel}`);
    }

    let optimizedModelFilePathOffset = 0;
    if (typeof sessionOptions.optimizedModelFilePath === "string") {
      optimizedModelFilePathOffset = allocWasmString(sessionOptions.optimizedModelFilePath, allocs);
    }

    sessionOptionsHandle = wasm2._OrtCreateSessionOptions(
      graphOptimizationLevel,
      !!sessionOptions.enableCpuMemArena,
      !!sessionOptions.enableMemPattern,
      executionMode,
      !!sessionOptions.enableProfiling,
      0,
      logIdDataOffset,
      logSeverityLevel,
      logVerbosityLevel,
      optimizedModelFilePathOffset
    );
    if (sessionOptionsHandle === 0) {
      checkLastError("Can't create session options.");
    }

    if (sessionOptions.executionProviders) {
      setExecutionProviders(sessionOptionsHandle, sessionOptions.executionProviders, allocs);
    }

    const graphCapture = sessionOptions.enableGraphCapture;
    if (graphCapture !== void 0) {
      if (typeof graphCapture !== "boolean") {
        throw new Error(`enableGraphCapture must be a boolean value: ${graphCapture}`);
      }
      const keyDataOffset = allocWasmString("enableGraphCapture", allocs);
      const valueDataOffset = allocWasmString(graphCapture.toString(), allocs);
      const status = wasm2._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset);
      if (status !== 0) {
        checkLastError(
          `Can't set a session config entry: 'enableGraphCapture' - ${sessionOptions.enableGraphCapture}.`
        );
      }
    }

    if (sessionOptions.freeDimensionOverrides) {
      Object.entries(sessionOptions.freeDimensionOverrides).forEach(([name, value]) => {
        if (typeof name !== "string") {
          throw new Error(`free dimension override name must be a string: ${name}`);
        }
        const valueIsValid = typeof value === "number" && Number.isInteger(value) && value >= 0;
        if (!valueIsValid) {
          throw new Error(`free dimension override value must be a non-negative integer: ${value}`);
        }
        const nameOffset = allocWasmString(name, allocs);
        if (wasm2._OrtAddFreeDimensionOverride(sessionOptionsHandle, nameOffset, value) !== 0) {
          checkLastError(`Can't set a free dimension override: ${name} - ${value}.`);
        }
      });
    }

    if (sessionOptions.extra !== void 0) {
      const addEntry = (key, value) => {
        const keyDataOffset = allocWasmString(key, allocs);
        const valueDataOffset = allocWasmString(value, allocs);
        if (wasm2._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {
          checkLastError(`Can't set a session config entry: ${key} - ${value}.`);
        }
      };
      iterateExtraOptions(sessionOptions.extra, "", /* @__PURE__ */ new WeakSet(), addEntry);
    }

    return [sessionOptionsHandle, allocs];
  } catch (e) {
    cleanUp();
    throw e;
  }
};

/**
 * Convert a tensor data type name to its numeric enum value.
 *
 * @param type One of the supported type names ("float32", "int64", ...).
 * @returns The numeric enum value.
 * @throws Error for an unknown type name.
 */
tensorDataTypeStringToEnum = (() => {
  const enumByTypeName = /* @__PURE__ */ new Map([
    ["int8", 3 /* int8 */],
    ["uint8", 2 /* uint8 */],
    ["bool", 9 /* bool */],
    ["int16", 5 /* int16 */],
    ["uint16", 4 /* uint16 */],
    ["int32", 6 /* int32 */],
    ["uint32", 12 /* uint32 */],
    ["float16", 10 /* float16 */],
    ["float32", 1 /* float */],
    ["float64", 11 /* double */],
    ["string", 8 /* string */],
    ["int64", 7 /* int64 */],
    ["uint64", 13 /* uint64 */]
  ]);
  return (type) => {
    const enumValue = enumByTypeName.get(type);
    if (enumValue === void 0) {
      throw new Error(`unsupported data type: ${type}`);
    }
    return enumValue;
  };
})();
/**
 * Look up the size in bytes of one element of the given tensor data type.
 *
 * @param dateType The numeric tensor data type enum value.
 * @returns The element size in bytes, or `undefined` when the table has no size for that value.
 */
getTensorElementSize = (dateType) => {
  // Indexed by the numeric data type enum value.
  const bytesPerElement = [void 0, 4, 1, 1, 2, 2, 4, 8, void 0, 1, 2, 8, 4, 8, void 0, void 0, void 0];
  return bytesPerElement[dateType];
};

/**
 * Return the typed array constructor used to hold data of the given tensor type.
 *
 * @param type The tensor data type name.
 * @returns A typed array constructor. "float16" yields `Float16Array` when the runtime
 *          provides it (with a `from` method) and `Uint16Array` otherwise.
 * @throws Error for a type without a typed array representation.
 */
tensorTypeToTypedArrayConstructor = (type) => {
  switch (type) {
    case "float16": {
      const hasNativeFloat16 = typeof Float16Array !== "undefined" && Float16Array.from;
      return hasNativeFloat16 ? Float16Array : Uint16Array;
    }
    case "float32":
      return Float32Array;
    case "float64":
      return Float64Array;
    case "uint8":
    case "bool":
      return Uint8Array;
    case "int8":
      return Int8Array;
    case "uint16":
      return Uint16Array;
    case "int16":
      return Int16Array;
    case "uint32":
      return Uint32Array;
    case "int32":
      return Int32Array;
    case "uint64":
      return BigUint64Array;
    case "int64":
      return BigInt64Array;
    default:
      throw new Error(`unsupported type: ${type}`);
  }
};

/**
 * Convert a log level name to its numeric value (0 = "verbose" ... 4 = "fatal").
 *
 * @param logLevel The log level name.
 * @returns The numeric level.
 * @throws Error for an unknown level name.
 */
logLevelStringToEnum = (logLevel) => {
  const level = ["verbose", "info", "warning", "error", "fatal"].indexOf(logLevel);
  if (level === -1) {
    throw new Error(`unsupported logging level: ${logLevel}`);
  }
  return level;
};

/**
 * Tell whether tensors of the given type can be stored in a GPU buffer.
 *
 * @param type The tensor data type name.
 * @returns `true` for the supported type names, `false` otherwise.
 */
isGpuBufferSupportedType = (type) => ["float32", "float16", "int32", "int64", "uint32", "uint8", "bool"].includes(type);

/**
 * Convert a data location name to its numeric value.
 *
 * @param location One of "none", "cpu", "cpu-pinned", "texture", "gpu-buffer".
 * @returns 0 through 4 respectively.
 * @throws Error for an unknown location name.
 */
dataLocationStringToEnum = (location) => {
  const index = ["none", "cpu", "cpu-pinned", "texture", "gpu-buffer"].indexOf(location);
  if (index === -1) {
    throw new Error(`unsupported data location: ${location}`);
  }
  return index;
};
/**
 * Load a file into a `Uint8Array`.
 *
 * @param file A path/URL string, a `Blob`, a `Uint8Array` (returned as-is) or any value
 *             accepted by the `Uint8Array` constructor (e.g. an `ArrayBuffer`).
 * @returns A promise resolving to the file content.
 * @throws Error when a fetched response is not OK or has no body for a large download;
 *         file system and network errors are propagated.
 */
loadFile = async (file) => {
  // Non-string inputs already carry the data.
  if (typeof file !== "string") {
    if (file instanceof Blob) {
      return new Uint8Array(await file.arrayBuffer());
    }
    if (file instanceof Uint8Array) {
      return file;
    }
    return new Uint8Array(file);
  }

  const runningInNode = typeof process !== "undefined" && process.versions && process.versions.node;
  if (runningInNode) {
    try {
      return new Uint8Array(await readFile2(file));
    } catch (e) {
      if (e.code !== "ERR_FS_FILE_TOO_LARGE") {
        throw e;
      }
      // The one-shot read refused the file because of its size: read it as a stream instead.
      const chunks = [];
      for await (const chunk of createReadStream(file)) {
        chunks.push(chunk);
      }
      return new Uint8Array(Buffer.concat(chunks));
    }
  }

  const response = await fetch(file);
  if (!response.ok) {
    throw new Error(`failed to load external data file: ${file}`);
  }
  const contentLengthHeader = response.headers.get("Content-Length");
  const fileSize = contentLengthHeader ? parseInt(contentLengthHeader, 10) : 0;

  // Below 1 GiB (or when the size is unknown) a single arrayBuffer() call is used.
  if (fileSize < 1073741824) {
    return new Uint8Array(await response.arrayBuffer());
  }

  if (!response.body) {
    throw new Error(`failed to load external data file: ${file}, no response body.`);
  }
  const reader = response.body.getReader();

  let buffer;
  try {
    buffer = new ArrayBuffer(fileSize);
  } catch (e) {
    if (!(e instanceof RangeError)) {
      throw e;
    }
    // Fall back to a WebAssembly.Memory backed buffer sized in 64 KiB pages.
    const pages = Math.ceil(fileSize / 65536);
    buffer = new WebAssembly.Memory({ initial: pages, maximum: pages }).buffer;
  }

  // Copy the streamed chunks into the buffer back to back.
  let offset = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    const chunkSize = value.byteLength;
    new Uint8Array(buffer, offset, chunkSize).set(value);
    offset += chunkSize;
  }
  return new Uint8Array(buffer, 0, fileSize);
};
// One-letter prefixes indexed by numeric log level (verbose, info, warning, error, fatal).
logLevelPrefix = ["V", "I", "W", "E", "F"];

/**
 * Write one log line to the console in the form `[<prefix>,<ISO timestamp>]<message>`.
 *
 * @param level Numeric log level, used as an index into `logLevelPrefix`.
 * @param message The message text.
 */
doLog = (level, message) => {
  const prefix = logLevelPrefix[level];
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  console.log(`[${prefix},${timestamp}]${message}`);
};

/**
 * Store the logger configuration used by `LOG` and `LOG_DEBUG`.
 *
 * @param $configLogLevel Name of the minimum level that gets logged.
 * @param $debug Whether `LOG_DEBUG` is active.
 */
configureLogger = ($configLogLevel, $debug) => {
  configLogLevel = $configLogLevel;
  debug = $debug;
};

/**
 * Log a message when its level is at or above the configured level.
 *
 * @param logLevel Name of the message's log level.
 * @param msg The message, or a function producing it (only called when the message is logged).
 */
LOG = (logLevel, msg) => {
  const messageLevel = logLevelStringToEnum(logLevel);
  const configLevel = logLevelStringToEnum(configLogLevel);
  if (messageLevel < configLevel) {
    return;
  }
  doLog(messageLevel, typeof msg === "function" ? msg() : msg);
};

/**
 * Same as `LOG`, but only active when the logger was configured with debug enabled.
 */
LOG_DEBUG = (...args) => {
  if (!debug) {
    return;
  }
  LOG(...args);
};

/**
 * Create a typed array view over a data buffer.
 *
 * @param dataBuffer The buffer holding the tensor data.
 * @param type The tensor data type name that selects the typed array constructor.
 * @returns A typed array over `dataBuffer`.
 */
createView = (dataBuffer, type) => {
  const TypedArrayConstructor = tensorTypeToTypedArrayConstructor(type);
  return new TypedArrayConstructor(dataBuffer);
};
// Bucket sizes in iteration order of `bucketFreelist`; filled once by the first manager instance.
bucketArr = [];

/**
 * Round a byte size up to the next multiple of 16.
 */
calcNormalizedBufferSize = (size) => Math.ceil(size / 16) * 16;

/**
 * Pick the buffer size to allocate for a request of `size` bytes.
 *
 * @returns The first bucket size (in `bucketArr` order) that fits, or the size rounded
 *          up to a multiple of 16 when no bucket fits.
 */
calcBucketBufferSize = (size) => {
  for (const sizeForBucket of bucketArr) {
    if (size <= sizeForBucket) {
      return sizeForBucket;
    }
  }
  return calcNormalizedBufferSize(size);
};

guid = 1;

/**
 * Produce the next GPU data id (a module-wide increasing counter starting at 1).
 */
createNewGpuDataId = () => guid++;

/**
 * Copy the content of a GPU buffer back to the CPU.
 *
 * @param backend The WebGPU backend (provides `device`, `getCommandEncoder`, `endComputePass`, `flush`).
 * @param gpuBuffer The source GPU buffer.
 * @param originalSize Number of meaningful bytes in `gpuBuffer`.
 * @param getTargetBuffer Optional function returning the typed array to copy into.
 * @returns The target buffer when `getTargetBuffer` is given, otherwise a new `Uint8Array`
 *          holding a copy of the first `originalSize` bytes.
 */
downloadGpuData = async (backend, gpuBuffer, originalSize, getTargetBuffer) => {
  const bufferSize = calcNormalizedBufferSize(originalSize);
  const gpuReadBuffer = backend.device.createBuffer(
    // eslint-disable-next-line no-bitwise
    { size: bufferSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ }
  );
  try {
    const commandEncoder = backend.getCommandEncoder();
    backend.endComputePass();
    commandEncoder.copyBufferToBuffer(gpuBuffer, 0, gpuReadBuffer, 0, bufferSize);
    backend.flush();
    await gpuReadBuffer.mapAsync(GPUMapMode.READ);
    const arrayBuffer = gpuReadBuffer.getMappedRange();
    if (getTargetBuffer) {
      const targetBuffer = getTargetBuffer();
      targetBuffer.set(new Uint8Array(arrayBuffer, 0, originalSize));
      return targetBuffer;
    }
    // slice() copies the bytes, so the result stays valid after the staging buffer is destroyed.
    return new Uint8Array(arrayBuffer.slice(0, originalSize));
  } finally {
    gpuReadBuffer.destroy();
  }
};

/**
 * Tracks GPU buffers by id and recycles released storage/uniform buffers through
 * per-size free lists.
 */
GpuDataManagerImpl = class {
  /**
   * @param backend The WebGPU backend that owns the device and command encoder.
   */
  constructor(backend) {
    this.backend = backend;
    // id -> { gpuData, originalSize }
    this.storageCache = /* @__PURE__ */ new Map();
    // bucket size -> released buffers available for reuse
    this.freeBuffers = /* @__PURE__ */ new Map();
    this.freeUniformBuffers = /* @__PURE__ */ new Map();
    // staging buffers created by upload(), destroyed on the next refresh
    this.buffersForUploadingPending = [];
    // released buffers waiting for the next refresh
    this.buffersPending = [];
    // external GPUBuffer -> id
    this.externalBuffers = /* @__PURE__ */ new Map();
    // session id -> buffers released during graph capture
    this.capturedPendingBuffers = /* @__PURE__ */ new Map();

    // `bucketArr` is shared by every instance: fill it once, otherwise each new
    // manager would append the same sizes again and the array would keep growing.
    const fillBucketArr = bucketArr.length === 0;
    for (const [key] of bucketFreelist) {
      if (fillBucketArr) {
        bucketArr.push(key);
      }
      this.freeBuffers.set(key, []);
      this.freeUniformBuffers.set(key, []);
    }
  }

  /**
   * Upload CPU data into the GPU buffer registered under `id`.
   *
   * @throws Error when `id` is unknown or the data size differs from the registered size.
   */
  upload(id, data) {
    const srcArrayBuffer = data.buffer;
    const srcOffset = data.byteOffset;
    const srcLength = data.byteLength;
    const size = calcNormalizedBufferSize(srcLength);
    const gpuDataCache = this.storageCache.get(id);
    if (!gpuDataCache) {
      throw new Error("gpu data for uploading does not exist");
    }
    if (gpuDataCache.originalSize !== srcLength) {
      throw new Error(`inconsistent data size. gpu data size=${gpuDataCache.originalSize}, data size=${srcLength}`);
    }
    const gpuBufferForUploading = this.backend.device.createBuffer(
      // eslint-disable-next-line no-bitwise
      { mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC }
    );
    const arrayBuffer = gpuBufferForUploading.getMappedRange();
    new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));
    gpuBufferForUploading.unmap();
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuDataCache.gpuData.buffer, 0, size);
    LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.upload(id=${id})`);
    // The staging buffer is destroyed in refreshPendingBuffers().
    this.buffersForUploadingPending.push(gpuBufferForUploading);
  }

  /**
   * Record a GPU-side copy from one registered buffer to another of the same size.
   *
   * @throws Error when either id is unknown or the registered sizes differ.
   */
  memcpy(sourceId, destinationId) {
    const sourceGpuDataCache = this.storageCache.get(sourceId);
    if (!sourceGpuDataCache) {
      throw new Error("source gpu data for memcpy does not exist");
    }
    const destinationGpuDataCache = this.storageCache.get(destinationId);
    if (!destinationGpuDataCache) {
      throw new Error("destination gpu data for memcpy does not exist");
    }
    if (sourceGpuDataCache.originalSize !== destinationGpuDataCache.originalSize) {
      throw new Error("inconsistent source and destination gpu data size");
    }
    const size = calcNormalizedBufferSize(sourceGpuDataCache.originalSize);
    const commandEncoder = this.backend.getCommandEncoder();
    this.backend.endComputePass();
    commandEncoder.copyBufferToBuffer(
      sourceGpuDataCache.gpuData.buffer,
      0,
      destinationGpuDataCache.gpuData.buffer,
      0,
      size
    );
  }

  /**
   * Register a user supplied GPU buffer and return its id.
   *
   * When `previousBuffer` is given its id is reused; passing the same buffer again is a no-op.
   *
   * @throws Error when `previousBuffer` is not registered, or when a different buffer is
   *         supplied while the current session has a captured command list.
   */
  registerExternalBuffer(buffer, originalSize, previousBuffer) {
    let id;
    if (previousBuffer) {
      id = this.externalBuffers.get(previousBuffer);
      if (id === void 0) {
        throw new Error("previous buffer is not registered");
      }
      if (buffer === previousBuffer) {
        LOG_DEBUG(
          "verbose",
          () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, buffer is the same, skip.`
        );
        return id;
      } else if (this.backend.capturedCommandList.has(this.backend.currentSessionId)) {
        throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.
             Please use the previous external buffer!`);
      }
      this.externalBuffers.delete(previousBuffer);
    } else {
      id = createNewGpuDataId();
    }
    this.storageCache.set(id, { gpuData: { id, type: 0 /* default */, buffer }, originalSize });
    this.externalBuffers.set(buffer, id);
    LOG_DEBUG(
      "verbose",
      () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`
    );
    return id;
  }

  /**
   * Remove a previously registered external buffer; unknown buffers are ignored.
   */
  unregisterExternalBuffer(buffer) {
    const id = this.externalBuffers.get(buffer);
    if (id !== void 0) {
      this.storageCache.delete(id);
      this.externalBuffers.delete(buffer);
      LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);
    }
  }

  /**
   * Create (or recycle) a GPU buffer able to hold `size` bytes and register it.
   *
   * @param size Requested size in bytes.
   * @param usage GPUBufferUsage flags; defaults to STORAGE | COPY_SRC | COPY_DST.
   * @returns The GPU data record `{ id, type, buffer }`.
   */
  // eslint-disable-next-line no-bitwise
  create(size, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST) {
    const bufferSize = calcBucketBufferSize(size);
    const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
    const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;
    let gpuBuffer;
    if (isStorage || isUniform) {
      // Storage usage takes precedence when both flags are set.
      const freeBuffers = isStorage ? this.freeBuffers : this.freeUniformBuffers;
      const buffers = freeBuffers.get(bufferSize);
      if (buffers && buffers.length > 0) {
        gpuBuffer = buffers.pop();
      }
    }
    if (gpuBuffer === void 0) {
      gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });
    }
    const gpuData = { id: createNewGpuDataId(), type: 0 /* default */, buffer: gpuBuffer };
    this.storageCache.set(gpuData.id, { gpuData, originalSize: size });
    LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`);
    return gpuData;
  }

  /**
   * @returns The GPU data record registered under `id`, or `undefined`.
   */
  get(id) {
    return this.storageCache.get(id)?.gpuData;
  }

  /**
   * Unregister the GPU data under `id` and queue its buffer for the next refresh.
   *
   * @returns The original (requested) size of the released data.
   * @throws Error when `id` is unknown.
   */
  release(id) {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error("releasing data does not exist");
    }
    LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.release(id=${id}), gpuDataId=${cachedData.gpuData.id}`);
    this.storageCache.delete(id);
    this.buffersPending.push(cachedData.gpuData.buffer);
    return cachedData.originalSize;
  }

  /**
   * Download the GPU data under `id` into the buffer supplied by `getTargetBuffer`.
   *
   * @throws Error when `id` is unknown.
   */
  async download(id, getTargetBuffer) {
    const cachedData = this.storageCache.get(id);
    if (!cachedData) {
      throw new Error("data does not exist");
    }
    await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);
  }

  /**
   * Destroy finished staging buffers and dispose of released buffers.
   *
   * In the "default" session status released buffers go back to their free list (up to
   * the per-bucket limit in `bucketFreelist`) or are destroyed; in any other status they
   * are parked per session until `onReleaseSession`.
   */
  refreshPendingBuffers() {
    for (const buffer of this.buffersForUploadingPending) {
      buffer.destroy();
    }
    this.buffersForUploadingPending = [];
    if (this.buffersPending.length === 0) {
      return;
    }
    if (this.backend.sessionStatus === "default") {
      for (const buffer of this.buffersPending) {
        const isStorage = (buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;
        const isUniform = (buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;
        let freelist;
        if (isStorage) {
          freelist = this.freeBuffers.get(buffer.size);
        } else if (isUniform) {
          freelist = this.freeUniformBuffers.get(buffer.size);
        }
        const maxInFreeList = bucketFreelist.get(buffer.size);
        // A buffer without a free list to return to must be destroyed; pushing it
        // into a temporary array would leave it alive with nothing referencing it.
        if (freelist === void 0 || maxInFreeList === void 0 || freelist.length >= maxInFreeList) {
          buffer.destroy();
        } else {
          freelist.push(buffer);
        }
      }
      this.buffersPending = [];
    } else {
      let capturedBuffers = this.capturedPendingBuffers.get(this.backend.currentSessionId);
      if (!capturedBuffers) {
        capturedBuffers = [];
        this.capturedPendingBuffers.set(this.backend.currentSessionId, capturedBuffers);
      }
      capturedBuffers.push(...this.buffersPending);
      this.buffersPending = [];
    }
  }

  /**
   * Destroy every buffer held in the free lists, the storage cache and the captured
   * pending lists, then reset those maps.
   */
  dispose() {
    const destroyAll = (buffers) => {
      buffers.forEach((buffer) => {
        buffer.destroy();
      });
    };
    this.freeBuffers.forEach(destroyAll);
    this.freeUniformBuffers.forEach(destroyAll);
    this.storageCache.forEach((storage) => {
      storage.gpuData.buffer.destroy();
    });
    this.capturedPendingBuffers.forEach(destroyAll);
    this.storageCache = /* @__PURE__ */ new Map();
    this.freeBuffers = /* @__PURE__ */ new Map();
    this.freeUniformBuffers = /* @__PURE__ */ new Map();
    this.capturedPendingBuffers = /* @__PURE__ */ new Map();
  }

  /**
   * Destroy the buffers parked for `sessionId` during graph capture, if any.
   */
  onReleaseSession(sessionId) {
    const pendingBuffers = this.capturedPendingBuffers.get(sessionId);
    if (pendingBuffers) {
      pendingBuffers.forEach((buffer) => {
        buffer.destroy();
      });
      this.capturedPendingBuffers.delete(sessionId);
    }
  }
};

/**
 * Factory for `GpuDataManagerImpl`; forwards all arguments to the constructor.
 */
createGpuDataManager = (...args) => new GpuDataManagerImpl(...args);
// web/lib/wasm/jsep/util.ts
var MatMulUtil, BroadcastUtil, ShapeUtil, PoolConvUtil, GemmUtil, MIN_CLIP, MAX_CLIP;
/**
 * Lazy initializer for the tensor-shape helpers (MatMulUtil, BroadcastUtil, ShapeUtil,
 * PoolConvUtil, GemmUtil) and the clip constants. Calling `init_util()` assigns the
 * module-level bindings declared above.
 */
var init_util = __esm({
  "web/lib/wasm/jsep/util.ts"() {
    "use strict";
    MatMulUtil = class {
      /**
       * Calculate the expected shape when matrix multiplication
       * @param a The shape of tensor A. Should be a tuple of 2 positive integers
       * @param b The shape of tensor B. Should be a tuple of 2 positive integers
       * @returns The expected shape of the result, or undefined if N/A
       */
      static calcMatMulShape(a, b) {
        return a[1] !== b[0] ? void 0 : [a[0], b[1]];
      }
    };
    BroadcastUtil = class {
      /**
       * Calculate the expected shape when broadcasting 2 tensors
       * @param adims The shape of tensor A. Should be an array of positive integers
       * @param bdims The shape of tensor B. Should be an array of positive integers
       * @param isMatMul Whether the operation is MatMul (the two trailing dims are then
       *     combined with MatMulUtil.calcMatMulShape instead of being broadcast)
       * @returns The expected shape of the result, or undefined if N/A
       */
      static calcShape(adims, bdims, isMatMul = false) {
        const arank = adims.length;
        const brank = bdims.length;
        if (arank === 0) {
          return bdims;
        }
        if (brank === 0) {
          return adims;
        }
        const crank = Math.max(adims.length, bdims.length);
        const cdims = new Array(crank);
        if (isMatMul) {
          if (arank < 2 || brank < 2) {
            return void 0;
          }
          const cShapeMatMul = MatMulUtil.calcMatMulShape(
            [adims[arank - 2], adims[arank - 1]],
            [bdims[brank - 2], bdims[brank - 1]]
          );
          if (cShapeMatMul === void 0) {
            return void 0;
          }
          [cdims[crank - 2], cdims[crank - 1]] = cShapeMatMul;
        }
        // Walk the dimensions from the innermost outwards; a missing dimension counts as 1.
        for (let i = isMatMul ? 3 : 1; i <= crank; i++) {
          const aLen = arank - i < 0 ? 1 : adims[arank - i];
          const bLen = brank - i < 0 ? 1 : bdims[brank - i];
          if (aLen !== bLen && aLen > 1 && bLen > 1) {
            return void 0;
          }
          const max = Math.max(aLen, bLen);
          if (aLen && bLen) {
            cdims[crank - i] = max;
          } else {
            // One side is 0: only 0 or 1 on the other side can be combined with it.
            if (max > 1) {
              return void 0;
            }
            cdims[crank - i] = 0;
          }
        }
        return cdims;
      }
      /**
       * Determine if a shape is unidirectional broadcastable to another shape
       * @param shape The input shape
       * @param finalShape The desired shape after broadcasting
       */
      static isValidBroadcast(shape, finalShape) {
        const inputRank = shape.length;
        const finalRank = finalShape.length;
        if (inputRank > finalRank) {
          return false;
        }
        for (let i = 1; i <= inputRank; i++) {
          if (shape[inputRank - i] !== 1 && shape[inputRank - i] !== finalShape[finalRank - i]) {
            return false;
          }
        }
        return true;
      }
    };
    ShapeUtil = class _ShapeUtil {
      /**
       * calculate the size (number of elements)
       */
      static size(dims) {
        return _ShapeUtil.getSizeFromDimensionRange(dims, 0, dims.length);
      }
      /**
       * convert dims corresponding to type change to pack. ex. uint8 data to uint32
       * @param dims the unpacked shape
       * @param size the packing factor (number of source elements per packed element)
       * @throws if the trailing dimensions cannot absorb the packing factor
       */
      static convertShape(dims, size = 4) {
        const rank = dims.length;
        if (rank === 0) {
          return [];
        }
        const newDims = new Array(rank);
        let i = rank - 1;
        while (i >= 0) {
          if (dims[i] % size === 0) {
            newDims[i] = dims[i] / size;
            break;
          }
          if (size % dims[i] !== 0) {
            throw new Error("cannot convert shape");
          }
          // This dimension is fully consumed; carry the remaining factor to the next one.
          newDims[i] = 1;
          size /= dims[i];
          i--;
        }
        for (i--; i >= 0; i--) {
          newDims[i] = dims[i];
        }
        return newDims;
      }
      /**
       * calculate the size (number of elements) from the given axis (inclusive)
       */
      static sizeFromDimension(dims, axis) {
        if (axis < 0 || axis > dims.length) {
          throw new Error(`invalid dimension of ${axis} for sizeFromDimension as Tensor has ${dims.length} dimensions.`);
        }
        return _ShapeUtil.getSizeFromDimensionRange(dims, axis, dims.length);
      }
      /**
       * calculate the size (number of elements) to the given axis (exclusive)
       */
      static sizeToDimension(dims, axis) {
        if (axis < 0 || axis > dims.length) {
          throw new Error(`invalid dimension of ${axis} for sizeToDimension as Tensor has ${dims.length} dimensions.`);
        }
        return _ShapeUtil.getSizeFromDimensionRange(dims, 0, axis);
      }
      /**
       * calculate the size (number of elements) from and to the given axis [start, end)
       */
      static getSizeFromDimensionRange(dims, start, end) {
        let size = 1;
        for (let i = start; i < end; i++) {
          if (dims[i] < 0) {
            throw new Error(
              // eslint-disable-next-line max-len
              "cannot get valid size from specified dimension range. Most likely the range contains negative values in them."
            );
          }
          size *= dims[i];
        }
        return size;
      }
      /**
       * compute the row-major strides for the given dims (the innermost stride is 1)
       */
      static computeStrides(dims) {
        const rank = dims.length;
        if (rank === 0) {
          return [];
        } else if (rank === 1) {
          return [1];
        }
        const strides = new Array(rank);
        strides[rank - 1] = 1;
        strides[rank - 2] = dims[rank - 1];
        for (let i = rank - 3; i >= 0; --i) {
          strides[i] = strides[i + 1] * dims[i + 1];
        }
        return strides;
      }
      /**
       * normalize axis of range [-r, r) into [0, r).
       * @throws if axis lies outside [-tensorRank, tensorRank)
       */
      static normalizeAxis(axis, tensorRank) {
        // Either bound being violated is an error; the two conditions can never hold together.
        if (axis < -tensorRank || axis >= tensorRank) {
          throw new Error("unsupported axis for this operation.");
        }
        return axis < 0 ? axis + tensorRank : axis;
      }
      /**
       * normalize every axis in `axes`; when `tensorRank` is nullish, `axes.length` is used as the rank.
       */
      static normalizeAxes(axes, tensorRank) {
        return axes.map((x) => this.normalizeAxis(x, tensorRank ?? axes.length));
      }
      /**
       * Sorts a given array based on the indices in the Perm array
       * Used in Transpose
       * @param a Array to be sorted such as dims or strides
       * @param perm Perm given; if null a will be reversed
       */
      static sortBasedOnPerm(a, perm) {
        if (perm) {
          return perm.map((v) => a[v]);
        } else {
          return a.slice().reverse();
        }
      }
      /**
       * Pads a given shape according to the padding values
       * @param dims shape of the Tensor to be padded
       * @param pad2 pad values: `rank` leading pads followed by `rank` trailing pads
       */
      static padShape(dims, pad2) {
        const rank = dims.length;
        return dims.map((v, i) => v + pad2[i] + pad2[i + rank]);
      }
      /**
       * Determines if the two shapes are identical
       * @param shape1
       * @param shape2
       */
      static areEqual(shape1, shape2) {
        if (shape1.length !== shape2.length) {
          return false;
        }
        return shape1.every((v, i) => v === shape2[i]);
      }
    };
    PoolConvUtil = class _PoolConvUtil {
      /**
       * Adjust the kernel, strides, pads to correct rank. Set to default value if not present.
       * The `kernelShape`, `strides`, `dilations` and `pads` arrays are modified in place.
       * @param isGlobalOperator If true, perform global pooling.
       * @param inputDims The input tensor dimension.
       * @param kernelShape The size of the kernel along each axis.
       * @param strides Stride along each axis.
       * @param dilations Dilation along each axis.
       * @param pads Padding for the beginning and ending along each axis.
       * @throws if any attribute has an invalid length or value
       */
      static adjustPoolAttributes(isGlobalOperator, inputDims, kernelShape, strides, dilations, pads) {
        if (!isGlobalOperator && kernelShape.length !== inputDims.length - 2) {
          throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");
        }
        if (isGlobalOperator) {
          // Global pooling: the kernel covers every spatial dimension entirely.
          for (let dim = 0; dim < inputDims.length - 2; dim++) {
            if (dim >= kernelShape.length) {
              kernelShape.push(inputDims[dim + 2]);
            } else {
              kernelShape[dim] = inputDims[dim + 2];
            }
          }
        }
        for (let dim = 0; dim < kernelShape.length; dim++) {
          if (dim < strides.length) {
            // A stride of 0 would divide by zero in the output-shape formula.
            if (strides[dim] < 1) {
              throw new Error("strides should be greater than or equal to 1");
            }
          } else {
            strides.push(1);
          }
        }
        for (let dim = 0; dim < kernelShape.length; dim++) {
          if (dim < dilations.length) {
            if (dilations[dim] < 1) {
              throw new Error("dilations should be greater than or equal to 1");
            }
          } else {
            dilations.push(1);
          }
        }
        for (let dim = 0; dim < kernelShape.length * 2; dim++) {
          if (dim < pads.length) {
            if (pads[dim] < 0) {
              throw new Error("pad should be greater than or equal to 0");
            }
          } else {
            pads.push(0);
          }
        }
        for (let dim = 0; dim < kernelShape.length; dim++) {
          if (kernelShape[dim] <= 0) {
            throw new Error("kernel shapes need to be greater than 0");
          }
          if (pads[dim] >= kernelShape[dim] || pads[dim + kernelShape.length] >= kernelShape[dim]) {
            throw new Error("pads should be smaller than kernel");
          }
        }
      }
      // adjust pad values based on 'autoPad' attribute (pads is modified in place)
      static adjustPadsBasedOnAutoPad(inputDims, strides, dilations, kernelShape, pads, isChannelLast, autoPad) {
        if (!autoPad) {
          return;
        }
        if (pads.length !== 2 * (inputDims.length - 2)) {
          throw new Error("length of pads should be twice the length of data dimensions");
        }
        if (strides.length !== inputDims.length - 2) {
          throw new Error("length of strides should be the length of data dimensions");
        }
        if (kernelShape.length !== inputDims.length - 2) {
          throw new Error("length of kernel shapes should be the length of data dimensions");
        }
        for (let dim = 0; dim < inputDims.length - 2; dim++) {
          _PoolConvUtil.adjustPadAndReturnShape(
            // spatial dims start at index 1 for channel-last layouts, 2 otherwise
            inputDims[dim + (isChannelLast ? 1 : 2)],
            strides[dim],
            dilations[dim],
            kernelShape[dim],
            pads,
            dim,
            dim + inputDims.length - 2,
            autoPad
          );
        }
      }
      /**
       * Calculate the output shape for Pool ops based on input attributes. (Should be used only for Pool ops)
       * @param isGlobalOperator If true, perform global pooling.
       * @param inputDims The input tensor dimension. (inputs[0].dims)
       * @param strides Stride along each axis.
       * @param dilations Dilation along each axis.
       * @param kernelShape The size of the kernel along each axis.
       * @param pads Padding for the beginning and ending along each axis.
       * @param autoPad DEPRECATED attribute supported for legacy models. Specifies how to implicitly calculate pads in each
       *     dimension. Can take values NOTSET, SAME_UPPER, SAME_LOWER, or VALID.
       */
      static computePoolOutputShape(isGlobalOperator, inputDims, strides, dilations, kernelShape, pads, autoPad) {
        if (inputDims.length <= 0) {
          throw new Error("input shape must be of size greater than 0");
        }
        const outputDims = [inputDims[0], inputDims[1]];
        _PoolConvUtil.computeShapeHelper(
          isGlobalOperator,
          inputDims,
          outputDims,
          strides,
          dilations,
          kernelShape,
          pads,
          autoPad
        );
        return outputDims;
      }
      /**
       * Calculate the output shape for Conv op based on input attributes. (Should be used only for Conv op)
       * @param inputDims The input tensor dimension. (inputs[0].dims)
       * @param filterDims The filter tensor dimension. (inputs[1].dims)
       * @param strides Stride along each axis.
       * @param dilations Dilation along each axis.
       * @param kernelShape The size of the kernel along each axis.
       * @param pads Padding for the beginning and ending along each axis.
       * @param autoPad DEPRECATED attribute supported for legacy models. Specifies how to implicitly calculate pads in each
       *     dimension. Can take values NOTSET, SAME_UPPER, SAME_LOWER, or VALID.
       */
      static computeConvOutputShape(inputDims, filterDims, strides, dilations, kernelShape, pads, autoPad) {
        if (inputDims.length <= 0 || filterDims.length <= 0) {
          throw new Error("invalid input tensor dims or invalid filter tensor dims");
        }
        const outputDims = [inputDims[0], filterDims[0]];
        _PoolConvUtil.computeShapeHelper(false, inputDims, outputDims, strides, dilations, kernelShape, pads, autoPad);
        return outputDims;
      }
      // will compute output shapes for data dimensions ONLY (i.e.) no batch size and channels
      // called by computePoolOutputShape() and computeConvOutputShape()
      // adjust pads based on 'autoPad' attribute prior to shape computation
      static computeShapeHelper(isGlobalOperator, inputDims, outputDims, strides, dilations, kernelShape, pads, autoPad) {
        if (isGlobalOperator) {
          for (let dim = 0; dim < inputDims.length - 2; dim++) {
            outputDims.push(1);
          }
        } else {
          for (let dim = 0; dim < inputDims.length - 2; dim++) {
            outputDims.push(_PoolConvUtil.adjustPadAndReturnShape(
              inputDims[dim + 2],
              strides[dim],
              dilations[dim],
              kernelShape[dim],
              pads,
              dim,
              dim + inputDims.length - 2,
              autoPad
            ));
          }
        }
      }
      // helper for computeShapeHelper() and adjustPadsBasedOnAutoPad()
      // adjusts pad value for given 'autoPad' string and computes output shape along a particular dimension
      static adjustPadAndReturnShape(inSize, stride, dilation, kernel, pads, padHeadIndex, padTailIndex, autoPad) {
        const dkernel = dilation * (kernel - 1) + 1;
        if (autoPad && autoPad !== "NOTSET") {
          switch (autoPad) {
            case "VALID":
              pads[padHeadIndex] = 0;
              pads[padTailIndex] = 0;
              return Math.floor((inSize - dkernel) / stride + 1);
            case "SAME_LOWER":
            case "SAME_UPPER":
              if (dilation !== 1) {
                throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");
              } else {
                // Target output size is ceil(inSize / stride). This must be an integer division:
                // with a fractional quotient padNeeded collapses to `kernel - 1` for every stride.
                const legacyTargetSize = Math.floor((inSize + stride - 1) / stride);
                const padNeeded = (legacyTargetSize - 1) * stride + kernel - inSize;
                // SAME_LOWER puts the odd extra pad at the head, SAME_UPPER at the tail.
                pads[padHeadIndex] = autoPad === "SAME_LOWER" ? Math.floor((padNeeded + 1) / 2) : Math.floor(padNeeded / 2);
                pads[padTailIndex] = padNeeded - pads[padHeadIndex];
                return Math.floor((inSize + padNeeded - kernel) / stride + 1);
              }
            default:
              throw new Error("Unsupported AutoPad type");
          }
        } else {
          return Math.floor((inSize + pads[padHeadIndex] + pads[padTailIndex] - dkernel) / stride + 1);
        }
      }
    };
    GemmUtil = class {
      // will make sure input shapes are compatible for this op
      // and return back the shape of the output in the form of a tuple [M, N, K]
      // will throw exception if the input shapes are not compatible
      static getShapeOfGemmResult(leftShape, transLeft, rightShape, transRight, biasShape) {
        if (leftShape.length !== 2 || rightShape.length !== 2) {
          throw new Error("shape need to be of size 2");
        }
        let M;
        let K;
        let N;
        if (transLeft) {
          M = leftShape[1];
          K = leftShape[0];
        } else {
          M = leftShape[0];
          K = leftShape[1];
        }
        // kDim is the index of the right operand's dimension that must match K.
        let kDim = -1;
        if (transRight) {
          N = rightShape[0];
          kDim = 1;
        } else {
          N = rightShape[1];
          kDim = 0;
        }
        if (rightShape[kDim] !== K) {
          throw new Error("dimension mismatch");
        }
        if (M <= 0 || N <= 0 || K <= 0) {
          throw new Error("invalid shape specified");
        }
        if (biasShape && !BroadcastUtil.isValidBroadcast(biasShape, [M, N])) {
          throw new Error("gemm: invalid bias shape for broadcast");
        }
        return [M, N, K];
      }
    };
    MIN_CLIP = -34028234663852886e22;
    MAX_CLIP = 34028234663852886e22;
  }
});
/**
 * Map a tensor element type and component count to the WGSL type name(s) used in shaders.
 *
 * @param type numeric tensor data type (10 float16, 1 float, 6 int32, 12 uint32, 7 int64,
 *     13 uint64, 9 bool)
 * @param components number of components per element; 3 is rejected
 * @returns a single string when the storage type and the value type are the same, otherwise a
 *     `[storageType, valueType]` pair
 * @throws if the combination of type and components is not supported
 *
 * NOTE(review): the vector type strings below carry no element-type parameter as they appear
 * here (e.g. `vec4` rather than a fully parameterized WGSL type) - confirm against the
 * original source before relying on them.
 */
getWgslMappedType = (type, components) => {
  if (components === 3) {
    throw new Error("vec3 has same alignment as vec4, use vec4 instead");
  }
  switch (type) {
    case 10 /* float16 */:
      return components > 1 ? `vec${components}` : "f16";
    case 1 /* float */:
      return components > 1 ? `vec${components}` : "f32";
    case 6 /* int32 */:
      return components > 1 ? `vec${components}` : "i32";
    case 12 /* uint32 */:
      return components > 1 ? `vec${components}` : "u32";
    case 7 /* int64 */:
      if (components > 1) {
        // The message previously named uint64 although this is the int64 branch.
        throw new Error("currently not supported vecX of int64 yet");
      }
      // 64-bit integers are stored as a 2-component vector and read back as a 32-bit value.
      return ["vec2", "i32"];
    case 13 /* uint64 */:
      if (components > 1) {
        throw new Error("currently not supported vecX of uint64 yet");
      }
      return ["vec2", "u32"];
    case 9 /* bool */:
      if (components !== 4) {
        throw new Error("bool must be vec4");
      }
      // Four bools are packed into one u32 in storage.
      return ["u32", "vec4"];
    default:
      throw new Error(`Unknown data type: ${type}`);
  }
};
/**
 * Create a helper that generates WGSL snippets for addressing one shader variable, either by
 * flat offset or by multi-dimensional indices.
 *
 * @param name variable name used in the generated WGSL
 * @param tensorType numeric tensor data type, passed to getWgslMappedType
 * @param shapeOrRank a concrete shape (array) or a rank (number). With a rank, shape and
 *     strides are read from `uniforms.<name>_shape` / `uniforms.<name>_strides`; with a shape
 *     they are emitted as WGSL constants by `impl()`.
 * @param usage stored on the helper as-is ("input", "output" or "internal" at the call sites
 *     in this file)
 * @param components number of components per element, passed to getWgslMappedType
 * @returns an object exposing snippet generators (`get`, `set`, `getByOffset`, ...) and `impl()`,
 *     which returns the WGSL helper functions for every generator that was actually used
 *
 * NOTE(review): several WGSL type strings in this function (`vec${rank}`, `array`, "vec2",
 * "vec4") carry no element-type parameter as they appear here - confirm against the original
 * source. They are kept exactly as found so this helper stays consistent with getWgslMappedType.
 */
createIndicesHelper = (name, tensorType, shapeOrRank, usage, components) => {
  const useUniform = typeof shapeOrRank === "number";
  const rank = useUniform ? shapeOrRank : shapeOrRank.length;
  const rankIdentity = [...new Array(rank).keys()];
  const indicesType = rank < 2 ? "u32" : rank <= 4 ? `vec${rank}` : `array`;
  const mappedType = getWgslMappedType(tensorType, components);
  const valueType = typeof mappedType === "string" ? mappedType : mappedType[1];
  const storageType = typeof mappedType === "string" ? mappedType : mappedType[0];
  const type = { indices: indicesType, value: valueType, storage: storageType, tensor: tensorType };
  // Numeric dims become WGSL unsigned literals; strings are taken as WGSL expressions.
  const normalizeDim = (dim) => typeof dim === "string" ? dim : `${dim}u`;
  // Tracks which helper functions were requested so impl() emits only those.
  const implementationUsed = {
    offsetToIndices: false,
    indicesToOffset: false,
    broadcastedIndicesToOffset: false,
    set: false,
    setByIndices: false,
    get: false,
    getByIndices: false
  };
  const uniformPrefix = useUniform ? "uniforms." : "";
  const shape = `${uniformPrefix}${name}_shape`;
  const strides = `${uniformPrefix}${name}_strides`;

  let o2iSnippet = "";
  for (let i = 0; i < rank - 1; i++) {
    o2iSnippet += `
    let dim${i} = current / ${getElementAt(strides, i, rank)};
    let rest${i} = current % ${getElementAt(strides, i, rank)};
    indices[${i}] = dim${i};
    current = rest${i};
    `;
  }
  o2iSnippet += `indices[${rank - 1}] = current;`;
  const offsetToIndicesImplementation = rank < 2 ? "" : `
  fn o2i_${name}(offset: u32) -> ${type.indices} {
    var indices: ${type.indices};
    var current = offset;
    ${o2iSnippet}
    return indices;
  }`;
  const offsetToIndices = (varOffset) => {
    implementationUsed.offsetToIndices = true;
    return rank < 2 ? varOffset : `o2i_${name}(${varOffset})`;
  };

  const offsets = [];
  if (rank >= 2) {
    for (let i = rank - 1; i >= 0; i--) {
      offsets.push(`${getElementAt(strides, i, rank)} * (indices[${i}])`);
    }
  }
  const indicesToOffsetImplementation = rank < 2 ? "" : `
  fn i2o_${name}(indices: ${type.indices}) -> u32 {
    return ${offsets.join("+")};
  }`;
  const indicesToOffset = (varIndices) => {
    implementationUsed.indicesToOffset = true;
    return rank < 2 ? varIndices : `i2o_${name}(${varIndices})`;
  };

  const indices = (...init2) => rank === 0 ? "0u" : `${type.indices}(${init2.map(normalizeDim).join(",")})`;
  const indicesGet = (varIndices, idx) => {
    if (rank < 2) {
      return `${varIndices}`;
    } else {
      return `${getElementAt(varIndices, idx, rank)}`;
    }
  };
  const indicesSet = (varIndices, idx, value) => {
    if (rank < 2) {
      return `${varIndices}=${value};`;
    } else {
      return `${getElementAt(varIndices, idx, rank)}=${value};`;
    }
  };

  // One generated function per output variable this helper is broadcast against.
  const broadcastedIndicesToOffsetImplementation = {};
  const broadcastedIndicesToOffset = (varIndices, output) => {
    implementationUsed.broadcastedIndicesToOffset = true;
    const implKey = `${output.name}broadcastedIndicesTo${name}Offset`;
    if (implKey in broadcastedIndicesToOffsetImplementation) {
      return `${implKey}(${varIndices})`;
    }
    const offsets2 = [];
    for (let i = rank - 1; i >= 0; i--) {
      const idx = output.indicesGet("outputIndices", i + output.rank - rank);
      offsets2.push(`${indicesGet(strides, i)} * (${idx} % ${indicesGet(shape, i)})`);
    }
    broadcastedIndicesToOffsetImplementation[implKey] = `fn ${implKey}(outputIndices: ${output.type.indices}) -> u32 {
      return ${offsets2.length > 0 ? offsets2.join("+") : "0u"};
    }`;
    return `${implKey}(${varIndices})`;
  };

  const setByOffset = (offset, value) => {
    if (type.storage === type.value) {
      return `${name}[${offset}]=${value};`;
    } else if (type.storage === "vec2" && type.value === "i32") {
      // int64: low word holds the value, high word is the sign extension
      return `${name}[${offset}]=vec2(u32(${value}), select(0u, 0xFFFFFFFFu, ${value} < 0));`;
    } else if (type.storage === "vec2" && type.value === "u32") {
      // uint64: high word is always zero
      return `${name}[${offset}]=vec2(u32(${value}), 0u);`;
    } else if (type.storage === "u32" && type.value === "vec4") {
      // bool x4: pack one byte per component into a u32
      return `${name}[${offset}]=dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(${value}));`;
    } else {
      throw new Error(`not supported combination of storage type ${type.storage} and value type ${type.value} yet`);
    }
  };
  const getByOffset = (offset) => {
    if (type.storage === type.value) {
      return `${name}[${offset}]`;
    } else if (type.storage === "vec2" && type.value === "i32") {
      return `i32(${name}[${offset}].x)`;
    } else if (type.storage === "vec2" && type.value === "u32") {
      return `u32(${name}[${offset}].x)`;
    } else if (type.storage === "u32" && type.value === "vec4") {
      return `vec4(bool(${name}[${offset}] & 0xFFu), bool(${name}[${offset}] & 0xFF00u), bool(${name}[${offset}] & 0xFF0000u), bool(${name}[${offset}] & 0xFF000000u))`;
    } else {
      throw new Error(`not supported combination of storage type ${type.storage} and value type ${type.value} yet`);
    }
  };

  const getByIndicesImplementation = rank < 2 ? "" : `
  fn get_${name}ByIndices(indices: ${type.indices}) -> ${valueType} {
    return ${getByOffset(`i2o_${name}(indices)`)};
  }`;
  const getImplementation = rank < 2 ? "" : (() => {
    const functionParams = rankIdentity.map((i) => `d${i}: u32`).join(", ");
    const dimsParams = rankIdentity.map((i) => `d${i}`).join(", ");
    return `
  fn get_${name}(${functionParams}) -> ${valueType} {
    return get_${name}ByIndices(${indices(dimsParams)});
  }`;
  })();
  const get = (...indices2) => {
    if (indices2.length !== rank) {
      throw new Error(`indices length must be ${rank}`);
    }
    const normalizedIndices = indices2.map(normalizeDim).join(",");
    if (rank === 0) {
      return getByOffset("0u");
    } else if (rank === 1) {
      // With a single index the joined string IS the index expression. Indexing into it
      // (normalizedIndices[0]) would keep only its first character.
      return getByOffset(normalizedIndices);
    } else {
      implementationUsed.get = true;
      implementationUsed.getByIndices = true;
      implementationUsed.indicesToOffset = true;
      return `get_${name}(${normalizedIndices})`;
    }
  };
  const getByIndices = (varIndices) => {
    if (rank < 2) {
      return getByOffset(varIndices);
    } else {
      implementationUsed.getByIndices = true;
      implementationUsed.indicesToOffset = true;
      return `get_${name}ByIndices(${varIndices})`;
    }
  };

  const setByIndicesImplementation = rank < 2 ? "" : `
  fn set_${name}ByIndices(indices: ${type.indices}, value: ${valueType}) {
    ${setByOffset(`i2o_${name}(indices)`, "value")}
  }`;
  const setImplementation = rank < 2 ? "" : (() => {
    const functionParams = rankIdentity.map((i) => `d${i}: u32`).join(", ");
    const dimsParams = rankIdentity.map((i) => `d${i}`).join(", ");
    return `
  fn set_${name}(${functionParams}, value: ${valueType}) {
    set_${name}ByIndices(${indices(dimsParams)}, value);
  }`;
  })();
  const set = (...indicesAndValue) => {
    if (indicesAndValue.length !== rank + 1) {
      throw new Error(`indices length must be ${rank}`);
    }
    const value = indicesAndValue[rank];
    if (typeof value !== "string") {
      throw new Error("value must be string");
    }
    const normalizedIndices = indicesAndValue.slice(0, rank).map(normalizeDim).join(",");
    if (rank === 0) {
      return setByOffset("0u", value);
    } else if (rank === 1) {
      // Same as in get(): use the whole index expression, not its first character.
      return setByOffset(normalizedIndices, value);
    } else {
      implementationUsed.set = true;
      implementationUsed.setByIndices = true;
      implementationUsed.indicesToOffset = true;
      return `set_${name}(${normalizedIndices}, ${value})`;
    }
  };
  const setByIndices = (varIndices, value) => {
    if (rank < 2) {
      return setByOffset(varIndices, value);
    } else {
      implementationUsed.setByIndices = true;
      implementationUsed.indicesToOffset = true;
      return `set_${name}ByIndices(${varIndices}, ${value});`;
    }
  };

  // Collect the WGSL helper functions that were requested through the generators above.
  const impl = () => {
    const impls = [];
    let needShapeStrides = false;
    if (implementationUsed.offsetToIndices) {
      impls.push(offsetToIndicesImplementation);
      needShapeStrides = true;
    }
    if (implementationUsed.indicesToOffset) {
      impls.push(indicesToOffsetImplementation);
      needShapeStrides = true;
    }
    if (implementationUsed.broadcastedIndicesToOffset) {
      Object.values(broadcastedIndicesToOffsetImplementation).forEach((impl2) => impls.push(impl2));
      needShapeStrides = true;
    }
    if (implementationUsed.set) {
      impls.push(setImplementation);
      needShapeStrides = true;
    }
    if (implementationUsed.setByIndices) {
      impls.push(setByIndicesImplementation);
      needShapeStrides = true;
    }
    if (implementationUsed.get) {
      impls.push(getImplementation);
      needShapeStrides = true;
    }
    if (implementationUsed.getByIndices) {
      impls.push(getByIndicesImplementation);
      needShapeStrides = true;
    }
    if (!useUniform && needShapeStrides) {
      // A concrete shape was given: emit shape and strides as constants ahead of the helpers.
      impls.unshift(
        `const ${shape} = ${type.indices}(${shapeOrRank.join(",")});`,
        `const ${strides} = ${type.indices}(${ShapeUtil.computeStrides(shapeOrRank).join(",")});`
      );
    }
    return impls.join("\n");
  };

  return {
    impl,
    type,
    offsetToIndices,
    indicesToOffset,
    broadcastedIndicesToOffset,
    indices,
    indicesGet,
    indicesSet,
    set,
    setByOffset,
    setByIndices,
    get,
    getByOffset,
    getByIndices,
    // isVec4,
    usage,
    name,
    strides,
    shape,
    rank
  };
};
1 : workgroupSize[2]; + if (workgroupSizeX > this.limits.maxComputeWorkgroupSizeX || workgroupSizeY > this.limits.maxComputeWorkgroupSizeY || workgroupSizeZ > this.limits.maxComputeWorkgroupSizeZ) { + throw new Error(`workgroup size [${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`); + } + if (workgroupSizeX * workgroupSizeY * workgroupSizeZ > this.limits.maxComputeInvocationsPerWorkgroup) { + throw new Error(`workgroup size [${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`); + } + const is1DimensionDispatch = this.normalizedDispatchGroup[1] === 1 && this.normalizedDispatchGroup[2] === 1; + const paramList = is1DimensionDispatch ? `@builtin(global_invocation_id) global_id : vec3, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(local_invocation_id) local_id : vec3` : `@builtin(global_invocation_id) global_id : vec3, + @builtin(local_invocation_id) local_id : vec3, + @builtin(local_invocation_index) local_idx : u32, + @builtin(workgroup_id) workgroup_id : vec3, + @builtin(num_workgroups) num_workgroups : vec3`; + const globalIdxDefinition = is1DimensionDispatch ? 
"let global_idx = global_id.x; let local_idx = local_id.x;" : `let global_idx = (workgroup_id.z * num_workgroups[0] * num_workgroups[1] + + workgroup_id.y * num_workgroups[0] + workgroup_id.x) * ${workgroupSizeX * workgroupSizeY * workgroupSizeZ}u + local_idx;`; + return `@compute @workgroup_size(${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ}) + fn main(${paramList}) { + ${globalIdxDefinition} + `; + } + appendVariableUniforms(variable) { + if (variable.rank !== 0) { + if (variable.shape.startsWith("uniforms.")) { + this.uniforms.push({ name: variable.shape.replace("uniforms.", ""), type: "u32", length: variable.rank }); + } + if (variable.strides.startsWith("uniforms.")) { + this.uniforms.push({ name: variable.strides.replace("uniforms.", ""), type: "u32", length: variable.rank }); + } + } + } + declareVariable(variable, bindingIndex) { + if (variable.usage === "internal") { + throw new Error("cannot use internal variable with declareVariable(). use registerInternalVariables() instead."); + } + this.variables.push(variable); + this.appendVariableUniforms(variable); + const access = variable.usage === "input" ? "read" : "read_write"; + const storageType = variable.type.storage; + return `@group(0) @binding(${bindingIndex}) var ${variable.name}: array<${storageType}>;`; + } + declareVariables(...variables) { + return variables.map((v) => this.declareVariable(v, this.variableIndex++)).join("\n"); + } + registerInternalVariable(variable) { + if (variable.usage !== "internal") { + throw new Error( + "cannot use input or output variable with registerInternalVariable(). use declareVariables() instead." 
+ ); + } + this.internalVariables.push(variable); + this.appendVariableUniforms(variable); + } + registerInternalVariables(...variables) { + variables.forEach((v) => this.registerInternalVariable(v)); + return this; + } + registerUniform(name, type, length = 1) { + this.uniforms.push({ name, type, length }); + return this; + } + registerUniforms(additionalUniforms) { + this.uniforms = this.uniforms.concat(additionalUniforms); + return this; + } + uniformDeclaration() { + if (this.uniforms.length === 0) { + return ""; + } + const uniformSnippets = []; + for (const { name, type, length } of this.uniforms) { + if (length && length > 4) { + if (type === "f16") { + uniformSnippets.push(`@align(16) ${name}:array, ${Math.ceil(length / 8)}>`); + } else { + uniformSnippets.push(`${name}:array, ${Math.ceil(length / 4)}>`); + } + } else { + const typeTemp = length == null || length === 1 ? type : `vec${length}<${type}>`; + uniformSnippets.push(`${name}:${typeTemp}`); + } + } + return ` + struct Uniforms { ${uniformSnippets.join(", ")} }; + @group(0) @binding(${this.variableIndex}) var uniforms: Uniforms;`; + } + /** + * Get additional implementation that needs to be added to the shader source. + */ + get additionalImplementations() { + return this.uniformDeclaration() + this.variables.map((i) => i.impl()).join("\n") + this.internalVariables.map((i) => i.impl()).join("\n"); + } + /** + * Get the variable info of the shader program. + */ + get variablesInfo() { + if (this.uniforms.length === 0) { + return void 0; + } + const uniformWgslTypeToDataType = (type) => [ + 12 /* uint32 */, + 10 /* float16 */, + 1 /* float */, + 6 /* int32 */ + ][["u32", "f16", "f32", "i32"].indexOf(type)]; + return this.uniforms.map((u) => [uniformWgslTypeToDataType(u.type), u.length ?? 
/**
 * List the axes of `inShape` that get broadcast when it is expanded to
 * `outShape`. Shapes are aligned at their trailing dimension; an axis counts
 * as broadcast when its input extent is 1 (or missing/0) and the matching
 * output extent is greater than 1.
 *
 * @returns the broadcast axes of the input, in ascending order.
 */
getBroadcastDims = (inShape, outShape) => {
  // Offset that maps an input axis to the output axis it is aligned with.
  const rankOffset = outShape.length - inShape.length;
  const broadcastAxes = [];
  for (let axis = 0; axis < inShape.length; axis += 1) {
    const inExtent = inShape[axis] || 1;
    const outExtent = outShape[axis + rankOffset] || 1;
    if (inExtent === 1 && outExtent > 1) {
      broadcastAxes.push(axis);
    }
  }
  return broadcastAxes;
};
/**
 * Emit the WGSL helper `perm`, which maps output indices to the input
 * indices they are read from: output axis `k` is written into input axis
 * `perm[k]`.
 *
 * @param perm - permutation, one entry per output axis.
 * @param rank - number of axes to emit assignments for.
 * @param input - input variable helper (provides `type.indices`, `indicesSet`).
 * @param output - output variable helper (provides `type.indices`).
 * @returns the WGSL source of the function, lines joined with "\n".
 */
permFunctionBody = (perm, rank, input, output) => {
  const header = `fn perm(i: ${output.type.indices}) -> ${input.type.indices} {
    var a: ${input.type.indices};`;
  const assignments = [];
  for (let outAxis = 0; outAxis < rank; outAxis += 1) {
    assignments.push(input.indicesSet("a", perm[outAxis], `i[${outAxis}]`));
  }
  return [header, ...assignments, "return a;}"].join("\n");
};
// WGSL expressions that fold one input element (`candidate`) into the
// per-thread accumulator (`bestValue`), keyed by reduce type.
reduceOps = (() => {
  const keepIf = (comparison) => `select(bestValue, candidate, candidate ${comparison} bestValue)`;
  const addTerm = (term) => `bestValue + ${term}`;
  return {
    max: keepIf(">"),
    min: keepIf("<"),
    mean: addTerm("candidate"),
    sum: addTerm("candidate"),
    prod: "bestValue * candidate",
    sumSquare: addTerm("candidate * candidate"),
    logSumExp: addTerm("exp(candidate)"),
    l1: addTerm("abs(candidate)"),
    l2: addTerm("candidate * candidate"),
    logSum: addTerm("candidate")
  };
})();
/**
 * Build the program info for a reduction that uses one workgroup per output
 * element and combines per-thread partial results through workgroup-shared
 * memory.
 *
 * The shader addresses the input as `outputIndex * reduceSize + k`, i.e. it
 * expects the reduced elements of each output to be contiguous.
 *
 * @param name - program name.
 * @param shaderCache - shader cache descriptor, passed through unchanged.
 * @param inputs - input tensors; only `inputs[0]` is read.
 * @param reduceType - key into reduceOps / reduceSharedOps /
 *   reduceInitValues / reduceOutputValues ("mean" is finalized inline).
 * @param outputDataType - data type of the output tensor.
 * @param outputShape - dims of the output tensor.
 * @param reduceShape - dims being reduced; their product is `reduceSize`.
 * @returns `{ name, shaderCache, getShaderSource, getRunData }`.
 */
createReduceSharedProgramInfo = (name, shaderCache, inputs, reduceType, outputDataType, outputShape, reduceShape) => {
  const inputShape = inputs[0].dims;
  const outputSize = ShapeUtil.size(outputShape);
  const reduceSize = ShapeUtil.size(reduceShape);
  const input = inputVariable("_A", inputs[0].dataType, inputShape);
  const output = outputVariable("output", outputDataType, outputShape);
  const workgroupSize = 32;
  // Scratch space shared by the threads of a workgroup: one f32 partial
  // result per thread. It must live in the workgroup address space and carry
  // an explicit element type and count to be valid WGSL.
  const sharedMemorySnippet = `
      var<workgroup> aBestValues : array<f32, ${workgroupSize}>;
   `;
  const getShaderSource = (shaderHelper) => `
    ${shaderHelper.registerUniform("reduceSize", "u32").declareVariables(input, output)}
    ${sharedMemorySnippet}
    fn DIV_CEIL(a : u32, b : u32) -> u32 {
      return ((a - 1u) / b + 1u);
    }
    ${shaderHelper.mainStart(workgroupSize)}

      let outputIndex = global_idx / ${workgroupSize};
      let offset = outputIndex * uniforms.reduceSize;

      var bestValue = f32(${reduceInitValues[reduceType]});
      let Length = uniforms.reduceSize;
      for (var k = local_idx; k < Length; k = k + ${workgroupSize}) {
        let candidate = f32(${input.getByOffset("offset + k")});
        bestValue = ${reduceOps[reduceType]};
      }
      aBestValues[local_idx] = bestValue;
      workgroupBarrier();

      var reduceSize = min(Length, ${workgroupSize}u);
      for (var currentSize = reduceSize / 2u; reduceSize > 1u;
          currentSize = reduceSize / 2u) {
        let interval = DIV_CEIL(reduceSize, 2u);
        if (local_idx < currentSize) {
          let candidate = aBestValues[local_idx + interval];
          bestValue = ${reduceSharedOps[reduceType]};
          aBestValues[local_idx] = bestValue;
        }
        reduceSize = interval;
        workgroupBarrier();
      }

      if (local_idx == 0u) {
        ${output.setByOffset(
    "outputIndex",
    `${reduceType === "mean" ? `${output.type.storage}(bestValue / f32(uniforms.reduceSize))` : `${output.type.storage}(${reduceOutputValues[reduceType]})`}`
  )};
      }
    }`;
  return {
    name,
    shaderCache,
    getShaderSource,
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: outputDataType }],
      // One workgroup per output element.
      dispatchGroup: { x: outputSize },
      programUniforms: [{ type: 12 /* uint32 */, data: reduceSize }]
    })
  };
};
// ReduceMean through the shared-memory path: delegates to reduceCommon with
// the program name and the "mean" reduce type.
reduceMeanShared = (context, attributes) => {
  const programName = "ReduceMeanShared";
  const reduceType = "mean";
  reduceCommon(context, programName, attributes, reduceType);
};
/**
 * Validate the inputs of a Reduce op: one data tensor plus an optional axes
 * tensor, which must be one-dimensional.
 *
 * @throws {Error} when the input count is not 1 or 2, or when the axes
 *   tensor is not 1-D.
 */
validateInputs2 = (inputs) => {
  const countIsAcceptable = Boolean(inputs) && inputs.length !== 0 && !(inputs.length > 2);
  if (!countIsAcceptable) {
    throw new Error("Reduce op requires 1 or 2 inputs.");
  }
  if (inputs.length !== 2) {
    return;
  }
  const axesRank = inputs[1].dims.length;
  if (axesRank !== 1) {
    throw new Error("Invalid axes input dims.");
  }
};
} + }); + const outputRank = outputShape.length; + const outputSize = ShapeUtil.size(outputShape); + const getShaderSource = (shaderHelper) => { + const idxCopy = []; + const input = inputVariable("_A", inputs[0].dataType, inputRank); + const output = outputVariable("output", outputDataType, outputRank); + const ops = reduceOp(input, output, axes); + let reduceOps2 = ops[2]; + for (let k = 0, l = 0; k < inputRank; k++) { + if (reduceOnAllAxes || axes.indexOf(k) >= 0) { + if (keepDims) { + l++; + } + reduceOps2 = `for(var j${k}: u32 = 0; j${k} < ${inputShape[k]}; j${k}++) { + ${ops[2].includes("last_index") ? `let last_index = j${k};` : ""} + ${input.indicesSet("input_indices", k, `j${k}`)} + ${reduceOps2} + }`; + } else { + idxCopy.push(`${input.indicesSet("input_indices", k, output.indicesGet("output_indices", l))};`); + l++; + } + } + return ` + + ${shaderHelper.registerUniform("output_size", "u32").declareVariables(input, output)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var input_indices: ${input.type.indices}; + let output_indices = ${output.offsetToIndices("global_idx")}; + + ${idxCopy.join("\n")} + ${ops[0]} // init ops for reduce max/min + ${ops[1]} + ${reduceOps2} + ${ops[3]} + ${ops.length === 4 ? 
/**
 * Build reduce attributes whose axes come from the second input tensor
 * instead of the static attributes.
 *
 * @param inputs - op inputs; `inputs[1]` holds the axes as int64 values.
 * @param attributes - static attributes; `keepDims` and `noopWithEmptyAxes`
 *   are carried over unchanged.
 * @returns the result of createAttributeWithCacheKey for the merged record.
 */
createReduceAttributesFromInputs = (inputs, attributes) => {
  const axesTensor = inputs[1];
  // The tensor data is only read when the axes tensor is non-empty.
  const axes = axesTensor.dims[0] > 0 ? Array.from(axesTensor.getBigInt64Array(), (axis) => Number(axis)) : [];
  const { keepDims, noopWithEmptyAxes } = attributes;
  return createAttributeWithCacheKey({ axes, keepDims, noopWithEmptyAxes });
};
/**
 * ReduceL1 through the naive path: validates the inputs, then runs the
 * generic reduce program with a snippet that accumulates absolute values.
 */
reduceL1Naive = (context, attributes) => {
  validateInputs2(context.inputs);
  runReduceProgram(context, "ReduceL1", attributes, (input, output) => {
    // Snippet slots: [0] init, [1] unused, [2] per-element step, [3] unused.
    const init = `var value = ${output.type.storage}(0);`;
    const accumulate = `value += abs(${input.getByIndices("input_indices")});`;
    return [init, "", accumulate, ""];
  });
};
/**
 * ReduceMean through the naive path: sums the reduced elements in f32 and
 * divides by their count, which is baked into the shader as a literal.
 */
reduceMeanNaive = (context, attributes) => {
  validateInputs2(context.inputs);
  const buildSnippets = (input, output, axes) => {
    // An empty axes list means every axis is reduced.
    const reduceEveryAxis = axes.length === 0;
    const dims = context.inputs[0].dims;
    let reducedCount = 1;
    for (let axis = 0; axis < input.rank; axis += 1) {
      if (reduceEveryAxis || axes.indexOf(axis) >= 0) {
        reducedCount *= dims[axis];
      }
    }
    const accumulate = `sum += f32(${input.getByIndices("input_indices")});`;
    const finalize = `let value = ${output.type.value}(sum / ${reducedCount});`;
    return ["var sum = f32(0);", "", accumulate, finalize];
  };
  runReduceProgram(context, "ReduceMean", attributes, buildSnippets);
};
/**
 * Decide whether a reduction should use the naive (one thread per output)
 * program instead of the shared-memory one.
 *
 * The naive program is chosen when few elements are reduced per output
 * (fewer than 32) and there are many outputs (more than 1024).
 *
 * @param shape - dims of the tensor being reduced.
 * @param axes - axes to reduce; negative values count from the last axis.
 * @param noopWithEmptyAxes - returned as-is when `axes` is empty.
 * @returns true to use the naive program.
 */
useNaiveReduceMethod = (shape, axes, noopWithEmptyAxes) => {
  if (axes.length === 0) {
    return noopWithEmptyAxes;
  }
  const rank = shape.length;
  // Normalize negative axes so they match the dimension indices below.
  const reducedAxes = new Set(Array.from(axes, (axis) => (axis < 0 ? axis + rank : axis)));
  let outputSize = 1;
  let reduceSize = 1;
  // Walk every dimension of the tensor (not just the first `axes.length`
  // ones), otherwise both sizes are computed from the wrong dimensions.
  for (let dim = 0; dim < rank; dim++) {
    if (reducedAxes.has(dim)) {
      reduceSize *= shape[dim];
    } else {
      outputSize *= shape[dim];
    }
  }
  return reduceSize < 32 && outputSize > 1024;
};
/**
 * Validate the inputs of ArgMin/ArgMax: one or two inputs, the first of
 * which must have data type 1 (float).
 *
 * @throws {Error} on a wrong input count or a non-float first input.
 */
validateInputs3 = (inputs) => {
  const countIsAcceptable = Boolean(inputs) && inputs.length !== 0 && !(inputs.length > 2);
  if (!countIsAcceptable) {
    throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");
  }
  const isFloat = inputs[0].dataType === 1 /* float */;
  if (!isFloat) {
    throw new Error("Invalid input type.");
  }
};
/**
 * ArgMax: validates the inputs and runs the generic reduce program over
 * `attributes.axis` with snippets that track the index of the largest value.
 * The output data type is 7 (int64).
 */
argMax = (context, attributes) => {
  validateInputs3(context.inputs);
  const buildSnippets = (input, output, axes) => {
    // Reset the reduced axes so the first element seeds `value`.
    const resetLines = [];
    for (let axis = 0; axis < input.rank; axis += 1) {
      if (axes.length === 0 || axes.indexOf(axis) >= 0) {
        resetLines.push(`input_indices[${axis}] = 0;`);
      }
    }
    const element = input.getByIndices("input_indices");
    // With selectLastIndex, ties move the index forward (">=").
    const comparison = attributes.selectLastIndex > 0 ? ">=" : ">";
    const seed = `var value = ${element};\nvar best_index : i32 = 0;`;
    // `last_index` is the loop variable the reduce program defines for
    // snippets that mention it.
    const step = `if (${element} ${comparison} value) {
         value = ${element};
         best_index = i32(last_index);
       }`;
    return [resetLines.join("\n"), seed, step, "", output.setByOffset("global_idx", "best_index")];
  };
  const programInfo = createReduceProgramInfo(
    "argMax",
    { hint: attributes.cacheKey, inputDependencies: ["rank"] },
    [context.inputs[0]],
    buildSnippets,
    [attributes.axis],
    7 /* int64 */,
    attributes.keepDims
  );
  context.compute(programInfo, { inputs: [0] });
};
/**
 * Emit the WGSL that copies one element from the selected input to the
 * output. With a single input the store is emitted directly; otherwise an
 * if / else-if / else chain on `inputIndex` picks the input.
 *
 * @param inputs - input variable helpers, in concat order.
 * @param output - output variable helper.
 * @returns the generated statements joined with "\n".
 */
assignOutputData = (inputs, output) => {
  const lastPosition = inputs.length - 1;
  const wrapInBranch = (position, store) => {
    if (lastPosition === 0) {
      return store;
    }
    if (position === 0) {
      return `if (inputIndex == ${position}u) { ${store} }`;
    }
    if (position === lastPosition) {
      return `else { ${store} }`;
    }
    return `else if (inputIndex == ${position}) { ${store} }`;
  };
  const branches = [];
  for (let position = 0; position <= lastPosition; position += 1) {
    const store = output.setByOffset("global_idx", inputs[position].getByIndices("indices"));
    branches.push(wrapInBranch(position, store));
  }
  return branches.join("\n");
};
/**
 * Concat entry point: normalizes the axis, validates the inputs, derives the
 * output shape from the first input (with the concat axis replaced by the
 * summed extents) and runs the concat program on the non-empty inputs only.
 */
concat = (context, attributes) => {
  const inputs = context.inputs;
  const firstInput = inputs[0];
  const adjustedAxis = ShapeUtil.normalizeAxis(attributes.axis, firstInput.dims.length);
  validateInputs4(inputs, adjustedAxis);

  // Inputs whose rank does not reach the concat axis contribute nothing.
  let concatExtent = 0;
  for (const tensor of inputs) {
    concatExtent += tensor.dims.length > adjustedAxis ? tensor.dims[adjustedAxis] : 0;
  }
  const outputShape = firstInput.dims.slice();
  outputShape[adjustedAxis] = concatExtent;

  // Zero-sized inputs are dropped before the program is built.
  const nonEmptyInputs = inputs.filter((tensor) => ShapeUtil.size(tensor.dims) > 0);
  const programInfo = createConcatProgramInfo(nonEmptyInputs, adjustedAxis, outputShape, firstInput.dataType);
  context.compute(programInfo, { inputs: nonEmptyInputs });
};
qHiddenSize = bias.dims[0] / 3; + let kHiddenSize = qHiddenSize; + let vHiddenSize = kHiddenSize; + if (attributes.qkvHiddenSizes.length > 0) { + if (attributes.qkvHiddenSizes.length !== 3) { + throw new Error("qkv_hidden_sizes attribute should have 3 elements"); + } + for (const sz of attributes.qkvHiddenSizes) { + if (sz % attributes.numHeads !== 0) { + throw new Error("qkv_hidden_sizes should be divisible by num_heads"); + } + } + qHiddenSize = attributes.qkvHiddenSizes[0]; + kHiddenSize = attributes.qkvHiddenSizes[1]; + vHiddenSize = attributes.qkvHiddenSizes[2]; + } + const kvSequenceLength = sequenceLength; + if (qHiddenSize !== kHiddenSize) { + throw new Error("qkv_hidden_sizes first element should be same as the second"); + } + if (bias.dims[0] !== qHiddenSize + kHiddenSize + vHiddenSize) { + throw new Error('Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes'); + } + let pastSequenceLength = 0; + if (past) { + if (kHiddenSize !== vHiddenSize) { + throw new Error('Input "past" expect k_hidden_size == v_hidden_size'); + } + if (past.dims.length !== 5) { + throw new Error('Input "past" must have 5 dimensions'); + } + if (past.dims[0] !== 2) { + throw new Error('Input "past" first dimension must be 2'); + } + if (past.dims[1] !== batchSize) { + throw new Error('Input "past" second dimension must be batch_size'); + } + if (past.dims[2] !== attributes.numHeads) { + throw new Error('Input "past" third dimension must be num_heads'); + } + if (past.dims[4] !== kHiddenSize / attributes.numHeads) { + throw new Error('Input "past" fifth dimension must be k_hidden_size / num_heads'); + } + if (!attributes.pastPresentShareBuffer) { + pastSequenceLength = past.dims[3]; + } + } + const totalSequenceLength = kvSequenceLength + pastSequenceLength; + const maxSequenceLength = -1; + const maskType = 0 /* none */; + if (maskIndex) { + throw new Error("Mask not supported"); + } + if (past) { + throw new Error("past is not supported"); + } + return { + 
batchSize, + sequenceLength, + pastSequenceLength, + kvSequenceLength, + totalSequenceLength, + maxSequenceLength, + inputHiddenSize, + hiddenSize: qHiddenSize, + vHiddenSize, + headSize: Math.floor(qHiddenSize / attributes.numHeads), + vHeadSize: Math.floor(vHiddenSize / attributes.numHeads), + numHeads: attributes.numHeads, + isUnidirectional: false, + pastPresentShareBuffer: false, + maskFilterValue: attributes.maskFilterValue, + maskType, + scale: attributes.scale, + broadcastResPosBias: false, + passPastInKv: false, + qkvFormat: 1 /* qkvBNSH */ + }; + }; + createInPlaceSoftmaxProgramInfo = (_context, input, n, d) => { + const components = getMaxComponents(d); + let WG = 64; + const dComp = d / components; + if (dComp < WG) { + WG = 1; + } else if (dComp / 8 < 64) { + WG = Math.ceil(dComp / 8); + } + const elementsPerThread = Math.ceil(d / components / WG); + const programUniforms = [ + { type: input.dataType, data: 1 / d }, + { type: 12 /* uint32 */, data: dComp }, + { type: 12 /* uint32 */, data: elementsPerThread } + ]; + const dataType = tensorTypeToWsglStorageType(input.dataType, components); + const f32Type = tensorTypeToWsglValueType(1 /* float */, components); + const getShaderSource = (shaderHelper) => { + const inputHelper = outputVariable("x", input.dataType, input.dims, components); + const elemValueType = tensorTypeToWsglValueType(input.dataType); + const uniforms = [ + { name: "d_inv", type: elemValueType }, + { name: "d_comp", type: "u32" }, + { name: "elements_per_thread", type: "u32" } + ]; + return ` + var thread_max: array; + var thread_sum: array; + ${shaderHelper.registerUniforms(uniforms).declareVariables(inputHelper)} + ${shaderHelper.mainStart([ + WG, + 1, + 1 + ])} + let local_offset = local_idx * uniforms.elements_per_thread; + let offset = workgroup_id.x * uniforms.d_comp + local_offset; + + var thread_max_vector = ${f32Type}(-3.402823e+38f); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < 
uniforms.d_comp; i++) { + thread_max_vector = max(${f32Type}(x[offset + i]), thread_max_vector); + } + thread_max[local_idx] = ${(() => { + switch (components) { + case 1: + return "thread_max_vector"; + case 2: + return "max(thread_max_vector.x, thread_max_vector.y)"; + case 4: + return "max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, thread_max_vector.w))"; + default: + throw new Error(`Unsupported components: ${components}`); + } + })()}; + workgroupBarrier(); + + var max_value = f32(-3.402823e+38f); + for (var i = 0u; i < ${WG}; i++) { + max_value = max(thread_max[i], max_value); + } + + var sum_vector = ${f32Type}(0); + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) { + sum_vector += exp(${f32Type}(x[offset + i]) - max_value); + } + thread_sum[local_idx] = ${(() => { + switch (components) { + case 1: + return "sum_vector"; + case 2: + return "sum_vector.x + sum_vector.y"; + case 4: + return "sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w"; + default: + throw new Error(`Unsupported components: ${components}`); + } + })()}; + workgroupBarrier(); + + var sum: f32 = 0; + for (var i = 0u; i < ${WG}; i++) { + sum += thread_sum[i]; + } + + if (sum == 0) { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) { + x[offset + i] = ${inputHelper.type.value}(uniforms.d_inv); + } + } else { + for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) { + var f32input = ${f32Type}(x[offset + i]); + x[offset + i] = ${inputHelper.type.value}(exp(f32input - max_value) / sum); + } + } + }`; + }; + return { + name: "AttentionProbsSoftmax", + shaderCache: { hint: `${WG};${dataType};${components}` }, + getShaderSource, + getRunData: () => ({ outputs: [], dispatchGroup: { x: n }, programUniforms }) + }; + }; + createAttentionProbsProgramInfo = (_context, q, key, relativePositionBias, parameters, attributes, 
pastSequenceLength) => { + const totalSequenceLength = pastSequenceLength + parameters.kvSequenceLength; + const probsShape = [parameters.batchSize, parameters.numHeads, parameters.sequenceLength, totalSequenceLength]; + const alpha = attributes.scale === 0 ? 1 / Math.sqrt(parameters.headSize) : attributes.scale; + const components = getMaxComponents(parameters.headSize); + const vectorizedHeadSize = parameters.headSize / components; + const TILE_SIZE = 12; + const dispatch = { + x: Math.ceil(totalSequenceLength / TILE_SIZE), + y: Math.ceil(parameters.sequenceLength / TILE_SIZE), + z: parameters.batchSize * parameters.numHeads + }; + const programUniforms = [ + { type: 12 /* uint32 */, data: parameters.sequenceLength }, + { type: 12 /* uint32 */, data: vectorizedHeadSize }, + { type: 12 /* uint32 */, data: totalSequenceLength }, + { type: 12 /* uint32 */, data: parameters.numHeads }, + { type: 1 /* float */, data: alpha } + ]; + const inputDependencies = ["type", "type"]; + if (relativePositionBias) { + inputDependencies.push("rank"); + programUniforms.push(...createTensorShapeVariables(relativePositionBias.dims)); + } + const getShaderSource = (shaderHelper) => { + const qInput = inputVariable("q", q.dataType, q.dims, components); + const kInput = inputVariable("key", key.dataType, key.dims, components); + const inputVars = [qInput, kInput]; + const relativePositionBiasInput = relativePositionBias ? 
inputVariable("relative_position_bias", relativePositionBias.dataType, relativePositionBias.dims.length) : void 0; + if (relativePositionBiasInput) { + inputVars.push(relativePositionBiasInput); + } + const output = outputVariable("output", q.dataType, probsShape); + const f32Type = tensorTypeToWsglValueType(1 /* float */, components); + const uniforms = [ + { name: "M", type: "u32" }, + { name: "K", type: "u32" }, + { name: "N", type: "u32" }, + { name: "num_heads", type: "u32" }, + { name: "alpha", type: "f32" } + ]; + return ` + const TILE_SIZE = ${TILE_SIZE}u; + + var tileQ: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>; + var tileK: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>; + ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)} + ${shaderHelper.mainStart([ + TILE_SIZE, + TILE_SIZE, + 1 + ])} + // x holds the N and y holds the M + let headIdx = workgroup_id.z; + let m = workgroup_id.y * TILE_SIZE; + let n = workgroup_id.x * TILE_SIZE; + let qOffset = uniforms.M * uniforms.K * headIdx + m * uniforms.K; + let kOffset = uniforms.N * uniforms.K * headIdx + n * uniforms.K; + + var value = ${f32Type}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x]; + } + if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) { + tileK[TILE_SIZE * local_id.y + local_id.x] = key[kOffset + local_id.y * uniforms.K + w + local_id.x]; + } + workgroupBarrier(); + + for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) { + value += ${f32Type}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]); + } + + workgroupBarrier(); + } + + let headOffset = headIdx * uniforms.M * uniforms.N; + if (global_id.y < uniforms.M && global_id.x < uniforms.N) { + let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x; + var sum: f32 
= ${(() => { + switch (components) { + case 1: + return "value"; + case 2: + return "value.x + value.y"; + case 4: + return "value.x + value.y + value.z + value.w"; + default: + throw new Error(`Unsupported components: ${components}`); + } + })()}; + + ${(() => { + if (relativePositionBiasInput) { + return ` + let batch = workgroup_id.z / uniforms.num_heads; + let head = workgroup_id.z % uniforms.num_heads; + var indices = ${relativePositionBiasInput.type.indices}(batch, head, global_id.y, global_id.x); + output[outputIdx] = ${output.type.value}(sum * uniforms.alpha) + ${relativePositionBiasInput.getByIndices("indices")};`; + } + return `output[outputIdx] = ${output.type.value} (sum * uniforms.alpha);`; + })()} + } + }`; + }; + return { + name: "AttentionProbs", + shaderCache: { hint: `${components}`, inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: probsShape, dataType: q.dataType, gpuDataType: 0 /* default */ }], + dispatchGroup: dispatch, + programUniforms + }), + getShaderSource + }; + }; + createVxAttentionScoreProgramInfo = (_context, probs, v, params, pastSequenceLength) => { + const totalSequenceLength = pastSequenceLength + params.kvSequenceLength; + const outputShape = [params.batchSize, params.sequenceLength, params.vHiddenSize]; + const TILE_SIZE = 12; + const dispatch = { + x: Math.ceil(params.vHeadSize / TILE_SIZE), + y: Math.ceil(params.sequenceLength / TILE_SIZE), + z: params.batchSize * params.numHeads + }; + const programUniforms = [ + { type: 12 /* uint32 */, data: params.sequenceLength }, + { type: 12 /* uint32 */, data: totalSequenceLength }, + { type: 12 /* uint32 */, data: params.vHeadSize }, + { type: 12 /* uint32 */, data: params.numHeads }, + { type: 12 /* uint32 */, data: params.vHiddenSize } + ]; + const inputDependencies = ["type", "type"]; + const getShaderSource = (shaderHelper) => { + const probsHelper = inputVariable("probs", probs.dataType, probs.dims); + const vHelper = inputVariable("v", v.dataType, v.dims); + 
const output = outputVariable("output", probs.dataType, outputShape); + const uniforms = [ + { name: "M", type: "u32" }, + { name: "K", type: "u32" }, + { name: "N", type: "u32" }, + { name: "num_heads", type: "u32" }, + { name: "v_hidden_size", type: "u32" } + ]; + return ` + const TILE_SIZE = ${TILE_SIZE}u; + var tileQ: array<${probsHelper.type.value}, ${TILE_SIZE * TILE_SIZE}>; + var tileK: array<${probsHelper.type.value}, ${TILE_SIZE * TILE_SIZE}>; + ${shaderHelper.registerUniforms(uniforms).declareVariables(probsHelper, vHelper, output)} + ${shaderHelper.mainStart([ + TILE_SIZE, + TILE_SIZE, + 1 + ])} + let headIdx = workgroup_id.z; + let m = global_id.y; + let n = global_id.x; + + let offsetA = headIdx * (uniforms.M * uniforms.K) + m * uniforms.K; + let offsetB = headIdx * (uniforms.N * uniforms.K) + n; + + var value = ${probsHelper.type.storage}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (m < uniforms.M && w + local_id.x < uniforms.K) { + tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x]; + } + if (n < uniforms.N && w + local_id.y < uniforms.K) { + tileK[TILE_SIZE * local_id.y + local_id.x] = v[offsetB + (w + local_id.y) * uniforms.N]; + } + workgroupBarrier(); + for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) { + value += tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * k + local_id.x]; + } + workgroupBarrier(); + } + + // we need to transpose output from BNSH_v to BSND_v + let batchIdx = workgroup_id.z / uniforms.num_heads; + let currentBatchHeadNumber = workgroup_id.z % uniforms.num_heads; + if (m < uniforms.M && n < uniforms.N) { + let outputIdx = batchIdx * uniforms.M * uniforms.v_hidden_size + m * uniforms.v_hidden_size + + currentBatchHeadNumber * uniforms.N + n; + output[outputIdx] = value; + } + }`; + }; + return { + name: "AttentionScore", + shaderCache: { inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: probs.dataType, gpuDataType: 0 /* 
default */ }], + dispatchGroup: dispatch, + programUniforms + }), + getShaderSource + }; + }; + applyAttention = (context, q, k, v, _maskIndex, _past, pastKey, pastValue, relativePositionBias, parameters, attributes) => { + const outputPresentKey = context.outputCount > 1; + const outputPresentValue = context.outputCount > 2; + const pastSequenceLength = outputPresentKey && outputPresentValue ? parameters.pastSequenceLength : 0; + const totalSequenceLength = pastSequenceLength + parameters.kvSequenceLength; + const presentKeyShape = [parameters.batchSize, parameters.numHeads, totalSequenceLength, parameters.headSize]; + const concatKeyInputs = pastKey ? [pastKey, k] : [k]; + const key = outputPresentKey ? context.compute( + createConcatProgramInfo(concatKeyInputs, 2, presentKeyShape, k.dataType), + { inputs: concatKeyInputs, outputs: [1] } + )[0] : k; + const presentValueShape = [parameters.batchSize, parameters.numHeads, totalSequenceLength, parameters.headSize]; + const concatValueInputs = pastValue ? [pastValue, v] : [v]; + const value = outputPresentValue ? 
context.compute( + createConcatProgramInfo(concatValueInputs, 2, presentValueShape, v.dataType), + { inputs: concatValueInputs, outputs: [2] } + )[0] : v; + const inputsK = [q, key]; + if (relativePositionBias) { + inputsK.push(relativePositionBias); + } + const probs = context.compute( + createAttentionProbsProgramInfo( + context, + q, + key, + relativePositionBias, + parameters, + attributes, + pastSequenceLength + ), + { inputs: inputsK, outputs: [-1] } + )[0]; + context.compute( + createInPlaceSoftmaxProgramInfo( + context, + probs, + parameters.batchSize * parameters.numHeads * parameters.sequenceLength, + totalSequenceLength + ), + { inputs: [probs], outputs: [] } + ); + const inputsV = [probs, value]; + context.compute( + createVxAttentionScoreProgramInfo(context, probs, value, parameters, pastSequenceLength), + { inputs: inputsV, outputs: [0] } + ); + }; + prepare = (context, parameters) => { + const outputShape = [ + parameters.batchSize, + parameters.numHeads, + parameters.sequenceLength, + parameters.headSize + ]; + const M = parameters.sequenceLength; + const K = parameters.inputHiddenSize; + const N = parameters.headSize; + const TILE_SIZE = 12; + const dispatch = { + x: Math.ceil(parameters.headSize / TILE_SIZE), + y: Math.ceil(parameters.sequenceLength / TILE_SIZE), + z: parameters.batchSize * parameters.numHeads + }; + const inputs = [context.inputs[0], context.inputs[1], context.inputs[2]]; + const programUniforms = [ + { type: 12 /* uint32 */, data: M }, + { type: 12 /* uint32 */, data: K }, + { type: 12 /* uint32 */, data: N }, + { type: 12 /* uint32 */, data: parameters.numHeads }, + { type: 12 /* uint32 */, data: parameters.headSize }, + { type: 12 /* uint32 */, data: parameters.hiddenSize }, + { type: 12 /* uint32 */, data: parameters.hiddenSize + parameters.hiddenSize + parameters.vHiddenSize } + ]; + const getShaderSource = (shaderHelper) => { + const outputQ = outputVariable("output_q", inputs[0].dataType, outputShape); + const outputK = 
outputVariable("output_k", inputs[0].dataType, outputShape); + const outputV = outputVariable("output_v", inputs[0].dataType, outputShape); + const input = inputVariable("input", inputs[0].dataType, inputs[0].dims); + const weight = inputVariable("weight", inputs[1].dataType, inputs[1].dims); + const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims); + const dataType = input.type.storage; + const uniforms = [ + { name: "M", type: "u32" }, + { name: "K", type: "u32" }, + { name: "N", type: "u32" }, + { name: "num_heads", type: "u32" }, + { name: "head_size", type: "u32" }, + { name: "hidden_size", type: "u32" }, + { name: "ldb", type: "u32" } + ]; + return ` + const TILE_SIZE = ${TILE_SIZE}u; + var tileInput: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>; + var tileWeightQ: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>; + var tileWeightK: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>; + var tileWeightV: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>; + ${shaderHelper.registerUniforms(uniforms).declareVariables(input, weight, bias, outputQ, outputK, outputV)} + ${shaderHelper.mainStart([ + TILE_SIZE, + TILE_SIZE, + 1 + ])} + let batchIndex = workgroup_id.z / uniforms.num_heads; + let headNumber = workgroup_id.z % uniforms.num_heads; + let m = global_id.y; + let n = global_id.x; + + let inputOffset = batchIndex * (uniforms.M * uniforms.K) + m * uniforms.K; + let biasOffsetQ = headNumber * uniforms.head_size; + let biasOffsetK = uniforms.hidden_size + biasOffsetQ; + let biasOffsetV = uniforms.hidden_size + biasOffsetK; + + var valueQ = ${dataType}(0); + var valueK = ${dataType}(0); + var valueV = ${dataType}(0); + for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) { + if (m < uniforms.M && w + local_id.x < uniforms.K) { + tileInput[TILE_SIZE * local_id.y + local_id.x] = input[inputOffset + w + local_id.x]; + } + if (n < uniforms.N && w + local_id.y < uniforms.K) { + let offset = n + (w + local_id.y) * uniforms.ldb; + tileWeightQ[TILE_SIZE * local_id.y + 
local_id.x] = weight[biasOffsetQ + offset]; + tileWeightK[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetK + offset]; + tileWeightV[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetV + offset]; + } + workgroupBarrier(); + for (var k: u32 = 0u; k ({ + outputs: [ + { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ }, + { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ }, + { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ } + ], + dispatchGroup: dispatch, + programUniforms + }), + getShaderSource + }, + { inputs, outputs: [-1, -1, -1] } + ); + }; + attention = (context, attributes) => { + const params = validateAttentionInputs(context.inputs, attributes); + const [q, k, v] = prepare(context, params); + return applyAttention( + context, + q, + k, + v, + context.inputs[4], + void 0, + void 0, + void 0, + context.inputs[5], + params, + attributes + ); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/batch-norm.ts +var validateInputs5, createBatchNormInferenceProgramInfo, parseBatchNormAttributes, batchNorm; +var init_batch_norm = __esm({ + "web/lib/wasm/jsep/webgpu/ops/batch-norm.ts"() { + "use strict"; + init_esm(); + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + validateInputs5 = (inputs, attributes) => { + if (!inputs || inputs.length !== 5) { + throw new Error("BatchNormalization requires 5 inputs"); + } + const checkShapeEqual = (actual, expected, message) => { + const r = expected.length; + if (r !== actual.length) { + throw new Error(`${message}: num dimensions != ${r}`); + } + expected.forEach((v, i) => { + if (v !== actual[i]) { + throw new Error(`${message}: dim[${i}] do not match`); + } + }); + }; + if (inputs[0].dims.length > 1) { + const shape = attributes.format === "NHWC" ? attributes.spatial ? 
inputs[0].dims.slice(-1) : inputs[0].dims.slice(-1).concat(inputs[0].dims.slice(1, inputs[0].dims.length - 1)) : inputs[0].dims.slice(1, attributes.spatial ? 2 : void 0); + checkShapeEqual(inputs[1].dims, shape, "Invalid input scale"); + checkShapeEqual(inputs[2].dims, shape, "Invalid input B"); + checkShapeEqual(inputs[3].dims, shape, "Invalid input mean"); + checkShapeEqual(inputs[4].dims, shape, "Invalid input var"); + } else { + checkShapeEqual(inputs[1].dims, [1], "Invalid input scale"); + checkShapeEqual(inputs[2].dims, [1], "Invalid input B"); + checkShapeEqual(inputs[3].dims, [1], "Invalid input mean"); + checkShapeEqual(inputs[4].dims, [1], "Invalid input var"); + } + }; + createBatchNormInferenceProgramInfo = (inputs, attributes) => { + const { epsilon, spatial, format } = attributes; + const yShape = inputs[0].dims; + const components = spatial ? getMaxComponents(yShape[yShape.length - 1]) : 1; + const cComponents = format === "NHWC" && yShape.length > 1 ? components : 1; + const outputSize = ShapeUtil.size(yShape) / components; + const useShapesUniforms = spatial; + const shapeOrRank = useShapesUniforms ? yShape.length : yShape; + const x = inputVariable("x", inputs[0].dataType, inputs[0].dims, components); + const scale = inputVariable("scale", inputs[1].dataType, inputs[1].dims, cComponents); + const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims, cComponents); + const inputMean = inputVariable("inputMean", inputs[3].dataType, inputs[3].dims, cComponents); + const inputVar = inputVariable("inputVar", inputs[4].dataType, inputs[4].dims, cComponents); + const y = outputVariable("y", inputs[0].dataType, shapeOrRank, components); + const calcCOffset = () => { + let cOffset = ""; + if (spatial) { + cOffset = `let cOffset = ${yShape.length === 1 ? "0u" : format === "NHWC" ? 
`outputIndices[${yShape.length - 1}] / ${components}` : "outputIndices[1]"};`; + } else { + if (format === "NCHW") { + cOffset = ` + ${y.indicesSet("outputIndices", "0", "0")} + let cOffset = ${y.indicesToOffset("outputIndices")};`; + } else { + cOffset = `var cIndices = ${scale.type.indices}(0); + cIndices[0] = outputIndices[${yShape.length - 1}];`; + for (let i = 1; i < scale.rank; i++) { + cOffset += `cIndices[${i}] = outputIndices[${i}];`; + } + cOffset += `let cOffset = ${scale.indicesToOffset("cIndices")};`; + } + } + return cOffset; + }; + const getInferenceModeShaderSource = (helper) => ` + const epsilon = ${epsilon}; + ${helper.registerUniform("outputSize", "u32").declareVariables(x, scale, bias, inputMean, inputVar, y)} + ${helper.mainStart()} + ${helper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var outputIndices = ${y.offsetToIndices(`global_idx * ${components}`)}; + ${calcCOffset()} + let scale = ${scale.getByOffset("cOffset")}; + let bias = ${bias.getByOffset("cOffset")}; + let inputMean = ${inputMean.getByOffset("cOffset")}; + let inputVar = ${inputVar.getByOffset("cOffset")}; + let x = ${x.getByOffset("global_idx")}; + let value = (x - inputMean) * inverseSqrt(inputVar + epsilon) * scale + bias; + ${y.setByOffset("global_idx", "value")} + }`; + return { + name: "BatchNormalization", + shaderCache: { + hint: `${attributes.epsilon}_${attributes.format}_${spatial}_${components}`, + inputDependencies: useShapesUniforms ? ["rank", "type", "type", "type", "type"] : void 0 + }, + getShaderSource: getInferenceModeShaderSource, + getRunData: () => ({ + outputs: [{ dims: inputs[0].dims, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms: useShapesUniforms ? 
[ + { type: 12 /* uint32 */, data: outputSize }, + ...createTensorShapeVariables(yShape) + ] : [ + { type: 12 /* uint32 */, data: outputSize } + ] + }) + }; + }; + parseBatchNormAttributes = (attributes) => createAttributeWithCacheKey(attributes); + batchNorm = (context, attributes) => { + const { inputs, outputCount } = context; + const updatedAttributes = parseBatchNormAttributes({ ...attributes, outputCount }); + if (env2.webgpu.validateInputContent) { + validateInputs5(inputs, updatedAttributes); + } + if (attributes.trainingMode) { + throw new Error("BatchNormalization trainingMode is not supported yet."); + } else { + context.compute(createBatchNormInferenceProgramInfo(inputs, updatedAttributes)); + } + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/bias-add.ts +var validateInputs6, createBiasAddProgramInfo, biasAdd; +var init_bias_add = __esm({ + "web/lib/wasm/jsep/webgpu/ops/bias-add.ts"() { + "use strict"; + init_util(); + init_common(); + validateInputs6 = (inputs) => { + if (inputs[0].dims.length !== 3) { + throw new Error("input should have 3 dimensions"); + } + if (![320, 640, 1280].includes(inputs[0].dims[2])) { + throw new Error("number of channels should be 320, 640 or 1280"); + } + if (inputs[1].dims.length !== 1) { + throw new Error("bias is expected to have 1 dimensions"); + } + if (inputs[0].dims[2] !== inputs[1].dims[0]) { + throw new Error("last dimension of input and bias are not the same"); + } + }; + createBiasAddProgramInfo = (inputs) => { + const outputShape = inputs[0].dims; + const channels = inputs[0].dims[2]; + const outputSize = ShapeUtil.size(outputShape) / 4; + const dataType = inputs[0].dataType; + const input = inputVariable("input", dataType, outputShape, 4); + const bias = inputVariable("bias", dataType, [channels], 4); + const residual = inputVariable("residual", dataType, outputShape, 4); + const output = outputVariable("output", dataType, outputShape, 4); + const getShaderSource = (shaderHelper) => ` + const channels = 
${channels}u / 4; + ${shaderHelper.declareVariables(input, bias, residual, output)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)} + let value = ${input.getByOffset("global_idx")} + + ${bias.getByOffset("global_idx % channels")} + ${residual.getByOffset("global_idx")}; + ${output.setByOffset("global_idx", "value")} + }`; + return { + name: "BiasAdd", + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) } + }), + getShaderSource + }; + }; + biasAdd = (context) => { + validateInputs6(context.inputs); + context.compute(createBiasAddProgramInfo(context.inputs)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/unary-op.ts +var createElementwiseProgramShader, createElementwiseProgramInfo, abs, acos, acosh, asin, asinh, atan, atanh, parseCastAttributes, cast, generateClipAttributesFromInputs, clip, ceil, cos, cosh, parseAlphaAttributes, elu, erfImpl, erf, exp, floor, gelu, leakyRelu, not, neg, reciprocal, relu, sigmoid, parseHardSigmoidAttributes, hardSigmoid, sin, sinh, sqrt, tan, tanhExpression, tanh, fastGeluImpl, fastGeluExpression, fastGelu, thresholdedRelu, log; +var init_unary_op = __esm({ + "web/lib/wasm/jsep/webgpu/ops/unary-op.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + createElementwiseProgramShader = (shaderHelper, datasize, inputDataType, outputDataType, funcCall, additionalImplementation) => { + const vecSize = Math.ceil(datasize / 4); + let expression = ""; + if (typeof funcCall === "string") { + expression = `${funcCall}(a)`; + } else { + expression = funcCall("a"); + } + const input = inputVariable("inputData", inputDataType, [vecSize], 4); + const output = outputVariable("outputData", outputDataType, [vecSize], 4); + return ` + ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(input, output)} + + 
${additionalImplementation ?? ""} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + + let a = ${input.getByOffset("global_idx")}; + ${output.setByOffset("global_idx", expression)} + }`; + }; + createElementwiseProgramInfo = (input, name, funcCall, additionalImplementation, cacheKey, outputDataType = input.dataType) => ({ + name, + shaderCache: { hint: cacheKey, inputDependencies: ["type"] }, + getShaderSource: (shaderHelper) => createElementwiseProgramShader( + shaderHelper, + ShapeUtil.size(input.dims), + input.dataType, + outputDataType, + funcCall, + additionalImplementation + ), + getRunData: (inputTensors) => ({ + outputs: [{ dims: input.dims, dataType: outputDataType }], + dispatchGroup: { x: Math.ceil( + ShapeUtil.size(inputTensors[0].dims) / 64 / 4 + /* vec size */ + ) }, + programUniforms: [ + { type: 12 /* uint32 */, data: Math.ceil(ShapeUtil.size(input.dims) / 4) } + ] + }) + }); + abs = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Abs", "abs")); + }; + acos = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Acos", "acos")); + }; + acosh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Acosh", "acosh")); + }; + asin = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Asin", "asin")); + }; + asinh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Asinh", "asinh")); + }; + atan = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Atan", "atan")); + }; + atanh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Atanh", "atanh")); + }; + parseCastAttributes = (attributes) => createAttributeWithCacheKey(attributes); + cast = (context, attributes) => { + let func; + switch (attributes.to) { + case 10 /* float16 */: + func = "vec4"; + break; + case 1 /* float */: + func = 
"vec4"; + break; + case 12 /* uint32 */: + func = "vec4"; + break; + case 6 /* int32 */: + func = "vec4"; + break; + case 9 /* bool */: + func = "vec4"; + break; + default: + throw new RangeError(`not supported type (specified in attribute 'to' from 'Cast' operator): ${attributes.to}`); + } + context.compute( + createElementwiseProgramInfo(context.inputs[0], "Cast", func, void 0, attributes.cacheKey, attributes.to) + ); + }; + generateClipAttributesFromInputs = (inputs) => { + const min = inputs.length >= 2 && inputs[1].data !== 0 ? inputs[1].getFloat32Array()[0] : MIN_CLIP; + const max = inputs.length >= 3 && inputs[2].data !== 0 ? inputs[2].getFloat32Array()[0] : MAX_CLIP; + return createAttributeWithCacheKey({ min, max }); + }; + clip = (context, clipAttributes) => { + const attributes = context.inputs.length === 1 ? clipAttributes : generateClipAttributesFromInputs(context.inputs); + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute( + createElementwiseProgramInfo( + context.inputs[0], + "Clip", + (a) => `clamp(${a}, clip_min_, clip_max_)`, + ` + const clip_min_: vec4<${dataType}> = vec4(${dataType}(${attributes.min})); + const clip_max_: vec4<${dataType}> = vec4(${dataType}(${attributes.max})); +`, + attributes.cacheKey + ), + { inputs: [0] } + ); + }; + ceil = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Ceil", "ceil")); + }; + cos = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Cos", "cos")); + }; + cosh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Cosh", "cosh")); + }; + parseAlphaAttributes = (attributes) => createAttributeWithCacheKey(attributes); + elu = (context, attributes) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "Elu", + (a) => `elu_vf32(${a})`, + ` + const elu_alpha_ = 
${dataType}(${attributes.alpha}); + + fn elu_f32(a: ${dataType}) -> ${dataType} { + return select((exp(a) - 1.0) * elu_alpha_, a, a >= 0.0); + } + + fn elu_vf32(v: vec4<${dataType}>) -> vec4<${dataType}> { + return vec4(elu_f32(v.x), elu_f32(v.y), elu_f32(v.z), elu_f32(v.w)); + }`, + attributes.cacheKey + )); + }; + erfImpl = (varType = "f32") => ` +const r0: ${varType} = 0.3275911; +const r1: ${varType} = 0.254829592; +const r2: ${varType} = -0.284496736; +const r3: ${varType} = 1.421413741; +const r4: ${varType} = -1.453152027; +const r5: ${varType} = 1.061405429; + +fn erf_vf32(v: vec4<${varType}>) -> vec4<${varType}> { + let absv = abs(v); + let x = 1.0 / (1.0 + r0 * absv); + return sign(v) * (1.0 - ((((r5 * x + r4) * x + r3) * x + r2) * x + r1) * x * exp(-absv * absv)); +}`; + erf = (context) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo(context.inputs[0], "Erf", (a) => `erf_vf32(${a})`, erfImpl(dataType))); + }; + exp = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Exp", "exp")); + }; + floor = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Floor", "floor")); + }; + gelu = (context) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "Gelu", + (a) => `0.5 * ${a} * (1.0 + erf_vf32(${a} * 0.7071067811865475))`, + erfImpl(dataType) + )); + }; + leakyRelu = (context, attributes) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "LeakyRelu", + (a) => `select(leaky_relu_alpha_ * ${a}, ${a}, ${a} >= vec4<${dataType}>(0.0))`, + `const leaky_relu_alpha_ = ${dataType}(${attributes.alpha});`, + attributes.cacheKey + )); + }; + not = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Not", (a) => 
`!${a}`)); + }; + neg = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Neg", (a) => `-${a}`)); + }; + reciprocal = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Reciprocal", (a) => `1.0/${a}`)); + }; + relu = (context) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "Relu", + (a) => `select(vec4<${dataType}>(0.0), ${a}, ${a} > vec4<${dataType}>(0.0))` + )); + }; + sigmoid = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Sigmoid", (a) => `(1.0 / (1.0 + exp(-${a})))`)); + }; + parseHardSigmoidAttributes = (attributes) => createAttributeWithCacheKey(attributes); + hardSigmoid = (context, attributes) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "HardSigmoid", + (a) => `max(vec4<${dataType}>(0.0), min(vec4<${dataType}>(1.0), ${attributes.alpha} * ${a} + vec4<${dataType}>(${attributes.beta})))`, + void 0, + attributes.cacheKey + )); + }; + sin = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Sin", "sin")); + }; + sinh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Sinh", "sinh")); + }; + sqrt = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Sqrt", "sqrt")); + }; + tan = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Tan", "tan")); + }; + tanhExpression = (a) => `sign(${a}) * (1 - exp(-2 * abs(${a}))) / (1 + exp(-2 * abs(${a})))`; + tanh = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Tanh", tanhExpression)); + }; + fastGeluImpl = (varType = "f32") => ` +const fast_gelu_a: ${varType} = 0.5; +const fast_gelu_b: ${varType} = 0.7978845608028654; +const fast_gelu_c: ${varType} = 
0.035677408136300125; + +fn tanh_v(v: vec4<${varType}>) -> vec4<${varType}> { + return ${tanhExpression("v")}; +} +`; + /* fastGeluExpression: WGSL text for (a + a * tanh_v(x * (c * x * x + b))) * x using the fast_gelu_* constants declared by fastGeluImpl. */ fastGeluExpression = (x) => `(fast_gelu_a + fast_gelu_a * tanh_v(${x} * (fast_gelu_c * ${x} * ${x} + fast_gelu_b))) * ${x}`; + /* fastGelu: elementwise program built from fastGeluExpression and fastGeluImpl(dataType); passes no cache key (void 0) and forwards the input data type as the sixth argument. */ fastGelu = (context) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "FastGelu", + fastGeluExpression, + fastGeluImpl(dataType), + void 0, + context.inputs[0].dataType + )); + }; + /* thresholdedRelu: per lane selects a when a > thresholded_relu_alpha_ and 0 otherwise; the constant is a vec4 built from attributes.alpha. Unlike the sibling ops it returns 0 after dispatching. */ thresholdedRelu = (context, attributes) => { + const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType); + context.compute(createElementwiseProgramInfo( + context.inputs[0], + "ThresholdedRelu", + (a) => `select(vec4<${dataType}>(0.0), ${a}, ${a} > thresholded_relu_alpha_)`, + `const thresholded_relu_alpha_ = vec4<${dataType}>(${attributes.alpha});`, + attributes.cacheKey + )); + return 0; + }; + /* log: elementwise program that names the WGSL builtin log. */ log = (context) => { + context.compute(createElementwiseProgramInfo(context.inputs[0], "Log", "log")); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts +var validateInputs7, createBiasSplitGeluProgramInfo, biasSplitGelu; +var init_bias_split_gelu = __esm({ + "web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts"() { + "use strict"; + init_util(); + init_common(); + init_unary_op(); + /* validateInputs7: throws unless inputs[0] has 3 dims with a last dim of 2560, 5120 or 10240, and inputs[1] is 1-D with length equal to that last dim. */ validateInputs7 = (inputs) => { + if (inputs[0].dims.length !== 3) { + throw new Error("input should have 3 dimensions"); + } + if (![2560, 5120, 10240].includes(inputs[0].dims[2])) { + throw new Error("hidden state should be 2560, 5120 or 10240"); + } + if (inputs[1].dims.length !== 1) { + throw new Error("bias is expected to have 1 dimensions"); + } + if (inputs[0].dims[2] !== inputs[1].dims[0]) { + throw new Error("last dimension of input and bias are not the same"); + } + }; + /* createBiasSplitGeluProgramInfo: the output shape copies the input dims with dims[2] halved. */ createBiasSplitGeluProgramInfo = (inputs) => { + const outputShape = inputs[0].dims.slice(); + outputShape[2] = outputShape[2] / 2; + const input = 
inputVariable("input", inputs[0].dataType, inputs[0].dims, 4); + /* bias is declared with the input data type and the shape [dims[2]], not with inputs[1] own metadata; validateInputs7 checks that the lengths agree. */ const bias = inputVariable("bias", inputs[0].dataType, [inputs[0].dims[2]], 4); + const output = outputVariable("output", inputs[0].dataType, outputShape, 4); + /* all three variables are declared with 4 components, so the guard and dispatch count is the element count divided by 4. */ const outputSize = ShapeUtil.size(outputShape) / 4; + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + /* shader: each invocation adds the bias to the left and right halves of one row segment (halfChannels is dims[2] / 4 / 2), applies the erf based gelu to the right half and writes left * gelu(right). */ const getShaderSource = (shaderHelper) => ` + const M_SQRT2 = sqrt(2.0); + const halfChannels = ${inputs[0].dims[2] / 4 / 2}u; + + ${shaderHelper.declareVariables(input, bias, output)} + + ${erfImpl(dataType)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)} + let biasIdx = global_idx % halfChannels; + let batchIndex = global_idx / halfChannels; + let inputOffset = biasIdx + batchIndex * halfChannels * 2; + let valueLeft = input[inputOffset] + bias[biasIdx]; + let valueRight = input[inputOffset + halfChannels] + bias[biasIdx + halfChannels]; + let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1); + + ${output.setByOffset("global_idx", "valueLeft * geluRight")} + }`; + /* program info: a single output with the halved shape and the input data type; the x dispatch count is ceil(outputSize / 64). */ return { + name: "BiasSplitGelu", + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) } + }), + getShaderSource + }; + }; + /* biasSplitGelu: validates context.inputs, then runs the program built from them. */ biasSplitGelu = (context) => { + validateInputs7(context.inputs); + context.compute(createBiasSplitGeluProgramInfo(context.inputs)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/binary-op.ts +var createBinaryOpProgramShader, createBinaryOpProgramInfo, runBinaryOp, add, div, equal, mul, pow, sub, greater, less, greaterOrEqual, lessOrEqual; +var init_binary_op = __esm({ + "web/lib/wasm/jsep/webgpu/ops/binary-op.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + /* createBinaryOpProgramShader: builds the WGSL source for an elementwise binary op; funcCall may be a builtin name, a function of two expression strings, or an object with scalar and vector functions. */ createBinaryOpProgramShader = (shaderHelper, dimsA, dimsB, dimsOutput, vectorize, doBroadcast, sharedDimensionDivisibleBy4, funcCall, typeA, typeB, typeOutput, 
additionalImplementation) => { + /* normalise funcCall into a scalar and a vector expression builder: a string becomes a call to that name, a function is used for both, an object supplies the two separately. */ let expressionScalar; + let expressionVector; + if (typeof funcCall === "string") { + expressionScalar = expressionVector = (a2, b2) => `${funcCall}((${a2}),(${b2}))`; + } else if (typeof funcCall === "function") { + expressionScalar = expressionVector = funcCall; + } else { + expressionScalar = funcCall.scalar; + expressionVector = funcCall.vector; + } + /* the variables are declared by rank (dims.length) with 4 components each. */ const output = outputVariable("outputData", typeOutput, dimsOutput.length, 4); + const a = inputVariable("aData", typeA, dimsA.length, 4); + const b = inputVariable("bData", typeB, dimsB.length, 4); + let assignment; + /* vectorized path: one vector expression per invocation. */ if (vectorize) { + if (doBroadcast) { + const isAOneElement = ShapeUtil.size(dimsA) === 1; + const isBOneElement = ShapeUtil.size(dimsB) === 1; + const aLastDimDivisibleBy4 = dimsA.length > 0 && dimsA[dimsA.length - 1] % 4 === 0; + const bLastDimDivisibleBy4 = dimsB.length > 0 && dimsB[dimsB.length - 1] % 4 === 0; + if (isAOneElement || isBOneElement) { + /* a one-element operand is read at offset 0 and its .x lane is splatted through the value type constructor. */ assignment = output.setByOffset( + "global_idx", + expressionVector( + isAOneElement ? `${a.type.value}(${a.getByOffset("0")}.x)` : a.getByOffset("global_idx"), + isBOneElement ? `${b.type.value}(${b.getByOffset("0")}.x)` : b.getByOffset("global_idx") + ) + ); + } else { + /* general broadcast: map the output indices to per-input offsets; an operand either loads a whole vec4 at offset / 4 or splats the single lane at offset % 4. */ assignment = ` + let outputIndices = ${output.offsetToIndices("global_idx * 4u")}; + let offsetA = ${a.broadcastedIndicesToOffset("outputIndices", output)}; + let offsetB = ${b.broadcastedIndicesToOffset("outputIndices", output)}; + ${output.setByOffset( + "global_idx", + expressionVector( + sharedDimensionDivisibleBy4 || aLastDimDivisibleBy4 ? a.getByOffset("offsetA / 4u") : `${a.type.value}(${a.getByOffset("offsetA / 4u")}[offsetA % 4u])`, + sharedDimensionDivisibleBy4 || bLastDimDivisibleBy4 ? 
b.getByOffset("offsetB / 4u") : `${b.type.value}(${b.getByOffset("offsetB / 4u")}[offsetB % 4u])` + ) + )} + `; + } + } else { + assignment = output.setByOffset( + "global_idx", + expressionVector(a.getByOffset("global_idx"), b.getByOffset("global_idx")) + ); + } + } else { + if (!doBroadcast) { + throw new Error("no necessary to use scalar implementation for element-wise binary op implementation."); + } + const singleAssignment = (resStr, x, typeCast = "") => { + const expressionA = `aData[indexA${x}][componentA${x}]`; + const expressionB = `bData[indexB${x}][componentB${x}]`; + return ` + let outputIndices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)}; + let offsetA${x} = ${a.broadcastedIndicesToOffset(`outputIndices${x}`, output)}; + let offsetB${x} = ${b.broadcastedIndicesToOffset(`outputIndices${x}`, output)}; + let indexA${x} = offsetA${x} / 4u; + let indexB${x} = offsetB${x} / 4u; + let componentA${x} = offsetA${x} % 4u; + let componentB${x} = offsetB${x} % 4u; + ${resStr}[${x}] = ${typeCast}(${expressionScalar(expressionA, expressionB)}); + `; + }; + if (typeOutput === 9 /* bool */) { + assignment = ` + var data = vec4(0); + ${singleAssignment("data", 0, "u32")} + ${singleAssignment("data", 1, "u32")} + ${singleAssignment("data", 2, "u32")} + ${singleAssignment("data", 3, "u32")} + outputData[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`; + } else { + assignment = ` + ${singleAssignment("outputData[global_idx]", 0)} + ${singleAssignment("outputData[global_idx]", 1)} + ${singleAssignment("outputData[global_idx]", 2)} + ${singleAssignment("outputData[global_idx]", 3)} + `; + } + } + return ` + ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(a, b, output)} + + ${additionalImplementation ?? 
""} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${assignment} + }`; + }; + createBinaryOpProgramInfo = (name, cacheKey, a, b, funcCall, additionalImplementation, outputDataType = a.dataType) => { + const isBroadcast = !ShapeUtil.areEqual(a.dims, b.dims); + let outputShape = a.dims; + let outputSize = ShapeUtil.size(a.dims); + let vectorize = false; + let sharedDimensionDivisibleBy4 = false; + const cacheKeyAux = [isBroadcast]; + if (isBroadcast) { + const calculatedShape = BroadcastUtil.calcShape(a.dims, b.dims, false); + if (!calculatedShape) { + throw new Error("Can't perform binary op on the given tensors"); + } + outputShape = calculatedShape; + outputSize = ShapeUtil.size(outputShape); + const isAOneElement = ShapeUtil.size(a.dims) === 1; + const isBOneElement = ShapeUtil.size(b.dims) === 1; + const aLastDimDivisibleBy4 = a.dims.length > 0 && a.dims[a.dims.length - 1] % 4 === 0; + const bLastDimDivisibleBy4 = b.dims.length > 0 && b.dims[b.dims.length - 1] % 4 === 0; + cacheKeyAux.push(isAOneElement); + cacheKeyAux.push(isBOneElement); + cacheKeyAux.push(aLastDimDivisibleBy4); + cacheKeyAux.push(bLastDimDivisibleBy4); + let sharedDimension = 1; + for (let i = 1; i < outputShape.length; i++) { + const dimA = a.dims[a.dims.length - i] ?? 1; + const dimB = b.dims[b.dims.length - i] ?? 
1; + if (dimA === dimB) { + sharedDimension *= dimA; + } else { + break; + } + } + if (sharedDimension % 4 === 0) { + sharedDimensionDivisibleBy4 = true; + vectorize = true; + } else if (isAOneElement || isBOneElement || aLastDimDivisibleBy4 || bLastDimDivisibleBy4) { + vectorize = true; + } + } else { + vectorize = true; + } + cacheKeyAux.push(vectorize); + return { + name, + shaderCache: { + hint: cacheKey + cacheKeyAux.map((x) => x.toString()).join("_"), + inputDependencies: ["rank", "rank"] + }, + getShaderSource: (shaderHelper) => createBinaryOpProgramShader( + shaderHelper, + a.dims, + b.dims, + outputShape, + vectorize, + isBroadcast, + sharedDimensionDivisibleBy4, + funcCall, + a.dataType, + b.dataType, + outputDataType, + additionalImplementation + ), + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: outputDataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 / 4 + /* component size */ + ) }, + programUniforms: [ + { type: 12 /* uint32 */, data: Math.ceil(ShapeUtil.size(outputShape) / 4) }, + ...createTensorShapeVariables(a.dims, b.dims, outputShape) + ] + }) + }; + }; + runBinaryOp = (context, name, funcCall, additionalImplementation, cacheKey, outputDataType) => { + context.compute(createBinaryOpProgramInfo( + name, + cacheKey ?? "", + context.inputs[0], + context.inputs[1], + funcCall, + additionalImplementation, + outputDataType + )); + }; + add = (context) => { + runBinaryOp(context, "Add", (a, b) => `${a}+${b}`); + }; + div = (context) => { + runBinaryOp(context, "Div", (a, b) => `${a}/${b}`); + }; + equal = (context) => { + runBinaryOp( + context, + "Equal", + { scalar: (a, b) => `u32(${a}==${b})`, vector: (a, b) => `vec4(${a}==${b})` }, + void 0, + void 0, + 9 /* bool */ + ); + }; + mul = (context) => { + runBinaryOp(context, "Mul", (a, b) => `${a}*${b}`); + }; + pow = (context) => { + const type = inputVariable("input", context.inputs[0].dataType, context.inputs[0].dims).type.value; + const roundStr = type === "i32" ? 
"round" : ""; + runBinaryOp( + context, + "Pow", + { scalar: (a, b) => `pow_custom(${a},${b})`, vector: (a, b) => `pow_vector_custom(${a},${b})` }, + ` + fn pow_custom(a : ${type}, b : ${type}) -> ${type} { + if (b == ${type}(0.0)) { + return ${type}(1.0); + } else if (a < ${type}(0.0) && f32(b) != floor(f32(b))) { + return ${type}(pow(f32(a), f32(b))); // NaN + } + return select(sign(a), ${type}(1.0), round(f32(abs(b) % ${type}(2.0))) != 1.0) * ${type}(${roundStr}(pow(f32(abs(a)), f32(b)))); + } + fn pow_vector_custom(a : vec4<${type}>, b : vec4<${type}>) -> vec4<${type}> { + // TODO: implement vectorized pow + return vec4<${type}>(pow_custom(a.x, b.x), pow_custom(a.y, b.y), pow_custom(a.z, b.z), pow_custom(a.w, b.w)); + } + ` + ); + }; + sub = (context) => { + runBinaryOp(context, "Sub", (a, b) => `${a}-${b}`); + }; + greater = (context) => { + runBinaryOp( + context, + "Greater", + { scalar: (a, b) => `u32(${a}>${b})`, vector: (a, b) => `vec4(${a}>${b})` }, + void 0, + void 0, + 9 /* bool */ + ); + }; + less = (context) => { + runBinaryOp( + context, + "Less", + { scalar: (a, b) => `u32(${a}<${b})`, vector: (a, b) => `vec4(${a}<${b})` }, + void 0, + void 0, + 9 /* bool */ + ); + }; + greaterOrEqual = (context) => { + runBinaryOp( + context, + "GreaterOrEqual", + { scalar: (a, b) => `u32(${a}>=${b})`, vector: (a, b) => `vec4(${a}>=${b})` }, + void 0, + void 0, + 9 /* bool */ + ); + }; + lessOrEqual = (context) => { + runBinaryOp( + context, + "LessOrEqual", + { scalar: (a, b) => `u32(${a}<=${b})`, vector: (a, b) => `vec4(${a}<=${b})` }, + void 0, + void 0, + 9 /* bool */ + ); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts +var getActivationSnippet, appendActivationUniformsData, appendActivationUniforms, parseInternalActivationAttributes; +var init_fuse_utils = __esm({ + "web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + getActivationSnippet = (attributes, valueType, baseType = "f32") => { + 
switch (attributes.activation) { + case "Relu": + return `value = max(value, ${valueType}(0.0));`; + case "Sigmoid": + return `value = (${valueType}(1.0) / (${valueType}(1.0) + exp(-value)));`; + case "Clip": + return `value = clamp(value, ${valueType}(${baseType}(uniforms.clip_min)), ${valueType}(${baseType}(uniforms.clip_max)));`; + case "HardSigmoid": + return `value = max(${valueType}(0.0), min(${valueType}(1.0), ${baseType}(uniforms.alpha) * value + ${baseType}(uniforms.beta)));`; + case "LeakyRelu": + return `value = select(${baseType}(uniforms.alpha) * value, value, value >= ${valueType}(0.0));`; + case "": + return ""; + default: + throw new Error(`Unsupported activation ${attributes.activation}`); + } + }; + appendActivationUniformsData = (attributes, programUniform) => { + if (attributes.activation === "Clip") { + programUniform.push( + { type: 1 /* float */, data: attributes.clipMax }, + { type: 1 /* float */, data: attributes.clipMin } + ); + } else if (attributes.activation === "HardSigmoid") { + programUniform.push( + { type: 1 /* float */, data: attributes.alpha }, + { type: 1 /* float */, data: attributes.beta } + ); + } else if (attributes.activation === "LeakyRelu") { + programUniform.push({ type: 1 /* float */, data: attributes.alpha }); + } + }; + appendActivationUniforms = (attributes, uniforms) => { + if (attributes.activation === "Clip") { + uniforms.push({ name: "clip_max", type: "f32" }, { name: "clip_min", type: "f32" }); + } else if (attributes.activation === "HardSigmoid") { + uniforms.push({ name: "alpha", type: "f32" }, { name: "beta", type: "f32" }); + } else if (attributes.activation === "LeakyRelu") { + uniforms.push({ name: "alpha", type: "f32" }); + } + }; + parseInternalActivationAttributes = (attributes) => { + const activation = attributes?.activation || ""; + if (activation === "HardSigmoid") { + const [alpha, beta] = attributes?.activation_params || [0.2, 0.5]; + return { activation, alpha, beta }; + } else if (activation === 
"Clip") { + const [clipMin, clipMax] = attributes?.activation_params || [MIN_CLIP, MAX_CLIP]; + return { activation, clipMax, clipMin }; + } else if (activation === "LeakyRelu") { + const [alpha] = attributes?.activation_params || [0.01]; + return { activation, alpha }; + } + return { activation }; + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts +var typeSnippet, biasSnippet; +var init_activation_util = __esm({ + "web/lib/wasm/jsep/webgpu/ops/3rd-party/activation_util.ts"() { + "use strict"; + typeSnippet = (component, dataType) => { + switch (component) { + case 1: + return dataType; + case 2: + return `vec2<${dataType}>`; + case 3: + return `vec3<${dataType}>`; + case 4: + return `vec4<${dataType}>`; + default: + throw new Error(`${component}-component is not supported.`); + } + }; + biasSnippet = (hasBias) => ` + ${hasBias ? "value = value + getBiasByOutputCoords(coords);" : ""} + `; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts +var utilFunctions; +var init_conv_util = __esm({ + "web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_util.ts"() { + "use strict"; + utilFunctions = (strideStr) => ` +fn getIndexFromCoords4D(coords : vec4, shape : vec4) -> i32 { + return dot(coords, vec4( + shape.y * shape.z * shape.w, shape.z * shape.w, shape.w, 1)); +} +fn getOutputIndexFromCoords(coords : vec4) -> i32 { + return dot(coords, vec4( + i32(${strideStr}.x), i32(${strideStr}.y), i32(${strideStr}.z), 1)); +} +`; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts +var writeDataToSubAVec4Snippet, calculateResultSnippet, makeMatMulPackedVec4Source, writeDataToSubASnippet, readDataFromSubASnippet, makeMatMulPackedSource, matMulReadWriteFnSource, createMatmulProgramInfo; +var init_matmul_packed_webgpu = __esm({ + "web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + init_fuse_utils(); + init_activation_util(); + 
/* writeDataToSubAVec4Snippet: WGSL statement that loads one element of the A tile through mm_readA; with transpose2 the row index runs along k and the column is taken from globalRowStart, otherwise the row is globalRow + innerRow and the column runs along k. batchIndices is appended to the call only when batchDims is truthy. */ writeDataToSubAVec4Snippet = (transpose2, batchDims) => { + if (transpose2) { + return ` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart / innerElementSize + inputCol${batchDims ? ", batchIndices" : ""}); + `; + } else { + return ` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRow + innerRow, + kStart / innerElementSize + inputCol${batchDims ? ", batchIndices" : ""}); + `; + } + }; + /* calculateResultSnippet: WGSL for the accumulation step of the vec4 kernel. It multiplies the cached B rows (BCached0..3) by the matching A components and adds them into acc[i] for every row handled by the thread; the fourth term is omitted when innerElementSize is 3. */ calculateResultSnippet = (transposeA, innerElementSize) => { + if (transposeA) { + return ` + let ACached0 = mm_Asub[k * innerElementSize][localRow]; + let ACached1 = mm_Asub[k * innerElementSize + 1][localRow]; + let ACached2 = mm_Asub[k * innerElementSize + 2][localRow]; + ${innerElementSize === 3 ? "" : "let ACached3 = mm_Asub[k * innerElementSize + 3][localRow];"} + for (var i = 0; i < rowPerThread; i = i + 1) { + acc[i] = BCached0 * ACached0[i] + acc[i]; + acc[i] = BCached1 * ACached1[i] + acc[i]; + acc[i] = BCached2 * ACached2[i] + acc[i]; + ${innerElementSize === 3 ? "" : "acc[i] = BCached3 * ACached3[i] + acc[i];"} + }`; + } else { + return ` + for (var i = 0; i < rowPerThread; i = i + 1) { + let ACached = mm_Asub[tileRow + i][k]; + acc[i] = BCached0 * ACached.x + acc[i]; + acc[i] = BCached1 * ACached.y + acc[i]; + acc[i] = BCached2 * ACached.z + acc[i]; + ${innerElementSize === 3 ? "" : "acc[i] = BCached3 * ACached.w + acc[i];"} + }`; + } + }; + /* makeMatMulPackedVec4Source: generates the WGSL entry point of the vectorized tiled matmul. workPerThread and workgroupSize are indexed as [x, y, z]; the A tile is tileInner wide and tileAOuter high unless transposeA swaps the two. */ makeMatMulPackedVec4Source = (workPerThread, workgroupSize, type = "f32", batchDims, transposeA = false, tileInner = 32, splitK = false, splitedDimInner = 32) => { + const tileAOuter = workgroupSize[1] * workPerThread[1]; + const tileBOuter = workgroupSize[0] * workPerThread[0]; + const tileAWidth = transposeA ? tileAOuter : tileInner; + const tileAHight = transposeA ? 
tileInner : tileAOuter; + const innerElementSize = tileAWidth / workgroupSize[0]; + const rowPerThreadB = tileInner / workgroupSize[1]; + if (!((transposeA && innerElementSize === 4 && workPerThread[1] === 4 || !transposeA && (innerElementSize === 3 || innerElementSize === 4)) && tileAWidth % workgroupSize[0] === 0 && tileInner % workgroupSize[1] === 0 && workPerThread[0] === 4)) { + throw new Error(`If transposeA ${transposeA} is true, innerElementSize ${innerElementSize} and workPerThread[1] ${workPerThread[1]} must be 4. + Otherwise, innerElementSize ${innerElementSize} must be 3 or 4. + tileAWidth ${tileAWidth} must be divisible by workgroupSize[0]${workgroupSize[0]}. tileInner ${tileInner} must be divisible by workgroupSize[1] ${workgroupSize[1]}. colPerThread ${workPerThread[0]} must be 4.`); + } + return ` +var mm_Asub: array, ${tileAWidth / innerElementSize}>, ${tileAHight}>; +var mm_Bsub: array, ${tileBOuter / workPerThread[0]}>, ${tileInner}>; + +const rowPerThread = ${workPerThread[1]}; +const colPerThread = ${workPerThread[0]}; +const innerElementSize = ${innerElementSize}; +const tileInner = ${tileInner}; + +@compute @workgroup_size(${workgroupSize[0]}, ${workgroupSize[1]}, ${workgroupSize[2]}) +fn main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let localRow = i32(localId.y); + let tileRow = localRow * rowPerThread; + let tileCol = i32(localId.x); + + let globalRow =i32(globalId.y) * rowPerThread; + let globalCol = i32(globalId.x); + let batch = ${splitK ? "0" : "i32(globalId.z)"}; + ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices("u32(batch)")};` : ""} + let globalRowStart = i32(workgroupId.y) * ${tileAOuter}; + + let num_tiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : "(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${splitK ? 
`i32(globalId.z) * ${splitedDimInner}` : "0"}; + + var acc: array, rowPerThread>; + + // Loop over shared dimension. + let tileRowB = localRow * ${rowPerThreadB}; + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let inputRow = tileRow + innerRow; + let inputCol = tileCol; + ${writeDataToSubAVec4Snippet(transposeA, batchDims)} + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${rowPerThreadB}; innerRow = innerRow + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, kStart + inputRow, globalCol${batchDims ? ", batchIndices" : ""}); + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + for (var k = 0; k < tileInner / innerElementSize; k = k + 1) { + let BCached0 = mm_Bsub[k * innerElementSize][tileCol]; + let BCached1 = mm_Bsub[k * innerElementSize + 1][tileCol]; + let BCached2 = mm_Bsub[k * innerElementSize + 2][tileCol]; + ${innerElementSize === 3 ? "" : "let BCached3 = mm_Bsub[k * innerElementSize + 3][tileCol];"} + + ${calculateResultSnippet(transposeA, innerElementSize)} + } + + workgroupBarrier(); + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + mm_write(batch, globalRow + innerRow, globalCol, acc[innerRow]); + } +}`; + }; + writeDataToSubASnippet = (transpose2, batchDims) => { + if (transpose2) { + return ` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + kStart + inputRow, + globalRowStart + inputCol${batchDims ? ", batchIndices" : ""}); + `; + } else { + return ` + mm_Asub[inputRow][inputCol] = mm_readA(batch, + globalRowStart + inputRow, + kStart + inputCol${batchDims ? ", batchIndices" : ""}); + `; + } + }; + readDataFromSubASnippet = (transposeA) => transposeA ? 
"let ACached = mm_Asub[k][tileRow + innerRow];" : "let ACached = mm_Asub[tileRow + innerRow][k];"; + makeMatMulPackedSource = (workPerThread, workgroupSize, type = "f32", batchDims, transposeA = false, tileInner = 32, splitK = false, splitedDimInner = 32, sequentialAccessByThreads = false) => { + const tileAOuter = workPerThread[1] * workgroupSize[1]; + const tileBOuter = workPerThread[0] * workgroupSize[0]; + const tileAWidth = transposeA ? tileAOuter : tileInner; + const tileAHight = transposeA ? tileInner : tileAOuter; + if (!(tileAHight % workgroupSize[1] === 0 && tileAWidth % workgroupSize[0] === 0 && tileInner % workgroupSize[1] === 0)) { + throw new Error(`tileAHight ${tileAHight} must be divisible by workgroupSize[1]${workgroupSize[1]}, tileAWidth ${tileAWidth} must be divisible by workgroupSize[0]${workgroupSize[0]}, tileInner ${tileInner} must be divisible by workgroupSize[1]${workgroupSize[1]}`); + } + const rowPerThreadA = tileAHight / workgroupSize[1]; + const colPerThreadA = tileAWidth / workgroupSize[0]; + const rowPerThreadB = tileInner / workgroupSize[1]; + const matmulSnippet = sequentialAccessByThreads ? ` + let localRow = i32(localId.y); + let localCol = i32(localId.x); + let globalRowStart = i32(workgroupId.y) * ${tileAOuter}; + let globalColStart = i32(workgroupId.x) * ${tileBOuter}; + + // Loop over shared dimension. + for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var inputRow = localRow; inputRow < ${tileAHight}; inputRow = inputRow + ${workgroupSize[1]}) { + for (var inputCol = localCol; inputCol < ${tileAWidth}; inputCol = inputCol + ${workgroupSize[0]}) { + ${writeDataToSubASnippet(transposeA, batchDims)} + } + } + // Load one tile of B into local memory. 
+ for (var inputRow = localRow; inputRow < ${tileInner}; inputRow = inputRow + ${workgroupSize[1]}) { + for (var inputCol = localCol; inputCol < ${tileBOuter}; inputCol = inputCol + ${workgroupSize[0]}) { + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalColStart + inputCol${batchDims ? ", batchIndices" : ""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${type}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][localCol + inner * ${workgroupSize[0]}]; + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let ACached = ${transposeA ? `mm_Asub[k][localRow + innerRow * ${workgroupSize[1]}];` : `mm_Asub[localRow + innerRow * ${workgroupSize[1]}][k];`} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + + ACached * BCached[innerCol]; + } + } + } + workgroupBarrier(); + } + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + let gRow = globalRowStart + localRow + innerRow * ${workgroupSize[1]}; + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let gCol = globalColStart + localCol + innerCol * ${workgroupSize[0]}; + mm_write(batch, gRow, gCol, acc[innerRow][innerCol]); + } + } + ` : ` +let tileRow = i32(localId.y) * rowPerThread; +let tileCol = i32(localId.x) * colPerThread; + +let globalRow = i32(globalId.y) * rowPerThread; +let globalCol = i32(globalId.x) * colPerThread; +let globalRowStart = i32(workgroupId.y) * ${tileAOuter}; + +let tileRowA = i32(localId.y) * ${rowPerThreadA}; +let tileColA = i32(localId.x) * ${colPerThreadA}; +let tileRowB = i32(localId.y) * ${rowPerThreadB}; +// Loop over shared dimension. 
+for (var t = 0; t < num_tiles; t = t + 1) { + // Load one tile of A into local memory. + for (var innerRow = 0; innerRow < ${rowPerThreadA}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < ${colPerThreadA}; innerCol = innerCol + 1) { + let inputRow = tileRowA + innerRow; + let inputCol = tileColA + innerCol; + ${writeDataToSubASnippet(transposeA, batchDims)} + } + } + + // Load one tile of B into local memory. + for (var innerRow = 0; innerRow < ${rowPerThreadB}; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + let inputRow = tileRowB + innerRow; + let inputCol = tileCol + innerCol; + mm_Bsub[inputRow][inputCol] = mm_readB(batch, + kStart + inputRow, + globalCol + innerCol${batchDims ? ", batchIndices" : ""}); + } + } + kStart = kStart + tileInner; + workgroupBarrier(); + + // Compute acc values for a single thread. + var BCached : array<${type}, colPerThread>; + for (var k = 0; k < tileInner; k = k + 1) { + for (var inner = 0; inner < colPerThread; inner = inner + 1) { + BCached[inner] = mm_Bsub[k][tileCol + inner]; + } + + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + ${readDataFromSubASnippet(transposeA)} + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol]; + } + } + } + + workgroupBarrier(); +} + +for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + mm_write(batch, globalRow + innerRow, globalCol + innerCol, + acc[innerRow][innerCol]); + } +} +`; + return ` + var mm_Asub : array, ${tileAHight}>; + var mm_Bsub : array, ${tileInner}>; + const rowPerThread = ${workPerThread[1]}; + const colPerThread = ${workPerThread[0]}; + const tileInner = ${tileInner}; + +@compute @workgroup_size(${workgroupSize[0]}, ${workgroupSize[1]}, ${workgroupSize[2]}) +fn 
main(@builtin(local_invocation_id) localId : vec3, + @builtin(global_invocation_id) globalId : vec3, + @builtin(workgroup_id) workgroupId : vec3) { + let batch = ${splitK ? "0" : "i32(globalId.z)"}; + ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices("u32(batch)")};` : ""} + let num_tiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : "(uniforms.dim_inner - 1) / tileInner + 1"}; + var kStart = ${splitK ? `i32(globalId.z) * ${splitedDimInner}` : "0"}; + + var acc : array, rowPerThread>; + + // Without this initialization strange values show up in acc. + for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) { + for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) { + acc[innerRow][innerCol] = 0.0; + } + } + ${matmulSnippet} + } +`; + }; + matMulReadWriteFnSource = (component, hasBias, applyActivation, variables, batchShapes, isChannelsLast = false) => { + const [batchAShape, batchBShape, batchShape] = batchShapes; + const [batchVariable, aVariable, bVariable, outputVariable2] = variables; + const broadCastADims = getBroadcastDims(batchAShape, batchShape); + const broadCastBDims = getBroadcastDims(batchBShape, batchShape); + const dataType = tensorTypeToWsglStorageType(variables[0].type.tensor); + const getAIndices = () => { + const aRank = aVariable.rank; + const batchRank = batchVariable.rank; + let resStr = `var aIndices: ${aVariable.type.indices};`; + for (let i = aRank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) { + resStr += ` +aIndices[${i}] = ${batchRank > 1 ? 
`batchIndices[${j}]` : "batchIndices"};`; + } + broadCastADims.forEach((i) => { + resStr += ` +aIndices[${i}] = 0;`; + }); + resStr += ` +aIndices[${aRank - 2}] = u32(row); + aIndices[${aRank - 1}] = u32(colIn);`; + return resStr; + }; + const getBIndices = () => { + const bRank = bVariable.rank; + const batchRank = batchVariable.rank; + let resStr = `var bIndices: ${bVariable.type.indices};`; + for (let i = bRank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) { + resStr += ` +bIndices[${i}] = ${batchRank > 1 ? `batchIndices[${j}]` : "batchIndices"};`; + } + broadCastBDims.forEach((i) => { + resStr += ` +bIndices[${i}] = 0;`; + }); + resStr += ` +bIndices[${bRank - 2}] = u32(row); + bIndices[${bRank - 1}] = u32(colIn);`; + return resStr; + }; + const source = ` + fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${typeSnippet(component, dataType)} { + var value = ${typeSnippet(component, dataType)}(0.0); + let col = colIn * ${component}; + if(row < uniforms.dim_a_outer && col < uniforms.dim_inner) + { + ${getAIndices()} + value = ${aVariable.getByIndices("aIndices")}; + } + return value; + } + + fn mm_readB(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${typeSnippet(component, dataType)} { + var value = ${typeSnippet(component, dataType)}(0.0); + let col = colIn * ${component}; + if(row < uniforms.dim_inner && col < uniforms.dim_b_outer) + { + ${getBIndices()} + value = ${bVariable.getByIndices("bIndices")}; + } + return value; + } + + fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${typeSnippet(component, dataType)}) { + let col = colIn * ${component}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) { + var value = valueIn; + let coords = vec3(batch, row, colIn); + ${hasBias ? `value = value + ${isChannelsLast ? 
"bias[colIn]" : `${typeSnippet(component, dataType)}(bias[row])`};` : ""} + ${applyActivation} + ${outputVariable2.setByIndices("vec3(coords)", "value")} + } + } + `; + return source; + }; + createMatmulProgramInfo = (inputs, activationAttributes, outputShape, reshapedOutputShape, isChannelsLast = false) => { + const aShape = inputs[0].dims; + const bShape = inputs[1].dims; + const outerDimsA = aShape.slice(0, -2); + const outerDimsB = bShape.slice(0, -2); + const outerDims = reshapedOutputShape ? reshapedOutputShape.slice(0, -2) : outputShape.slice(0, -2); + const batchSize = ShapeUtil.size(outerDims); + const dimAOuter = aShape[aShape.length - 2]; + const dimInner = aShape[aShape.length - 1]; + const dimBOuter = bShape[bShape.length - 1]; + const isVec4 = dimInner % 4 === 0 && dimBOuter % 4 === 0; + const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1]; + const workgroupSize = [8, 8, 1]; + const dispatch = [ + Math.ceil(dimBOuter / workgroupSize[0] / elementsPerThread[0]), + Math.ceil(dimAOuter / workgroupSize[1] / elementsPerThread[1]), + Math.ceil(batchSize / workgroupSize[2] / elementsPerThread[2]) + ]; + const components = isVec4 ? 
4 : 1; + const aShapeTemp = [...outerDimsA, dimAOuter, dimInner / components]; + const aRank = aShapeTemp.length; + const bShapeTemp = [...outerDimsB, dimInner, dimBOuter / components]; + const bRank = bShapeTemp.length; + const outputShapeTemp = [batchSize, dimAOuter, dimBOuter / components]; + const programUniforms = [ + { type: 6 /* int32 */, data: dimAOuter }, + { type: 6 /* int32 */, data: dimBOuter }, + { type: 6 /* int32 */, data: dimInner } + ]; + appendActivationUniformsData(activationAttributes, programUniforms); + programUniforms.push(...createTensorShapeVariables(outerDims, aShapeTemp, bShapeTemp)); + const inputDependencies = ["rank", "rank"]; + const hasBias = inputs.length > 2; + if (hasBias) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + inputDependencies.push("rank"); + } + programUniforms.push(...createTensorShapeVariables(outputShapeTemp)); + const getShaderSource = (shaderHelper) => { + const batchRank = outerDims.length; + const batchDims = internalVariable("batchDims", inputs[0].dataType, batchRank, 1); + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + const A = inputVariable("a", inputs[0].dataType, aRank, components); + const B = inputVariable("b", inputs[1].dataType, bRank, components); + const output = outputVariable("result", inputs[0].dataType, outputShapeTemp.length, components); + const inputVariables = [A, B]; + if (hasBias) { + const biasComponents = isChannelsLast ? 
/**
 * Builds the WGSL source for the three helper functions (`mm_readA`, `mm_readB`,
 * `mm_write`) that the packed matmul kernels call when a 2-D convolution is run
 * through the matmul path.
 *
 * The input tensor `x` is read through `sampleX`, which maps a (row, col) matrix
 * position back to (batch, xRow, xCol, xCh) using the `stride`, `dilation` and `pad`
 * uniforms; the weight tensor `w` is read through `sampleW`. Which of the two plays
 * the role of matrix A or matrix B depends on `isChannelsLast`.
 *
 * @param isChannelsLast    true when x is laid out as NHWC; swaps the A/B roles of x and w.
 * @param fitAOuter         when the relevant fit* flags are all true the bounds check
 * @param fitBOuter         around the x read is omitted (NHWC: fitAOuter && fitInner,
 * @param fitInner          otherwise: fitInner && fitBOuter).
 * @param addBias           forwarded to `biasSnippet` to emit the bias addition in `mm_write`.
 * @param attributes        activation attributes forwarded to `getActivationSnippet`.
 * @param innerElementSizeX components per x read (1, 3 or 4).
 * @param innerElementSizeW components per w read (1 or 4).
 * @param innerElementSize  components per written output value.
 * @param dataType          WGSL scalar type name used for all values.
 * @returns the WGSL source as a string.
 * @throws Error when `innerElementSizeX` / `innerElementSizeW` is not one of the supported sizes.
 */
conv2dCommonSnippet = (isChannelsLast, fitAOuter, fitBOuter, fitInner, addBias = false, attributes, innerElementSizeX = 4, innerElementSizeW = 4, innerElementSize = 4, dataType = "f32") => {
  // Statement that loads one element (scalar or vector) of x at flat offset `xIndex`.
  const getXSnippet = (size) => {
    switch (size) {
      case 1:
        return "resData = x[xIndex];";
      case 3:
        return `resData = vec3<${dataType}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;
      case 4:
        return "resData = x[xIndex / 4];";
      default:
        throw new Error(`innerElementSize ${size} is not supported.`);
    }
  };
  // Statement that returns one element (scalar or vec4) of w for (row, colIn).
  const getWSnippet = (size) => {
    switch (size) {
      case 1:
        return "return w[row * i32(uniforms.w_shape[3]) + colIn];";
      case 4:
        return "return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";
      default:
        throw new Error(`innerElementSize ${size} is not supported.`);
    }
  };

  const xType = typeSnippet(innerElementSizeX, dataType);
  const wType = typeSnippet(innerElementSizeW, dataType);
  const resType = typeSnippet(innerElementSize, dataType);

  const coordASnippet = isChannelsLast ? `
    let coord = vec4(batch, xRow, xCol, xCh);
    ` : `
    let coord = vec4(batch, xCh, xRow, xCol);
    `;
  const coordResSnippet = isChannelsLast ? `
    let coords = vec4(
      batch,
      row / outWidth,
      row % outWidth,
      col);
    ` : `
    let coords = vec4(
      batch,
      row,
      col / outWidth,
      col % outWidth);
    `;

  // Axis positions of height / width differ between NHWC and NCHW.
  const xHeight = isChannelsLast ? "i32(uniforms.x_shape[1])" : "i32(uniforms.x_shape[2])";
  const xWidth = isChannelsLast ? "i32(uniforms.x_shape[2])" : "i32(uniforms.x_shape[3])";
  const outWidthExpr = isChannelsLast ? "i32(uniforms.result_shape[2])" : "i32(uniforms.result_shape[3])";
  // For NCHW the matrix is transposed, so the WGSL `row`/`col` identifiers swap roles.
  const row = isChannelsLast ? "row" : "col";
  const col = isChannelsLast ? "col" : "row";

  // The shape uniforms are not i32 (every other use here casts them with i32(...)),
  // while `coord` is built from i32 values, so the shape argument must be converted
  // explicitly: a bare `vec4(uniforms.x_shape)` would keep the uniform's element type.
  const readXSnippet = `
    let inChannels = i32(uniforms.w_shape[2]);
    let outWidth = ${outWidthExpr};
    let outRow = ${row} / outWidth;
    let outCol = ${row} % outWidth;

    let WRow = ${col} / (i32(uniforms.w_shape[1]) * inChannels);
    let WCol = ${col} / inChannels % i32(uniforms.w_shape[1]);
    let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0];
    let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1];
    let xCh = ${col} % inChannels;
    var resData = ${xType}(0.0);
    // The bounds checking is always needed since we use it to pad zero for
    // the 'same' padding type.
    if (xRow >= 0 && xRow < ${xHeight} && xCol >= 0 && xCol < ${xWidth}) {
      ${coordASnippet}
      let xIndex = getIndexFromCoords4D(coord, vec4<i32>(uniforms.x_shape));
      ${getXSnippet(innerElementSizeX)}
    }
    return resData;`;

  // x is matrix A for NHWC and matrix B otherwise; pick the matching fit flags and bounds.
  const xAlwaysInBounds = isChannelsLast ? fitAOuter && fitInner : fitInner && fitBOuter;
  const xBoundsCheck = isChannelsLast
    ? "row < uniforms.dim_a_outer && col < uniforms.dim_inner"
    : "row < uniforms.dim_inner && col < uniforms.dim_b_outer";
  const sampleX = xAlwaysInBounds ? `
    let col = colIn * ${innerElementSizeX};
    ${readXSnippet}` : `
    let col = colIn * ${innerElementSizeX};
    if (${xBoundsCheck}) {
      ${readXSnippet}
    }
    return ${xType}(0.0);`;
  const sampleW = `${getWSnippet(innerElementSizeW)}`;

  const aType = isChannelsLast ? xType : wType;
  const bType = isChannelsLast ? wType : xType;
  const applyActivation = getActivationSnippet(attributes, resType, dataType);

  const userCode = `
    fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${aType} {
      ${isChannelsLast ? sampleX : sampleW}
    }

    fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${bType} {
      ${isChannelsLast ? sampleW : sampleX}
    }

    fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${resType}) {
      let col = colIn * ${innerElementSize};
      if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer)
      {
      var value = valueIn;
      let outWidth = ${outWidthExpr};
      ${coordResSnippet}
      ${biasSnippet(addBias)}
      ${applyActivation}
      setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
      }
    }`;
  return userCode;
};
3 : 4 : 1; + const tileAOuter = workGroupSize[1] * elementsPerThread[1]; + const tileBOuter = workGroupSize[0] * elementsPerThread[0]; + const tileInner = Math.max(workGroupSize[0] * innerElementSize, workGroupSize[1]); + const fitAOuter = dimAOuter % tileAOuter === 0; + const fitBOuter = dimBOuter % tileBOuter === 0; + const fitInner = dimInner % tileInner === 0; + const elementsSize = isVec4 ? [innerElementSize, 4, 4] : [1, 1, 1]; + const programUniforms = [ + { type: 6 /* int32 */, data: dimAOuter }, + { type: 6 /* int32 */, data: dimBOuter }, + { type: 6 /* int32 */, data: dimInner }, + { type: 6 /* int32 */, data: [attributes.pads[0], attributes.pads[1]] }, + { type: 6 /* int32 */, data: attributes.strides }, + { type: 6 /* int32 */, data: attributes.dilations } + ]; + appendActivationUniformsData(attributes, programUniforms); + programUniforms.push(...createTensorShapeVariables(inputs[0].dims, inputs[1].dims)); + const inputDependencies = ["rank", "rank"]; + if (hasBias) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + inputDependencies.push("rank"); + } + programUniforms.push(...createTensorShapeVariables(outputShape)); + const getShaderSource = (shaderHelper) => { + const uniforms = [ + { name: "dim_a_outer", type: "i32" }, + { name: "dim_b_outer", type: "i32" }, + { name: "dim_inner", type: "i32" }, + { name: "pad", type: "i32", length: 2 }, + { name: "stride", type: "i32", length: 2 }, + { name: "dilation", type: "i32", length: 2 } + ]; + appendActivationUniforms(attributes, uniforms); + const components = isVec4 ? 4 : 1; + const t = tensorTypeToWsglStorageType(inputs[0].dataType); + let declareFunctions = ` + fn setOutputAtIndex(flatIndex : i32, value : ${isVec4 ? `vec4<${t}>` : t}) { + result[flatIndex] = ${isVec4 ? `vec4<${t}>` : t}(value); + } + fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${isVec4 ? 
`vec4<${t}>` : t}) { + let flatIndex = getOutputIndexFromCoords(vec4(d0, d1, d2, d3)); + setOutputAtIndex(flatIndex ${isVec4 ? "/ 4" : ""}, value); + }`; + const x = inputVariable( + "x", + inputs[0].dataType, + inputs[0].dims.length, + innerElementSize === 3 ? 1 : innerElementSize + ); + const w = inputVariable("w", inputs[1].dataType, inputs[1].dims.length, components); + const inputVariables = [x, w]; + const output = outputVariable("result", inputs[0].dataType, outputShape.length, components); + if (hasBias) { + const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, components); + inputVariables.push(bias); + declareFunctions += ` + fn getBiasByOutputCoords(coords : vec4) -> ${isVec4 ? `vec4<${t}>` : t} { + return bias[coords.${isChannelsLast ? "w" : "y"}${isVec4 ? "/ 4" : ""}]; + }`; + } + return ` + ${utilFunctions("uniforms.result_strides")} + //struct Uniforms { xShape : vec4, wShape : vec4, outShape : vec4, + // outShapeStrides: vec3, filterDims : vec2, pad : vec2, stride : vec2, + // dilation : vec2, dimAOuter : i32, dimBOuter : i32, dimInner : i32 }; + ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)} + ${declareFunctions} + ${conv2dCommonSnippet( + isChannelsLast, + fitAOuter, + fitBOuter, + fitInner, + hasBias, + attributes, + elementsSize[0], + elementsSize[1], + elementsSize[2], + t + )} + ${isVec4 ? 
/**
 * Creates the program description for the generic (non-vectorized) grouped convolution.
 *
 * One invocation computes one output element: it loops over the input channels of
 * the element's group and over the kernel height/width, accumulating x * w, then
 * applies the optional bias and the fused activation.
 *
 * @param inputs                     [x, w] or [x, w, bias]; `dims` and `dataType` are read.
 * @param attributes                 conv attributes (`group`, `format`, `dilations`, `pads`,
 *                                   `strides`, `cacheKey`) plus activation attributes.
 * @param squeezeOutputShapeFunction optional mapper applied to the output dims reported
 *                                   to the runtime (the shader always uses the unsqueezed shape).
 * @returns program info with `name`, `shaderCache`, `getRunData` and `getShaderSource`.
 */
createGroupedConvProgramInfo = (inputs, attributes, squeezeOutputShapeFunction) => {
  const hasBias = inputs.length > 2;
  const processBias = hasBias ? "value += b[output_channel];" : "";
  const xShape = inputs[0].dims;
  const wShape = inputs[1].dims;
  const outputChannelsPerGroup = wShape[0] / attributes.group;
  const isChannelLast = attributes.format === "NHWC";
  // Axis positions inside the 4-D index vectors for the active layout.
  const [heightAxis, widthAxis, channelAxis] = isChannelLast ? [1, 2, 3] : [2, 3, 1];
  const outputShape = calculateOutputShape(
    xShape,
    wShape,
    attributes.dilations,
    attributes.pads,
    attributes.strides,
    isChannelLast
  );
  const outputSize = ShapeUtil.size(outputShape);

  // Order must match the `uniforms` declaration list in getShaderSource below.
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: attributes.dilations },
    { type: 12 /* uint32 */, data: [attributes.strides[0], attributes.strides[1]] },
    { type: 12 /* uint32 */, data: [attributes.pads[0], attributes.pads[1]] },
    { type: 12 /* uint32 */, data: outputChannelsPerGroup }
  ];
  appendActivationUniformsData(attributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(xShape, wShape));
  const inputDependencies = ["rank", "rank"];
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
    inputDependencies.push("rank");
  }
  programUniforms.push(...createTensorShapeVariables(outputShape));

  const getShaderSource = (shaderHelper) => {
    const output = outputVariable("output", inputs[0].dataType, outputShape.length);
    const baseType = tensorTypeToWsglStorageType(output.type.tensor);
    const applyActivation = getActivationSnippet(attributes, output.type.value, baseType);
    const x = inputVariable("x", inputs[0].dataType, xShape.length);
    const w = inputVariable("w", inputs[1].dataType, wShape.length);
    const inputVars = [x, w];
    if (hasBias) {
      inputVars.push(inputVariable("b", inputs[2].dataType, inputs[2].dims.length));
    }
    const uniforms = [
      { name: "output_size", type: "u32" },
      { name: "dilations", type: "u32", length: attributes.dilations.length },
      { name: "strides", type: "u32", length: 2 },
      { name: "pads", type: "u32", length: 2 },
      { name: "output_channels_per_group", type: "u32" }
    ];
    appendActivationUniforms(attributes, uniforms);
    const xVal = isChannelLast
      ? x.get("batch", "xHeight", "xWidth", "input_channel")
      : x.get("batch", "input_channel", "xHeight", "xWidth");
    // WGSL needs an explicit element type in a type annotation; the index components
    // and the strides/pads uniforms are all u32, hence vec2<u32>.
    // NOTE(review): the `< 0u` tests below can never be true for u32 values; padded
    // positions are presumably rejected by the `>=` test after unsigned wraparound — confirm.
    return `
  ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}

  ${shaderHelper.mainStart()}
    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}

    let outputIndices = ${output.offsetToIndices("global_idx")};
    let batch: u32 = outputIndices[0];
    let output_channel: u32 = outputIndices[${channelAxis}];
    let xRCCorner: vec2<u32> = vec2<u32>(outputIndices[${heightAxis}], outputIndices[${widthAxis}]) * uniforms.strides - uniforms.pads;
    let group_id: u32 = output_channel / uniforms.output_channels_per_group;

    var value: ${output.type.value} = ${output.type.value}(0);
    for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[1]; wInChannel++) {
      let input_channel = group_id * uniforms.w_shape[1] + wInChannel;
      for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[2]; wHeight++) {
        let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0];

        if (xHeight < 0u || xHeight >= uniforms.x_shape[${heightAxis}]) {
          continue;
        }

        for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[3]; wWidth++) {
          let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1];
          if (xWidth < 0u || xWidth >= uniforms.x_shape[${widthAxis}]) {
            continue;
          }

          let xVal = ${xVal};
          let wVal = ${w.get("output_channel", "wInChannel", "wHeight", "wWidth")};
          value += xVal*wVal;
        }
      }
    }
    ${processBias}
    ${applyActivation}
    ${output.setByOffset("global_idx", "value")}
  }`;
  };

  return {
    name: "GroupedConv",
    shaderCache: { hint: attributes.cacheKey, inputDependencies },
    getRunData: () => ({
      outputs: [{
        dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
        dataType: inputs[0].dataType
      }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
"value += b[output_channel];" : ""; + const uniforms = [ + { name: "output_size", type: "u32" }, + { name: "strides", type: "i32", length: 2 }, + { name: "pads", type: "i32", length: 2 } + ]; + appendActivationUniforms(attributes, uniforms); + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let width0 = uniforms.output_shape[3]; + let output_channel = global_idx % width0; + var index1 = global_idx / width0; + let width1 = uniforms.output_shape[2] / ${outputNumber}u; + let col = (index1 % width1) * ${outputNumber}u; + index1 = index1 / width1; + let row = index1 % uniforms.output_shape[1]; + let batch = index1 / uniforms.output_shape[1]; + + let x_corner = vec2(i32(row), i32(col)) * uniforms.strides - uniforms.pads; + + var x_vals: array<${x.type.value}, ${xNumber}>; + var values: array<${output.type.value}, ${outputNumber}>; + let input_channel = output_channel; + // Use constant instead of uniform can give better performance for w's height/width. 
+ for (var w_height: u32 = 0u; w_height < ${wShape[0]}; w_height++) { + let x_height = x_corner.x + i32(w_height); + if (x_height >= 0 && u32(x_height) < uniforms.x_shape[1]) { + for (var i = 0; i < ${xNumber}; i++) { + let x_width = x_corner.y + i; + if (x_width >= 0 && u32(x_width) < uniforms.x_shape[2]) { + x_vals[i] = ${x.get("batch", "u32(x_height)", "u32(x_width)", "input_channel")}; + } else { + x_vals[i] = ${x.type.value}(0); + } + } + for (var w_width: u32 = 0u; w_width < ${wShape[1]}; w_width++) { + let w_val = ${w.get("w_height", "w_width", "0", "output_channel")}; + for (var i = 0u; i < ${outputNumber}u; i++) { + values[i] = fma(x_vals[i * u32(uniforms.strides[1]) + w_width], w_val, values[i]); + } + } + } + } + + for (var i = 0u; i < ${outputNumber}u; i++) { + var value = values[i]; + ${processBias} + ${applyActivation} + ${output.set("batch", "row", "col + i", "output_channel", "value")}; + } + }`; + }; + return { + name: "GroupedConv-Vectorize", + shaderCache: { + hint: `${attributes.cacheKey};${components};${outputNumber};${xNumber};${wShape[0]};${wShape[1]}`, + inputDependencies: hasBias ? 
["rank", "rank", "type"] : ["rank", "rank"] + }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource + }; + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/matmul.ts +var createNaiveMatmulProgramInfo, validateInputs8, matMul; +var init_matmul = __esm({ + "web/lib/wasm/jsep/webgpu/ops/matmul.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_matmul_packed_webgpu(); + init_common(); + init_fuse_utils(); + createNaiveMatmulProgramInfo = (inputs, activationAttributes, outputShape, reshapedOutputShape, isChannelsLast = false) => { + const aShape = inputs[0].dims; + const bShape = inputs[1].dims; + const M = aShape[aShape.length - 2]; + const N = bShape[bShape.length - 1]; + const K = aShape[aShape.length - 1]; + const components = getMaxComponents(N); + const aComponents = getMaxComponents(K); + const outputNumber = getMaxComponents(M); + const outputSize = ShapeUtil.size(outputShape) / components / outputNumber; + const hasBias = inputs.length > 2; + const outerDims = reshapedOutputShape ? 
reshapedOutputShape.slice(0, -2) : outputShape.slice(0, -2); + const batchSize = ShapeUtil.size(outerDims); + const outputShapeInShader = [batchSize, M, N]; + const programUniforms = [ + { type: 12 /* uint32 */, data: outputSize }, + { type: 12 /* uint32 */, data: M }, + { type: 12 /* uint32 */, data: N }, + { type: 12 /* uint32 */, data: K } + ]; + appendActivationUniformsData(activationAttributes, programUniforms); + programUniforms.push(...createTensorShapeVariables(outerDims, aShape, bShape)); + if (hasBias) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + } + programUniforms.push(...createTensorShapeVariables(outputShapeInShader)); + const getShaderSource = (shaderHelper) => { + const batchDims = internalVariable("batch_dims", inputs[0].dataType, outerDims.length); + const a = inputVariable("a", inputs[0].dataType, aShape.length, aComponents); + const b = inputVariable("b", inputs[1].dataType, bShape.length, components); + const output = outputVariable("output", inputs[0].dataType, outputShapeInShader.length, components); + const baseType = tensorTypeToWsglStorageType(output.type.tensor); + const applyActivation = getActivationSnippet(activationAttributes, output.type.value, baseType); + const inputVariables = [a, b]; + let processBias = ""; + if (hasBias) { + const biasComponents = isChannelsLast ? components : 1; + inputVariables.push(inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, biasComponents)); + processBias = `${isChannelsLast ? 
`value += bias[col / ${biasComponents}];` : `value += ${output.type.value}(bias[row + i]);`}`; + } + const outerDimsA = aShape.slice(0, -2); + const outerDimsB = bShape.slice(0, -2); + const broadCastADims = getBroadcastDims(outerDimsA, outerDims); + const broadCastBDims = getBroadcastDims(outerDimsB, outerDims); + const uniforms = [ + { name: "output_size", type: "u32" }, + { name: "M", type: "u32" }, + { name: "N", type: "u32" }, + { name: "K", type: "u32" } + ]; + appendActivationUniforms(activationAttributes, uniforms); + const getIndices = (variable, broadCastDims) => { + const rank = variable.rank; + const name = variable.name; + if (rank === 2) { + return `var ${name}_indices = ${variable.type.indices}(0u, 0u);`; + } + const batchRank = batchDims.rank; + let resStr = `var ${name}_indices: ${variable.type.indices};`; + for (let i = rank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) { + resStr += ` +${name}_indices[${i}] = ${batchRank > 1 ? `batch_indices[${j}]` : "batch_indices"};`; + } + broadCastDims.forEach((i) => { + resStr += ` +${name}_indices[${i}] = 0;`; + }); + resStr += `${name}_indices[${rank - 2}] = 0u; + ${name}_indices[${rank - 1}] = 0u;`; + return resStr; + }; + const calcResult = () => { + let calcStr = `var a_data: ${a.type.value};`; + for (let i = 0; i < aComponents; i++) { + calcStr += ` + let b_data${i} = b[(b_offset + (k + ${i}) * uniforms.N + col) / ${components}];`; + } + for (let i = 0; i < outputNumber; i++) { + calcStr += `a_data = a[(a_offset + (row + ${i}) * uniforms.K + k) / ${aComponents}];`; + for (let j = 0; j < aComponents; j++) { + calcStr += ` + values[${i}] = fma(${b.type.value}(a_data${aComponents === 1 ? 
"" : `[${j}]`}), b_data${j}, values[${i}]); +`; + } + } + return calcStr; + }; + return ` + ${shaderHelper.registerUniforms(uniforms).registerInternalVariables(batchDims).declareVariables( + ...inputVariables, + output + )} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let col = (global_idx % (uniforms.N / ${components})) * ${components}; + var index1 = global_idx / (uniforms.N / ${components}); + let stride1 = uniforms.M / ${outputNumber}; + let row = (index1 % stride1) * ${outputNumber}; + let batch = index1 / stride1; + + ${outputShape.length === 2 ? "" : `let batch_indices = ${batchDims.offsetToIndices("batch")};`} + ${getIndices(a, broadCastADims)} + let a_offset = ${a.indicesToOffset("a_indices")}; + ${getIndices(b, broadCastBDims)} + let b_offset = ${b.indicesToOffset("b_indices")}; + var values: array<${output.type.value}, ${outputNumber}>; + for (var k: u32 = 0u; k < uniforms.K; k = k + ${aComponents}) { + ${calcResult()} + } + for (var i = 0u; i < ${outputNumber}u; i++) { + var value = values[i]; + ${processBias} + ${applyActivation} + let cur_indices = ${output.type.indices}(batch, row + i, col); + let offset = ${output.indicesToOffset("cur_indices")}; + ${output.setByOffset(`offset / ${components}`, "value")}; + } + } + `; + }; + return { + name: "MatMulNaive", + shaderCache: { + hint: `${activationAttributes.activation};${components};${aComponents};${outputNumber};${isChannelsLast}`, + inputDependencies: hasBias ? 
/**
 * Checks the operands of a MatMul call.
 *
 * @param inputs list of tensors; only `dims` is inspected.
 * @throws Error unless there are exactly two inputs and the last dimension of the
 *         first equals the second-to-last dimension of the second.
 */
validateInputs8 = (inputs) => {
  const hasBothOperands = Boolean(inputs) && inputs.length === 2;
  if (!hasBothOperands) {
    throw new Error("MatMul requires 2 inputs.");
  }
  const lhsDims = inputs[0].dims;
  const rhsDims = inputs[1].dims;
  const lhsShared = lhsDims[lhsDims.length - 1];
  const rhsShared = rhsDims[rhsDims.length - 2];
  if (lhsShared !== rhsShared) {
    throw new Error("shared dimension does not match.");
  }
};
/**
 * MatMul kernel entry point: validates the inputs, computes the broadcast output
 * shape and submits either the naive or the packed matmul program.
 *
 * The naive program is used only when both the output's last dimension and the
 * shared (inner) dimension are smaller than 8.
 *
 * @param context compute context providing `inputs` and `compute(programInfo)`.
 * @throws Error when the inputs are invalid or their shapes cannot be broadcast.
 */
matMul = (context) => {
  const operands = context.inputs;
  validateInputs8(operands);
  const lhsDims = operands[0].dims;
  const outputShape = BroadcastUtil.calcShape(lhsDims, operands[1].dims, true);
  if (!outputShape) {
    throw new Error("Can't use matmul on the given tensors");
  }
  const outCols = outputShape[outputShape.length - 1];
  const sharedDim = lhsDims[lhsDims.length - 1];
  const isTiny = outCols < 8 && sharedDim < 8;
  const buildProgram = isTiny ? createNaiveMatmulProgramInfo : createMatmulProgramInfo;
  context.compute(buildProgram(operands, { activation: "" }, outputShape));
};
// Permutation passed to the transpose program for the conv weight tensor.
weightTransposeAttribute = [2, 3, 1, 0];
/**
 * Checks the inputs and attributes of a Conv call.
 *
 * @param inputs     [x, w] or [x, w, bias]; only `dims` is inspected.
 * @param attributes must provide `format`, `group`, `dilations`, `strides`, `pads`
 *                   and `kernelShape`.
 * @throws Error describing the first violated constraint.
 */
validateInputs9 = (inputs, attributes) => {
  if (!inputs || ![2, 3].includes(inputs.length)) {
    throw new Error("Conv requires 2 or 3 inputs");
  }
  const xDims = inputs[0].dims;
  const xRank = xDims.length;
  if (xRank !== 4 && xRank !== 3) {
    throw new Error("currently only support conv 1D and 2D");
  }
  const wDims = inputs[1].dims;
  if (xRank !== wDims.length) {
    throw new Error("filter does not have same dimension as input");
  }
  // Channels are the last axis for NHWC, axis 1 otherwise.
  const channelAxis = attributes.format === "NHWC" ? xRank - 1 : 1;
  const dataChannel = xDims[channelAxis];
  const filterInChannel = wDims[1] * attributes.group;
  if (dataChannel !== filterInChannel) {
    throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");
  }
  if (inputs.length === 3) {
    const biasDims = inputs[2].dims;
    if (biasDims.length !== 1 || wDims[0] !== biasDims[0]) {
      throw new Error("invalid bias");
    }
  }
  const spatialRank = xRank - 2;
  const expectedLengths = [
    ["dilations", spatialRank],
    ["strides", spatialRank],
    ["pads", spatialRank * 2]
  ];
  for (const [attributeName, expected] of expectedLengths) {
    if (attributes[attributeName].length !== expected) {
      throw new Error(`${attributeName} should be ${expected}D`);
    }
  }
  const kernelRank = attributes.kernelShape.length;
  if (kernelRank !== 0 && kernelRank !== wDims.length - 2) {
    throw new Error("invalid kernel shape");
  }
};
/**
 * Returns a copy of `attributes` whose `kernelShape` has zero entries replaced by
 * the matching weight dimensions and whose `pads` have been passed through
 * `PoolConvUtil.adjustPadsBasedOnAutoPad`. The input `attributes` is not modified.
 *
 * @param attributes parsed conv attributes.
 * @param inputs     [x, w, ...]; `inputs[0].dims` and `inputs[1].dims` are read.
 */
getAdjustedConvAttributes = (attributes, inputs) => {
  const kernelShape = attributes.kernelShape.slice();
  // Kernel axis k corresponds to weight axis k + 2.
  for (let axis = 0; axis + 2 < inputs[1].dims.length; axis++) {
    if (kernelShape[axis] === 0) {
      kernelShape[axis] = inputs[1].dims[axis + 2];
    }
  }
  const pads = attributes.pads.slice();
  const isChannelsLast = attributes.format === "NHWC";
  PoolConvUtil.adjustPadsBasedOnAutoPad(
    inputs[0].dims,
    attributes.strides,
    attributes.dilations,
    kernelShape,
    pads,
    isChannelsLast,
    attributes.autoPad
  );
  return { ...attributes, kernelShape, pads };
};
/**
 * Converts the raw attribute bag into the attribute object used by the conv
 * implementations, merging in the fused-activation attributes and a `cacheKey`
 * built from the format and activation name.
 *
 * @param attributes raw attributes (`format`, `auto_pad`, `dilations`, `group`,
 *                   `kernel_shape`, `pads`, `strides`, `w_is_const()`).
 */
parseConvAttributes = (attributes) => {
  const autoPadNames = ["NOTSET", "VALID", "SAME_UPPER", "SAME_LOWER"];
  const activationAttributes = parseInternalActivationAttributes(attributes);
  const { format, dilations, group, pads, strides } = attributes;
  return {
    autoPad: autoPadNames[attributes.auto_pad],
    format,
    dilations,
    group,
    kernelShape: attributes.kernel_shape,
    pads,
    strides,
    wIsConst: attributes.w_is_const(),
    ...activationAttributes,
    cacheKey: `${format};${activationAttributes.activation};`
  };
};
3 : 1]; + const weightHeight = inputs[1].dims[2]; + const weightWidth = inputs[1].dims[3]; + const outputShape = calculateOutputShape( + inputs[0].dims, + inputs[1].dims, + attributes.dilations, + adjustedAttributes.pads, + attributes.strides, + isChannelsLast + ); + const outHeight = outputShape[isChannelsLast ? 1 : 2]; + const outWidth = outputShape[isChannelsLast ? 2 : 3]; + const outChannels = outputShape[isChannelsLast ? 3 : 1]; + const sameSize = isChannelsLast && weightHeight === inputHeight && weightWidth === inputWidth && attributes.pads[0] === 0 && attributes.pads[1] === 0; + if (sameSize || weightHeight === 1 && weightWidth === 1 && attributes.dilations[0] === 1 && attributes.dilations[1] === 1 && attributes.strides[0] === 1 && attributes.strides[1] === 1 && attributes.pads[0] === 0 && attributes.pads[1] === 0) { + const batch = outputShape[0]; + let xReshaped, wReshaped, matmulOutputShape; + const matmulInputs = []; + if (isChannelsLast) { + const transposedWeight2 = context.kernelCustomData.wT ?? context.compute( + createTransposeProgramInfo(inputs[1], weightTransposeAttribute), + { inputs: [1], outputs: [attributes.wIsConst ? 
-2 : -1] } + )[0]; + if (attributes.wIsConst && !context.kernelCustomData.wT) { + context.kernelCustomData.wT = transposedWeight2; + } + if (sameSize) { + const sharedDim = inputHeight * inputWidth * inputChannels; + xReshaped = inputs[0].reshape([1, batch, sharedDim]); + wReshaped = transposedWeight2.reshape([1, sharedDim, outChannels]); + matmulOutputShape = [1, batch, outChannels]; + } else { + xReshaped = inputs[0].reshape([batch, inputHeight * inputWidth, inputChannels]); + wReshaped = transposedWeight2.reshape([1, inputChannels, outChannels]); + matmulOutputShape = [batch, outHeight * outWidth, outChannels]; + } + matmulInputs.push(xReshaped); + matmulInputs.push(wReshaped); + } else { + xReshaped = inputs[0].reshape([batch, inputChannels, inputHeight * inputWidth]); + wReshaped = inputs[1].reshape([1, outChannels, inputChannels]); + matmulOutputShape = [batch, outChannels, outHeight * outWidth]; + matmulInputs.push(wReshaped); + matmulInputs.push(xReshaped); + } + if (hasBias) { + matmulInputs.push(inputs[2]); + } + const N = matmulOutputShape[2]; + const K = matmulInputs[0].dims[matmulInputs[0].dims.length - 1]; + if (N < 8 && K < 8) { + context.compute( + createNaiveMatmulProgramInfo( + matmulInputs, + adjustedAttributes, + outputShape, + matmulOutputShape, + isChannelsLast + ), + { inputs: matmulInputs } + ); + } else { + context.compute( + createMatmulProgramInfo(matmulInputs, adjustedAttributes, outputShape, matmulOutputShape, isChannelsLast), + { inputs: matmulInputs } + ); + } + return; + } + const sequentialAccessByThreads = ( + /* backend.adapterInfo.isIntel() */ + true + ); + const transposedWeight = context.kernelCustomData.wT ?? context.compute( + createTransposeProgramInfo(inputs[1], weightTransposeAttribute), + { inputs: [1], outputs: [attributes.wIsConst ? 
-2 : -1] } + )[0]; + if (attributes.wIsConst && !context.kernelCustomData.wT) { + context.kernelCustomData.wT = transposedWeight; + } + const convInputs = [inputs[0], transposedWeight]; + if (hasBias) { + convInputs.push(inputs[2]); + } + const dimAOuter = isChannelsLast ? outHeight * outWidth : outChannels; + const dimBOuter = isChannelsLast ? outChannels : outHeight * outWidth; + const dimInner = weightHeight * weightWidth * inputChannels; + context.compute( + createConv2DMatMulProgramInfo( + convInputs, + adjustedAttributes, + outputShape, + dimAOuter, + dimBOuter, + dimInner, + hasBias, + sequentialAccessByThreads + ), + { inputs: convInputs } + ); + }; + conv1d = (context, attributes) => { + const isChannelLast = attributes.format === "NHWC"; + const inputs = [ + context.inputs[0].reshape( + isChannelLast ? ( + // [N, W, C] -> [N, H=1, W, C] + [context.inputs[0].dims[0], 1, context.inputs[0].dims[1], context.inputs[0].dims[2]] + ) : ( + // [N, C, W] -> [N, C, H=1, W] + [context.inputs[0].dims[0], context.inputs[0].dims[1], 1, context.inputs[0].dims[2]] + ) + ), + //[FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kW] -> [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kH=1, kW] + context.inputs[1].reshape([context.inputs[1].dims[0], context.inputs[1].dims[1], 1, context.inputs[1].dims[2]]) + ]; + if (context.inputs.length === 3) { + inputs.push(context.inputs[2]); + } + const pads = [0, attributes.pads[0], 0, attributes.pads[1]]; + const strides = [1].concat(attributes.strides); + const dilations = [1].concat(attributes.dilations); + const kernelShape = [1].concat(attributes.kernelShape); + const adjustedAttributes = getAdjustedConvAttributes({ ...attributes, pads, strides, dilations, kernelShape }, inputs); + context.compute(createGroupedConvProgramInfo( + inputs, + adjustedAttributes, + (outputShape) => isChannelLast ? 
[outputShape[0], outputShape[2], outputShape[3]] : [] + )); + }; + conv = (context, attributes) => { + validateInputs9(context.inputs, attributes); + if (context.inputs[0].dims.length === 3) { + conv1d(context, attributes); + } else { + conv2d(context, context.inputs, attributes); + } + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_mm_webgpu.ts +var conv2dTransposeCommonSnippet, createConv2DTransposeMatMulProgramInfo; +var init_conv_backprop_mm_webgpu = __esm({ + "web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_mm_webgpu.ts"() { + "use strict"; + init_wasm_common(); + init_log(); + init_common(); + init_fuse_utils(); + init_activation_util(); + init_conv_util(); + init_matmul_packed_webgpu(); + conv2dTransposeCommonSnippet = (isChannelsLast, addBias = false, attributes, type, innerElementSize = 4) => { + const getWSnippet = (innerElementSize2) => { + switch (innerElementSize2) { + case 1: + return "return w[getIndexFromCoords4D(coord, vec4(uniforms.w_shape))];"; + case 4: + return ` + let coord1 = vec4(coordX, coordY, col + 1, rowInner); + let coord2 = vec4(coordX, coordY, col + 2, rowInner); + let coord3 = vec4(coordX, coordY, col + 3, rowInner); + let v0 = w[getIndexFromCoords4D(coord, vec4(uniforms.w_shape))]; + let v1 = w[getIndexFromCoords4D(coord1, vec4(uniforms.w_shape))]; + let v2 = w[getIndexFromCoords4D(coord2, vec4(uniforms.w_shape))]; + let v3 = w[getIndexFromCoords4D(coord3, vec4(uniforms.w_shape))]; + return ${type}(v0, v1, v2, v3); + `; + default: + throw new Error(`innerElementSize ${innerElementSize2} is not supported.`); + } + }; + const coordASnippet = isChannelsLast ? ` + let coord = vec4(batch, iXR, iXC, xCh); + ` : ` + let coord = vec4(batch, xCh, iXR, iXC); + `; + const coordResSnippet = isChannelsLast ? ` + let coords = vec4( + batch, + row / outWidth, + row % outWidth, + col); + ` : ` + let coords = vec4( + batch, + row, + col / outWidth, + col % outWidth); + `; + const xHeight = isChannelsLast ? 
"i32(uniforms.x_shape[1])" : "i32(uniforms.x_shape[2])"; + const xWidth = isChannelsLast ? "i32(uniforms.x_shape[2])" : "i32(uniforms.x_shape[3])"; + const row = isChannelsLast ? "row" : "col"; + const col = isChannelsLast ? "col" : "row"; + const readASnippet = ` + let inChannels = ${isChannelsLast ? "i32(uniforms.x_shape[3])" : "i32(uniforms.x_shape[1])"}; + let outWidth = ${isChannelsLast ? "i32(uniforms.result_shape[2])" : "i32(uniforms.result_shape[3])"}; + let outRow = ${row} / outWidth; + let outCol = ${row} % outWidth; + + let WRow = ${col} / (uniforms.filter_dims[1] * inChannels); + let WCol = ${col} / inChannels % uniforms.filter_dims[1]; + let xR = f32(outRow - uniforms.pads[0] + uniforms.dilations[0] * WRow) / f32(uniforms.strides[0]); + let xC = f32(outCol - uniforms.pads[1] + uniforms.dilations[1] * WCol) / f32(uniforms.strides[1]); + if (xR < 0.0 || xR >= f32(${xHeight}) || fract(xR) > 0.0) { + return ${type}(0.0); + } + if (xC < 0.0 || xC >= f32(${xWidth}) || fract(xC) > 0.0) { + return ${type}(0.0); + } + let iXR = i32(xR); + let iXC = i32(xC); + let xCh = ${col} % inChannels; + ${coordASnippet} + return x[getIndexFromCoords4D(coord, vec4(uniforms.x_shape))/${innerElementSize}];`; + const sampleA = isChannelsLast ? ` + let col = colIn * ${innerElementSize}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_inner) { + ${readASnippet} + } + return ${type}(0.0);` : ` + let col = colIn * ${innerElementSize}; + if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) { + ${readASnippet} + } + return ${type}(0.0);`; + const sampleW = ` + let col = colIn * ${innerElementSize}; + let inChannels = ${isChannelsLast ? "i32(uniforms.x_shape[3])" : "i32(uniforms.x_shape[1])"}; + let coordX = uniforms.filter_dims[0] - 1 - row / (uniforms.filter_dims[1] * inChannels); + let coordY = uniforms.filter_dims[1] - 1 - (row / inChannels) % uniforms.filter_dims[1]; + if (${isChannelsLast ? 
"row < uniforms.dim_inner && col < uniforms.dim_b_outer" : "row < uniforms.dim_inner && col < uniforms.dim_a_outer"} && coordX >= 0 && coordY >= 0) { + let rowInner = row % inChannels; + let coord = vec4(coordX, coordY, col, rowInner); + ${getWSnippet(innerElementSize)} + } + return ${type}(0.0); + `; + const applyActivation = getActivationSnippet(attributes, type); + const userCode = ` + fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${type} { + ${isChannelsLast ? sampleA : sampleW} + } + + fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${type} { + ${isChannelsLast ? sampleW : sampleA} + } + + fn mm_write(batch: i32, row : i32, colIn : i32, valueInput : ${type}) { + let col = colIn * ${innerElementSize}; + if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) { + var value = valueInput; + let outWidth = ${isChannelsLast ? "i32(uniforms.result_shape[2])" : "i32(uniforms.result_shape[3])"}; + ${coordResSnippet} + ${biasSnippet(addBias)} + ${applyActivation} + result[getIndexFromCoords4D(coords, vec4(uniforms.result_shape))/${innerElementSize}] = value; + } + }`; + return userCode; + }; + createConv2DTransposeMatMulProgramInfo = (inputs, attributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias, sequentialAccessByThreads) => { + const isChannelsLast = attributes.format === "NHWC"; + const inChannels = isChannelsLast ? inputs[0].dims[3] : inputs[0].dims[1]; + const batchSize = outputShape[0]; + const outWidth = isChannelsLast ? outputShape[2] : outputShape[3]; + const outHeight = isChannelsLast ? outputShape[1] : outputShape[2]; + const outChannels = isChannelsLast ? outputShape[3] : outputShape[1]; + const isVec4 = isChannelsLast && (inChannels % 4 === 0 && inChannels % 3) && outChannels % 4 === 0; + const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight; + const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels; + const workGroupSize = [8, 8, 1]; + const elementsPerThread = dimAOuter <= 8 ? 
[4, 1, 1] : [4, 4, 1]; + const dispatch = [ + Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]), + Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]), + Math.ceil(batchSize / workGroupSize[2] / elementsPerThread[2]) + ]; + LOG_DEBUG("verbose", () => `[conv_backprop_mm_webgpu] dispatch = ${dispatch}`); + const innerElementSize = isVec4 ? 4 : 1; + const tileInner = Math.max(workGroupSize[0] * innerElementSize, workGroupSize[1]); + const components = isVec4 ? 4 : 1; + const filterDims = [attributes.kernelShape[isChannelsLast ? 1 : 2], attributes.kernelShape[isChannelsLast ? 2 : 3]]; + const effectiveFilterDims = [ + filterDims[0] + (attributes.dilations[0] <= 1 ? 0 : (filterDims[0] - 1) * (attributes.dilations[0] - 1)), + filterDims[1] + (attributes.dilations[1] <= 1 ? 0 : (filterDims[1] - 1) * (attributes.dilations[1] - 1)) + ]; + const pads = [ + effectiveFilterDims[0] - 1 - Math.floor((attributes.pads[0] + attributes.pads[2]) / 2), + effectiveFilterDims[1] - 1 - Math.floor((attributes.pads[1] + attributes.pads[3]) / 2) + ]; + const programUniforms = [ + { type: 6 /* int32 */, data: dimAOuter }, + { type: 6 /* int32 */, data: dimBOuter }, + { type: 6 /* int32 */, data: dimInner }, + { type: 6 /* int32 */, data: attributes.strides }, + { type: 6 /* int32 */, data: attributes.dilations }, + { type: 6 /* int32 */, data: filterDims }, + { type: 6 /* int32 */, data: pads } + ]; + appendActivationUniformsData(attributes, programUniforms); + programUniforms.push(...createTensorShapeVariables(inputs[0].dims, inputs[1].dims)); + const inputDependencies = ["rank", "rank"]; + if (hasBias) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + inputDependencies.push("rank"); + } + programUniforms.push(...createTensorShapeVariables(outputShape)); + const getShaderSource = (shaderHelper) => { + const x = inputVariable("x", inputs[0].dataType, inputs[0].dims.length, components); + const w = inputVariable("w", inputs[1].dataType, 
inputs[1].dims.length, 1); + const output = outputVariable("result", inputs[0].dataType, outputShape.length, components); + const inputVariables = [x, w]; + let declareFunctions = ""; + if (hasBias) { + const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, components); + inputVariables.push(bias); + declareFunctions += ` + fn getBiasByOutputCoords(coords : vec4) -> ${bias.type.value} { + return bias[coords.${isChannelsLast ? "w" : "y"}${isVec4 ? "/ 4" : ""}]; + }`; + } + const uniforms = [ + { name: "dim_a_outer", type: "i32" }, + { name: "dim_b_outer", type: "i32" }, + { name: "dim_inner", type: "i32" }, + { name: "strides", type: "i32", length: 2 }, + { name: "dilations", type: "i32", length: 2 }, + { name: "filter_dims", type: "i32", length: filterDims.length }, + { name: "pads", type: "i32", length: pads.length } + ]; + appendActivationUniforms(attributes, uniforms); + const elemType = tensorTypeToWsglStorageType(inputs[0].dataType, 1); + if (elemType !== "f16" && elemType !== "f32") { + throw new Error(`elemType ${elemType} is not supported.`); + } + return ` + ${utilFunctions("uniforms.result_strides")} + ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)}; + ${declareFunctions} + ${conv2dTransposeCommonSnippet(isChannelsLast, hasBias, attributes, x.type.value, innerElementSize)} + ${isVec4 ? 
makeMatMulPackedVec4Source( + elementsPerThread, + workGroupSize, + elemType, + void 0, + !isChannelsLast, + tileInner + ) : makeMatMulPackedSource( + elementsPerThread, + workGroupSize, + elemType, + void 0, + !isChannelsLast, + tileInner, + false, + void 0, + sequentialAccessByThreads + )}`; + }; + return { + name: "Conv2DTransposeMatMul", + shaderCache: { hint: `${attributes.cacheKey};${elementsPerThread};${workGroupSize};${isVec4}`, inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: dispatch[0], y: dispatch[1], z: dispatch[2] }, + programUniforms + }), + getShaderSource + }; + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts +var createConvTranspose2DOpProgramShaderSource, createConvTranspose2DProgramInfo; +var init_conv_backprop_webgpu = __esm({ + "web/lib/wasm/jsep/webgpu/ops/3rd-party/conv_backprop_webgpu.ts"() { + "use strict"; + init_wasm_common(); + init_log(); + init_util(); + init_common(); + createConvTranspose2DOpProgramShaderSource = (shaderHelper, inputs, outputShape, hasBias, is1DimensionDispatch, isVec4 = false, dataType, uniforms, isChannelsLast = false) => { + const rowDim = isChannelsLast ? 1 : 2; + const colDim = isChannelsLast ? 2 : 3; + const channelDim = isChannelsLast ? 3 : 1; + const workPerThread = isVec4 ? 2 : 1; + let declareFunctions = ` + fn setOutputAtIndex(flatIndex : u32, value : ${isVec4 ? `vec4<${dataType}>` : dataType}) { + result[flatIndex] = ${isVec4 ? `vec4<${dataType}>` : dataType}(value); + }`; + if (hasBias) { + declareFunctions += ` + fn getBiasByOutputCoords(coords : vec4) -> ${isVec4 ? `vec4<${dataType}>` : dataType} { + return bias[coords.${isChannelsLast ? "w" : "y"}${isVec4 ? "/ 4" : ""}]; + }`; + } + const components = isVec4 ? 
4 : 1; + const w = inputVariable("W", inputs[1].dataType, inputs[1].dims.length, components); + const dy = inputVariable("Dy", inputs[0].dataType, inputs[0].dims.length, components); + const inputVariables = [dy, w]; + if (hasBias) { + inputVariables.push(inputVariable("bias", inputs[2].dataType, [outputShape[channelDim]].length, components)); + } + const output = outputVariable("result", inputs[0].dataType, outputShape.length, components); + const codeSnippet4 = `{ + let batch: u32 = ${is1DimensionDispatch ? "global_id.z" : "workgroup_id.z"} / uniforms.result_shape[1]; + let r = ${is1DimensionDispatch ? "global_id.z" : "workgroup_id.z"} % uniforms.result_shape[1]; + let c = ${is1DimensionDispatch ? "global_id.y" : "workgroup_id.y"} * ${workPerThread}; + let d1: u32 = ${is1DimensionDispatch ? "global_id.x" : "workgroup_id.x"} * 4; + + let dyCorner = vec2(i32(r), i32(c)) - vec2(uniforms.pads); + + // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1). + // ? = to be determined. : = across all values in that axis. 
+ var dotProd: array, ${workPerThread}>; + for (var i = 0; i < ${workPerThread}; i++) { + dotProd[i] = vec4<${dataType}>(0.0); + } + for (var wR: u32 = 0; wR < uniforms.filter_dims[0]; wR = wR + 1) { + var dyR = (${dataType}(dyCorner.x) + ${dataType}(wR)) / ${dataType}(uniforms.strides.x); + let wRPerm = uniforms.filter_dims[0] - 1 - wR; + if (dyR < 0.0 || dyR >= ${dataType}(uniforms.Dy_shape[1]) || + fract(dyR) > 0.0 || wRPerm < 0) { + continue; + } + let idyR: u32 = u32(dyR); + + for (var wC: u32 = 0; wC < uniforms.filter_dims[1]; wC = wC + 1) { + let dyC = (${dataType}(dyCorner.y) + ${dataType}(wC)) / ${dataType}(uniforms.strides.y); + let dyC2 = (${dataType}(dyCorner.y) + 1.0 + ${dataType}(wC)) / ${dataType}(uniforms.strides.y); + let wCPerm = uniforms.filter_dims[1] - 1 - wC; + if (wCPerm < 0) { + continue; + } + var bDyCVal = true; + var bDyCVal2 = true; + if (dyC < 0.0 || dyC >= ${dataType}(uniforms.Dy_shape[2]) || + fract(dyC) > 0.0) { + bDyCVal = false; + } + if (dyC2 < 0.0 || dyC2 >= ${dataType}(uniforms.Dy_shape[2]) || + fract(dyC2) > 0.0) { + bDyCVal2 = false; + } + + let idyC: u32 = u32(dyC); + let idyC2: u32 = u32(dyC2); + if (bDyCVal && bDyCVal2) { + let d2Length = uniforms.Dy_shape[3]; + for (var d2 :u32 = 0; d2 < d2Length; d2 = d2 + 4) { + let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")}; + let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")}; + let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")}; + let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")}; + + var xValue = ${dy.get("batch", "idyR", "idyC", "d2")}; + let tmpval = vec4<${dataType}>(dot(xValue, wValue0), + dot(xValue, wValue1), + dot(xValue, wValue2), + dot(xValue, wValue3)); + dotProd[0] = dotProd[0] + tmpval; + + xValue = ${dy.get("batch", "idyR", "idyC2", "d2")}; + + dotProd[1] = dotProd[1] + vec4<${dataType}>(dot(xValue, wValue0), + dot(xValue, wValue1), + dot(xValue, wValue2), + dot(xValue, wValue3)); + } + } else if 
(bDyCVal) { + let d2Length = uniforms.Dy_shape[${channelDim}]; + for (var d2: u32 = 0; d2 < d2Length; d2 = d2 + 4) { + let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")}; + let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")}; + let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")}; + let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")}; + + var xValue = ${dy.get("batch", "idyR", "idyC", "d2")}; + let tmpval = vec4<${dataType}>(dot(xValue, wValue0), + dot(xValue, wValue1), + dot(xValue, wValue2), + dot(xValue, wValue3)); + dotProd[0] = dotProd[0] + tmpval; + } + } else if (bDyCVal2) { + let d2Length = uniforms.Dy_shape[3]; + for (var d2: u32 = 0; d2 < d2Length; d2 = d2 + 4) { + let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")}; + let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")}; + let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")}; + let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")}; + + var xValue = ${dy.get("batch", "idyR", "idyC2", "d2")}; + let tmpval = vec4<${dataType}>(dot(xValue, wValue0), + dot(xValue, wValue1), + dot(xValue, wValue2), + dot(xValue, wValue3)); + dotProd[1] = dotProd[1] + tmpval; + } + } + } + } + + for (var i: u32 = 0; i < ${workPerThread}; i = i + 1) { + let value = dotProd[i] + ${hasBias ? 
"bias[c+i]" : `vec4<${dataType}>(0.0)`}; + ${output.set("batch", "r", "c + i", "d1", "value")}; + } + }`; + const codeSnippet = ` + let outputIndices = ${output.offsetToIndices("global_idx")}; + let batch = ${output.indicesGet("outputIndices", 0)}; + let d1 = ${output.indicesGet("outputIndices", channelDim)}; + let r = ${output.indicesGet("outputIndices", rowDim)}; + let c = ${output.indicesGet("outputIndices", colDim)}; + let dyCorner = vec2(i32(r), i32(c)) - uniforms.pads; + let dyRCorner = dyCorner.x; + let dyCCorner = dyCorner.y; + let groupId = d1 / uniforms.output_channels_per_group; + let wOutChannel = d1 - groupId * uniforms.output_channels_per_group; + // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1). + // ? = to be determined. : = across all values in that axis. + var dotProd = ${dataType}(0.0); + for (var wR: u32 = 0; wR < uniforms.effective_filter_dims.x; wR = wR + 1) { + if (wR % uniforms.dilations.x != 0) { + continue; + } + let dyR = (${dataType}(dyRCorner) + ${dataType}(wR)) / ${dataType}(uniforms.strides[0]); + let wRPerm = uniforms.filter_dims.x - 1 - wR / uniforms.dilations.x; + if (dyR < 0.0 || dyR >= ${dataType}(uniforms.Dy_shape[${rowDim}]) || fract(dyR) > 0.0 || + wRPerm < 0) { + continue; + } + let idyR: u32 = u32(dyR); + + for (var wC: u32 = 0; wC < uniforms.effective_filter_dims.y; wC = wC + 1) { + if (wC % uniforms.dilations.y != 0) { + continue; + } + let dyC = (${dataType}(dyCCorner) + ${dataType}(wC)) / ${dataType}(uniforms.strides.y); + let wCPerm = uniforms.filter_dims.y - 1 - wC / uniforms.dilations.y; + if (dyC < 0.0 || dyC >= ${dataType}(uniforms.Dy_shape[${colDim}]) || + fract(dyC) > 0.0 || wCPerm < 0) { + continue; + } + let idyC: u32 = u32(dyC); + var inputChannel = groupId * uniforms.input_channels_per_group; + for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group; d2 = d2 + 1) { + let xValue = ${isChannelsLast ? 
dy.get("batch", "idyR", "idyC", "inputChannel") : dy.get("batch", "inputChannel", "idyR", "idyC")}; + let wValue = ${w.get("inputChannel", "wOutChannel", "u32(wRPerm)", "u32(wCPerm)")}; + dotProd = dotProd + xValue * wValue; + inputChannel = inputChannel + 1; + } + } + } + let value = dotProd + ${hasBias ? "bias[d1]" : `${dataType}(0.0)`}; + ${output.setByOffset("global_idx", "value")}; + `; + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)} + ${declareFunctions} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}; + ${isVec4 ? codeSnippet4 : codeSnippet}}`; + }; + createConvTranspose2DProgramInfo = (inputs, attributes, squeezeOutputShapeFunction) => { + const hasBias = inputs.length > 2; + const outputShape = attributes.outputShape; + const outputSize = ShapeUtil.size(outputShape); + const dispatch = [ + Math.ceil(outputSize / 64), + 1, + 1 + ]; + LOG_DEBUG("verbose", () => `[conv2d_backprop_webgpu] dispatch = ${dispatch}`); + const isChannelsLast = attributes.format === "NHWC"; + const inputDependencies = ["rank", "rank"]; + const strides = [attributes.strides[0], attributes.strides[1]]; + const filterDims = [attributes.kernelShape[isChannelsLast ? 1 : 2], attributes.kernelShape[isChannelsLast ? 2 : 3]]; + const dilations = [attributes.dilations[0], attributes.dilations[1]]; + const effectiveFilterDims = [ + filterDims[0] + (attributes.dilations[0] <= 1 ? 0 : (attributes.kernelShape[isChannelsLast ? 1 : 2] - 1) * (attributes.dilations[0] - 1)), + filterDims[1] + (attributes.dilations[1] <= 1 ? 0 : (attributes.kernelShape[isChannelsLast ? 
2 : 3] - 1) * (attributes.dilations[1] - 1)) + ]; + const pads = [ + effectiveFilterDims[0] - 1 - Math.floor((attributes.pads[0] + attributes.pads[2]) / 2), + effectiveFilterDims[1] - 1 - Math.floor(attributes.pads[1] + attributes.pads[3]) / 2 + ]; + const isVec4 = false; + const group = attributes.group; + const wShape = inputs[1].dims; + const inputChannelsPerGroup = wShape[0] / group; + const outputChannelsPerGroup = wShape[1]; + const programUniforms = [ + { type: 12 /* uint32 */, data: outputSize }, + { type: 12 /* uint32 */, data: strides }, + { type: 12 /* uint32 */, data: filterDims }, + { type: 12 /* uint32 */, data: dilations }, + { type: 12 /* uint32 */, data: effectiveFilterDims }, + { type: 6 /* int32 */, data: pads }, + { type: 12 /* uint32 */, data: inputChannelsPerGroup }, + { type: 12 /* uint32 */, data: outputChannelsPerGroup }, + ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims) + ]; + if (hasBias) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + inputDependencies.push("rank"); + } + programUniforms.push(...createTensorShapeVariables(outputShape)); + const is1DimensionDispatch = dispatch[1] === 1 && dispatch[2] === 1; + const getShaderSource = (shaderHelper) => { + const uniforms = [ + { name: "output_size", type: "u32" }, + { name: "strides", type: "u32", length: strides.length }, + { name: "filter_dims", type: "u32", length: filterDims.length }, + { name: "dilations", type: "u32", length: filterDims.length }, + { name: "effective_filter_dims", type: "u32", length: effectiveFilterDims.length }, + { name: "pads", type: "i32", length: pads.length }, + { name: "input_channels_per_group", type: "u32" }, + { name: "output_channels_per_group", type: "u32" } + ]; + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + return `${createConvTranspose2DOpProgramShaderSource( + shaderHelper, + inputs, + outputShape, + hasBias, + is1DimensionDispatch, + isVec4, + dataType, + uniforms, + isChannelsLast + 
)}`; + }; + return { + name: "ConvTranspose2D", + shaderCache: { hint: `${attributes.cacheKey};`, inputDependencies }, + getRunData: () => ({ + dispatchGroup: { x: dispatch[0], y: dispatch[1], z: dispatch[2] }, + outputs: [{ + dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape, + dataType: inputs[0].dataType + }], + programUniforms + }), + getShaderSource + }; + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts +var computeTotalPad, distributePadding, calculateOutputShapeAndPads, getAdjustedConvTransposeAttributes, parseConvTransposeAttributes, validateInputs10, weightTransposePerm, convTranspose2d, convTranspose1d, convTranspose; +var init_conv_transpose = __esm({ + "web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts"() { + "use strict"; + init_conv_backprop_mm_webgpu(); + init_conv_backprop_webgpu(); + init_fuse_utils(); + init_transpose(); + computeTotalPad = (inDim, stride, adj, kernel, dilation, outSize) => (inDim - 1) * stride + adj + (kernel - 1) * dilation + 1 - outSize; + distributePadding = (totalPad, autoPad, pads, head, tail) => { + const smallPad = Math.floor(totalPad / 2); + if (autoPad === "SAME_UPPER") { + pads[head] = smallPad; + pads[tail] = totalPad - smallPad; + } else if (autoPad === "SAME_LOWER") { + pads[head] = totalPad - smallPad; + pads[tail] = smallPad; + } + }; + calculateOutputShapeAndPads = (inputShape, kernelShape, dilations, autoPad, group, pads, strides, isChannelLast, outputPadding, outputShape) => { + const spatialRank = inputShape.length - 2; + const updateOutputShape = outputShape.length === 0; + if (outputPadding.length === 0) { + for (let i = 0; i < spatialRank; ++i) { + outputPadding.push(0); + } + } + const batchSize = inputShape[0]; + const outChannels = kernelShape[isChannelLast ? 3 : 1] * group; + for (let i = 0, j = inputShape.length - spatialRank - (isChannelLast ? 1 : 0); i < spatialRank; ++i, ++j) { + const inSize = inputShape[j]; + const outSize = updateOutputShape ? 
inSize * strides[i] : outputShape[i]; + const totalPad = computeTotalPad(inSize, strides[i], pads[i], kernelShape[j], dilations[i], outSize); + distributePadding(totalPad, autoPad, pads, i, i + spatialRank); + if (updateOutputShape) { + outputShape.push( + strides[i] * (inSize - 1) + outputPadding[i] + (kernelShape[j] - 1) * dilations[i] + 1 - pads[i] - pads[i + spatialRank] + ); + } + } + outputShape.splice(0, 0, batchSize); + outputShape.splice(isChannelLast ? 3 : 1, 0, outChannels); + }; + getAdjustedConvTransposeAttributes = (attributes, inputs) => { + const kernelShape = attributes.kernelShape.slice(); + if (attributes.kernelShape.length === 0 || attributes.kernelShape.reduce((a, b) => a * b, 1) === 0) { + kernelShape.length = 0; + for (let i = 2; i < inputs[1].dims.length; ++i) { + kernelShape.push(inputs[1].dims[i]); + } + } + const isChannelsLast = attributes.format === "NHWC"; + kernelShape.splice(0, 0, inputs[1].dims[0]); + kernelShape.splice(isChannelsLast ? 3 : 1, 0, inputs[1].dims[1]); + const pads = attributes.pads.slice(); + const outputShape = attributes.outputShape.slice(); + const outputPadding = attributes.outputPadding.slice(); + const inputShape = inputs[0].dims; + let dilations = attributes.dilations.slice(); + if (dilations.reduce((a, b) => a + b, 0) === 0) { + const spatialRank = inputs[0].dims.length - 2; + dilations = new Array(spatialRank).fill(1); + } + let strides = attributes.strides.slice(); + if (strides.reduce((a, b) => a + b, 0) === 0) { + const spatialRank = inputs[0].dims.length - 2; + strides = new Array(spatialRank).fill(1); + } + calculateOutputShapeAndPads( + inputShape, + kernelShape, + dilations, + attributes.autoPad, + attributes.group, + pads, + strides, + isChannelsLast, + outputPadding, + outputShape + ); + const newAttributes = Object.assign({}, attributes); + Object.assign(newAttributes, { kernelShape, pads, outputPadding, outputShape, dilations, strides }); + return newAttributes; + }; + parseConvTransposeAttributes 
// Validates the inputs and attributes of a ConvTranspose node and throws an
// Error describing the first violated constraint. Returns nothing on success.
//   inputs     - [data, weight] or [data, weight, bias]
//   attributes - parsed ConvTranspose attributes
validateInputs10 = (inputs, attributes) => {
  if (!inputs || (inputs.length !== 2 && inputs.length !== 3)) {
    throw new Error("Conv requires 2 or 3 inputs");
  }

  const dataDims = inputs[0].dims;
  const rank = dataDims.length;
  if (rank !== 4 && rank !== 3) {
    throw new Error("currently only support 2-dimensional conv");
  }

  const weightDims = inputs[1].dims;
  if (rank !== weightDims.length) {
    throw new Error("filter does not have same dimension as input");
  }

  // The channel axis is the last one for NHWC and axis 1 otherwise.
  const channelAxis = attributes.format === "NHWC" ? rank - 1 : 1;
  if (dataDims[channelAxis] !== weightDims[0]) {
    throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");
  }

  if (inputs.length === 3) {
    const biasDims = inputs[2].dims;
    const featureMaps = weightDims[1] * attributes.group;
    if (biasDims.length !== 1 || biasDims[0] !== featureMaps) {
      throw new Error("invalid bias");
    }
  }

  // An attribute whose entries sum to zero is treated as "not set" and its
  // length is not checked.
  const isSet = (values) => values.reduce((acc, value) => acc + value, 0) > 0;
  const spatialRank = rank - 2;

  if (isSet(attributes.dilations) && attributes.dilations.length !== spatialRank) {
    throw new Error(`dilations should be ${spatialRank}D`);
  }
  if (isSet(attributes.strides) && attributes.strides.length !== spatialRank) {
    throw new Error(`strides should be ${spatialRank}D`);
  }
  if (isSet(attributes.pads) && attributes.pads.length !== spatialRank * 2) {
    throw new Error(`pads should be ${spatialRank * 2}D`);
  }

  const outputPaddingRank = attributes.outputPadding.length;
  if (outputPaddingRank !== spatialRank && outputPaddingRank !== 0) {
    throw new Error(`output_padding should be ${spatialRank}D`);
  }

  const kernelRank = attributes.kernelShape.length;
  if (isSet(attributes.kernelShape) && kernelRank !== 0 && kernelRank !== weightDims.length - 2) {
    throw new Error("invalid kernel shape");
  }

  const outputShapeRank = attributes.outputShape.length;
  if (outputShapeRank !== 0 && outputShapeRank !== rank - 2) {
    throw new Error("invalid output shape");
  }
};
// Runs a 1-D ConvTranspose by inserting a height axis of size 1 into the data
// and weight tensors, dispatching the 2-D program, and removing that axis
// again from the output shape.
//   context    - compute context exposing `inputs` and `compute`
//   attributes - parsed ConvTranspose attributes
convTranspose1d = (context, attributes) => {
  const isChannelLast = attributes.format === "NHWC";
  const [dataN, dataA, dataB] = context.inputs[0].dims;
  const [wOut, wIn, wWidth] = context.inputs[1].dims;

  // [N, W, C] -> [N, H=1, W, C]   or   [N, C, W] -> [N, C, H=1, W]
  const dataShape2d = isChannelLast ? [dataN, 1, dataA, dataB] : [dataN, dataA, 1, dataB];
  const inputs = [
    context.inputs[0].reshape(dataShape2d),
    // [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kW] -> [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kH=1, kW]
    context.inputs[1].reshape([wOut, wIn, 1, wWidth]),
  ];
  if (context.inputs.length === 3) {
    inputs.push(context.inputs[2]);
  }

  // An empty attribute, or one whose first entry is 0, falls back to a default.
  const orDefault = (values, fallback) => (values.length === 0 || values[0] === 0 ? fallback : values);
  const kernelShape1d = orDefault(attributes.kernelShape, [wWidth]);
  const dilations1d = orDefault(attributes.dilations, [1]);
  const strides1d = orDefault(attributes.strides, [1]);
  const pads1d = attributes.pads.length === 0 ? [0, 0] : attributes.pads;

  // Prefix the values that belong to the inserted height axis.
  const pads = [0, pads1d[0], 0, pads1d[1]];
  const strides = [1].concat(strides1d);
  const dilations = [1].concat(dilations1d);
  const kernelShape = [1].concat(kernelShape1d);

  const adjustedAttributes = getAdjustedConvTransposeAttributes(
    { ...attributes, pads, strides, dilations, kernelShape },
    inputs
  );

  // Drops the height axis from the 4-D output shape.
  const squeezeOutputShape = (outputShape) => {
    if (isChannelLast) {
      return [outputShape[0], outputShape[2], outputShape[3]];
    }
    return [outputShape[0], outputShape[1], outputShape[3]];
  };

  context.compute(createConvTranspose2DProgramInfo(inputs, adjustedAttributes, squeezeOutputShape));
};
// Converts the raw CumSum attributes into booleans (a flag is on only when its
// raw value is exactly 1) and wraps them with a cache key.
parseCumSumAttributes = ({ exclusive, reverse }) =>
  createAttributeWithCacheKey({
    exclusive: exclusive === 1,
    reverse: reverse === 1,
  });
// Builds the program info for DepthToSpace. The input is viewed as a rank-6
// tensor that splits the channel dimension into (blocksize, blocksize,
// c / blocksize^2); the shader then writes that view permuted by `perm`.
//   inputTensor - 4-D input tensor (layout selected by attributes.format)
//   attributes  - { blocksize, mode, format }; mode "DCR" selects the DCR
//                 split, any other value the alternative split
// Returns { name, shaderCache, getRunData, getShaderSource }.
createDepthToSpaceProgramInfo = (inputTensor, attributes) => {
  let n, h, w, c;
  let shape;
  let perm;
  const isChannelLast = attributes.format === "NHWC";
  const blocksize = attributes.blocksize;
  const isDCRmode = attributes.mode === "DCR";
  if (isChannelLast) {
    [n, h, w, c] = inputTensor.dims;
    shape = isDCRmode ? [n, h, w, blocksize, blocksize, c / blocksize ** 2] : [n, h, w, c / blocksize ** 2, blocksize, blocksize];
    perm = isDCRmode ? [0, 1, 3, 2, 4, 5] : [0, 1, 4, 2, 5, 3];
  } else {
    [n, h, w, c] = [inputTensor.dims[0], inputTensor.dims[2], inputTensor.dims[3], inputTensor.dims[1]];
    shape = isDCRmode ? [n, blocksize, blocksize, c / blocksize ** 2, h, w] : [n, c / blocksize ** 2, blocksize, blocksize, h, w];
    perm = isDCRmode ? [0, 3, 4, 1, 5, 2] : [0, 1, 4, 2, 5, 3];
  }
  const reshapedInputTensor = inputTensor.reshape(shape);
  const reshapedInputRank = reshapedInputTensor.dims.length;
  const inputDataType = inputTensor.dataType;
  const reshapedInput = inputVariable("a", inputDataType, reshapedInputRank);
  const permedOutput = outputVariable("output", inputDataType, reshapedInputRank);
  const getShaderSource = (shaderHelper) => `
  ${shaderHelper.registerUniform("output_size", "u32").declareVariables(reshapedInput, permedOutput)}

  ${permFunctionBody2(perm, reshapedInputRank, reshapedInput, permedOutput)}

  ${shaderHelper.mainStart()}
    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}

    let indices = ${permedOutput.offsetToIndices("global_idx")};
    let aIndices = perm(indices);

    ${permedOutput.setByOffset("global_idx", reshapedInput.getByIndices("aIndices"))}
  }`;
  return {
    name: "DepthToSpace",
    // `perm` is baked into the shader text and depends on the layout, so the
    // format must be part of the hint; otherwise an NHWC and an NCHW program
    // with equal dims/blocksize/mode would produce the same hint.
    shaderCache: {
      hint: `${inputTensor.dims};${attributes.blocksize};${attributes.mode};${attributes.format}`,
      inputDependencies: ["rank"]
    },
    getRunData: (inputs) => {
      const outputShape = isChannelLast ? [n, h * blocksize, w * blocksize, c / blocksize ** 2] : [n, c / blocksize ** 2, h * blocksize, w * blocksize];
      const outputSize = ShapeUtil.size(outputShape);
      const shapeBeforePerm = reshapedInputTensor.dims;
      const shapeAfterPerm = ShapeUtil.sortBasedOnPerm(shapeBeforePerm, perm);
      return {
        outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
        dispatchGroup: { x: Math.ceil(
          outputSize / 64
          /* workgroup size */
        ) },
        programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(shapeBeforePerm, shapeAfterPerm)]
      };
    },
    getShaderSource
  };
};
// Parsed form of an einsum equation for a concrete list of inputs.
// After construction the instance exposes:
//   equation      - the equation string as given
//   lhs           - one EinsumTerm per input term, in input order
//   rhs           - the EinsumTerm of the output (constructed with index -1)
//   symbolToInfo  - Map: symbol -> { count, dimValue, inputIndices }
//   hasEllipsis / ellipsisDims - set once an input term contains "..."
//   outputDims    - dimensions of the output tensor
// The constructor throws an Error when the equation is malformed or when it
// does not fit the input dimensions.
EinsumEquation = class {
  constructor(inputs, equation) {
    this.equation = equation;
    this.hasEllipsis = false;
    this.symbolToInfo = /* @__PURE__ */ new Map();
    this.lhs = new Array();
    this.outputDims = [];
    let [lhs, rhs] = equation.includes("->") ? equation.split("->", 2) : [equation, ""];
    if (!lhs.match(RegExp(lhsPatternOnly))) {
      throw new Error("Invalid LHS term");
    }
    const inputTerms = lhs.split(",");
    inputTerms.forEach((inputTerm, index) => {
      const dims = inputs[index].dims.slice();
      if (!inputTerm.match(RegExp(termPatternOnly))) {
        throw new Error("Invalid LHS term");
      }
      const einsumTerm = this.processTerm(inputTerm, true, dims, index);
      this.lhs.push(einsumTerm);
    });
    if (rhs === "") {
      // No output term given: derive it from the symbols seen exactly once.
      // NOTE(review): an equation ending in "->" also lands here because its
      // RHS is the empty string - confirm that this is intended.
      rhs += [...this.symbolToInfo.entries()].filter(([sym, info]) => info.count === 1 || sym === "...").map(([sym]) => sym).join("");
    } else {
      // The whole RHS must be a valid term. The anchored pattern is required:
      // the unanchored one accepts any string that merely contains a symbol.
      if (!rhs.match(RegExp(termPatternOnly))) {
        throw new Error("Invalid RHS");
      }
    }
    const rhsSymbols = rhs.match(RegExp(symbolPattern, "g"));
    rhsSymbols?.forEach((symbol) => {
      if (symbol === "...") {
        this.outputDims = this.outputDims.concat(this.ellipsisDims);
      } else {
        const info = this.symbolToInfo.get(symbol);
        if (info === void 0) {
          throw new Error("Invalid RHS symbol");
        }
        this.outputDims.push(info.dimValue);
      }
    });
    this.rhs = this.processTerm(rhs, false, this.outputDims);
  }
  // End of EinsumEquation constructor

  // Records one occurrence of `symbol` with dimension `dimValue` in the term
  // identified by `inputIndex`. Throws "Dimension mismatch" when the symbol
  // has already been seen more than once with a different dimension.
  addSymbol(symbol, dimValue, inputIndex) {
    let info = this.symbolToInfo.get(symbol);
    if (info !== void 0) {
      if (info.dimValue !== dimValue && info.count !== 1) {
        throw new Error("Dimension mismatch");
      } else {
        info.count++;
        info.inputIndices.push(inputIndex);
      }
    } else {
      info = { count: 1, dimValue, inputIndices: [inputIndex] };
    }
    this.symbolToInfo.set(symbol, info);
  }

  // Processes one input or output term and returns its EinsumTerm.
  //   term    - the term text, e.g. "ij" or "...k"
  //   isInput - true for LHS terms, false for the output term
  //   dims    - dimensions of the tensor the term describes
  //   index   - input index, -1 for the output
  processTerm(term, isInput, dims, index = -1) {
    const rank = dims.length;
    let ellipsis = false;
    let ellipsisDims = [];
    let nextDim = 0;
    // Only output terms are validated here (input terms are validated by the
    // constructor), hence the RHS wording of the message.
    if (!term.match(RegExp(termPatternOnly)) && (!isInput && term !== "")) {
      throw new Error("Invalid RHS term");
    }
    const indexSymbols = term.match(RegExp(symbolPattern, "g"));
    const einsumTerm = new EinsumTerm(index);
    indexSymbols?.forEach((symbol, i) => {
      if (symbol === "...") {
        if (ellipsis) {
          throw new Error("Only one ellipsis is allowed per input term");
        }
        ellipsis = true;
        // Number of dimensions covered by the ellipsis in this term.
        const ellipsisDimLength = rank - indexSymbols.length + 1;
        if (ellipsisDimLength < 0) {
          throw new Error("Ellipsis out of bounds");
        }
        ellipsisDims = dims.slice(nextDim, nextDim + ellipsisDimLength);
        if (this.hasEllipsis) {
          if (this.ellipsisDims.length !== ellipsisDims.length || this.ellipsisDims.toString() !== ellipsisDims.toString()) {
            throw new Error("Ellipsis dimensions mismatch");
          }
        } else if (isInput) {
          this.hasEllipsis = true;
          this.ellipsisDims = ellipsisDims;
        } else {
          throw new Error("Ellipsis must be specified in the LHS");
        }
        // Each ellipsis dimension gets a synthetic symbol "0", "1", ...
        for (let j = 0; j < ellipsisDims.length; j++) {
          const symbol2 = String.fromCharCode("0".charCodeAt(0) + j);
          einsumTerm.addSymbol(symbol2, i + j);
          this.addSymbol(symbol2, dims[nextDim++], index);
        }
      } else {
        einsumTerm.addSymbol(symbol, i + (this.hasEllipsis ? this.ellipsisDims.length - 1 : 0));
        this.addSymbol(symbol, dims[nextDim++], index);
      }
    });
    return einsumTerm;
  }
};
reduceOpsSetIndices.push(`${inputVars[i].indicesSet(`input${i}Indices`, index, `${symbol}`)}`); + }); + reduceOpCompute.push(`prod *= ${inputVars[i].getByIndices(`input${i}Indices`)};`); + } + }); + reduceOpsLoopHeaders.push( + `for(var ${symbol}: u32 = 0; ${symbol} < uniforms.${appendMax(symbol)}; ${symbol}++) {` + ); + reduceOpsLoopFooters.push("}"); + } + }); + const reduceOps2 = isReduceOpsWithoutLoop ? [ + ...idxCopy, + `let sum = ${inputVars.map((inputVar, i) => inputVar.getByIndices(`input${i}Indices`)).join(" * ")};` + ] : [ + ...idxCopy, + initSum, + ...reduceOpsLoopHeaders, + ...reduceOpsSetIndices, + initProd, + ...reduceOpCompute, + updateSum, + ...reduceOpsLoopFooters + ]; + return ` + ${shaderHelper.registerUniforms(uniformsSymbols.map((symbol) => ({ name: `${appendMax(symbol)}`, type: "u32" }))).registerUniform("outputSize", "u32").declareVariables(...inputVars, output)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + var outputIndices = ${output.offsetToIndices("global_idx")}; + ${inputVars.map((_var, i) => `var input${i}Indices: ${inputVars[i].type.indices};`).join("\n")} + ${reduceOps2.join("\n")}; + ${output.setByOffset("global_idx", "sum")}; + }`; + }; + return { + name: "Einsum", + shaderCache: { hint: einsumEquation.equation, inputDependencies: inputShapes.map(() => "rank") }, + getRunData: () => { + const programUniformsInit = uniformsSymbols.filter((symbol) => einsumEquation.symbolToInfo.has(symbol)).map( + (symbol) => ({ type: 12 /* uint32 */, data: einsumEquation.symbolToInfo.get(symbol)?.dimValue || 0 }) + ); + programUniformsInit.push({ type: 12 /* uint32 */, data: outputSize }); + const programUniforms = inputShapes.map((dims, _) => [...createTensorShapeVariables(dims)]).reduce((acc, inputProgramUniforms) => acc.concat(inputProgramUniforms), programUniformsInit); + programUniforms.push(...createTensorShapeVariables(outputShape)); + return { + outputs: [{ dims: outputShape, 
// Validates the inputs of Expand: exactly two inputs, and the requested shape
// (read from inputs[1] as int64 values) must be broadcast-compatible with the
// dims of inputs[0]. Throws an Error otherwise.
validateInputs12 = (inputs) => {
  if (!inputs || inputs.length !== 2) {
    throw new Error("Expand requires 2 input.");
  }
  const inputShape = inputs[0].dims;
  const shape = Array.from(inputs[1].getBigInt64Array(), Number);

  // Compare the two shapes right-aligned; leading dimensions that exist in
  // only one of them need no check.
  const overlap = Math.min(shape.length, inputShape.length);
  for (let fromEnd = overlap; fromEnd >= 1; --fromEnd) {
    const requested = shape[shape.length - fromEnd];
    const actual = inputShape[inputShape.length - fromEnd];
    const compatible = requested === actual || requested === 1 || actual === 1;
    if (!compatible) {
      throw new Error("Expand requires shape to be broadcastable to input");
    }
  }
};
// Builds the program info for Expand: broadcasts inputs[0] to the output shape
// computed from its dims and the shape stored in inputs[1] (int64 values).
// Data type 9 (bool) is processed four elements per invocation; every other
// type one element per invocation.
// Returns { name, shaderCache, getShaderSource, getRunData }.
createExpandProgramInfo = (inputs) => {
  const inputShape = inputs[0].dims;
  const shape = Array.from(inputs[1].getBigInt64Array(), Number);
  const outputShape = calculateOutputShape2(inputShape, shape);
  const dataType = inputs[0].dataType;
  const components = dataType === 9 /* bool */ ? 4 : 1;
  const outputSize = Math.ceil(ShapeUtil.size(outputShape) / components);
  const getShaderSource = (shaderHelper) => {
    const input = inputVariable("input", dataType, inputShape.length, components);
    const output = outputVariable("output", dataType, outputShape.length, components);
    let assignment;
    if (dataType === 9 /* bool */) {
      // Emits the WGSL that fetches the x-th of the four elements handled by
      // this invocation and stores it into component x of `resStr`.
      const singleAssignment = (resStr, x, typeCast = "") => `
          let outputIndices${x} = ${output.offsetToIndices(`outputOffset + ${x}u`)};
          let offset${x} = ${input.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
          let index${x} = offset${x} / 4u;
          let component${x} = offset${x} % 4u;
          ${resStr}[${x}] = ${typeCast}(${input.getByOffset(`index${x}`)}[component${x}]);
        `;
      // `data` must be declared as vec4<u32>: a bare vec4(0) is inferred as
      // vec4<i32>, which cannot take the u32 component values stored below.
      assignment = `
        let outputOffset = global_idx * ${components};
        var data = vec4<u32>(0);
        ${singleAssignment("data", 0, "u32")}
        ${singleAssignment("data", 1, "u32")}
        ${singleAssignment("data", 2, "u32")}
        ${singleAssignment("data", 3, "u32")}
        ${output.setByOffset("global_idx", "data")}
      }`;
    } else {
      assignment = `
        let outputIndices = ${output.offsetToIndices("global_idx")};
        let inputOffset = ${input.broadcastedIndicesToOffset("outputIndices", output)};
        ${output.setByOffset("global_idx", input.getByOffset("inputOffset"))}
      }`;
    }
    // Both `assignment` variants already contain the brace closing main().
    return `
    ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(input, output)}
    ${shaderHelper.mainStart()}
    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
    ${assignment}`;
  };
  const programUniforms = [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputShape, outputShape)];
  return {
    name: "Expand",
    shaderCache: { hint: `${outputShape.length}`, inputDependencies: ["rank"] },
    getShaderSource,
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    })
  };
};
// Builds the program info for Gather: the output takes the dims of inputs[0]
// with the dimension at `axis` replaced by the dims of the indices tensor
// (inputs[1]). Negative index values are shifted by the size of the gathered
// axis. Data type 9 (bool) is processed four elements per invocation.
//   inputs     - [data, indices]
//   attributes - { axis, cacheKey }
// Returns { name, shaderCache, getRunData, getShaderSource }.
createGatherProgramInfo = (inputs, attributes) => {
  const inputShape = inputs[0].dims;
  const indicesShape = inputs[1].dims;
  const inputRank = inputShape.length;
  const axis = ShapeUtil.normalizeAxis(attributes.axis, inputRank);
  const outputShape = inputShape.slice(0);
  outputShape.splice(axis, 1, ...indicesShape);
  const axisDimLimit = inputShape[axis];
  const components = inputs[0].dataType === 9 /* bool */ ? 4 : 1;
  const outputSize = Math.ceil(ShapeUtil.size(outputShape) / components);
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 6 /* int32 */, data: axisDimLimit },
    { type: 12 /* uint32 */, data: axis },
    ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims, outputShape)
  ];
  const getShaderSource = (shaderHelper) => {
    const data = inputVariable("data", inputs[0].dataType, inputs[0].dims.length, components);
    const indices = inputVariable("inputIndices", inputs[1].dataType, inputs[1].dims.length);
    const output = outputVariable("output", inputs[0].dataType, outputShape.length, components);
    // Emits WGSL that derives `dataIndices<x>` (indices into the data tensor)
    // from `outputIndices<x>`; `x` is a suffix that keeps names unique.
    const calcDataIndices = (x) => {
      const indicesRank = indicesShape.length;
      let calcStr = `var indicesIndices${x} = ${indices.type.indices}(0);`;
      for (let i = 0; i < indicesRank; i++) {
        calcStr += `${indicesRank > 1 ? `indicesIndices${x}[${i}]` : `indicesIndices${x}`} = ${outputShape.length > 1 ? `outputIndices${x}[uniforms.axis + ${i}]` : `outputIndices${x}`};`;
      }
      calcStr += `
        var idx${x} = ${indices.getByIndices(`indicesIndices${x}`)};
        if (idx${x} < 0) {
          idx${x} = idx${x} + uniforms.axisDimLimit;
        }
        var dataIndices${x} : ${data.type.indices};
      `;
      // `j` walks the output dims; the gathered axis consumes `indicesRank` of them.
      for (let i = 0, j = 0; i < inputRank; i++) {
        if (i === axis) {
          calcStr += `${inputRank > 1 ? `dataIndices${x}[${i}]` : `dataIndices${x}`} = u32(idx${x});`;
          j += indicesRank;
        } else {
          calcStr += `${inputRank > 1 ? `dataIndices${x}[${i}]` : `dataIndices${x}`} = ${outputShape.length > 1 ? `outputIndices${x}[${j}]` : `outputIndices${x}`};`;
          j++;
        }
      }
      return calcStr;
    };
    let assignment;
    if (inputs[0].dataType === 9 /* bool */) {
      // Emits the WGSL that gathers the x-th of the four elements handled by
      // this invocation and stores it into component x of `resStr`.
      const singleAssignment = (resStr, x, typeCast = "") => `
          let outputIndices${x} = ${output.offsetToIndices(`outputOffset + ${x}u`)};
          ${calcDataIndices(x)};
          let offset${x} = ${data.indicesToOffset(`dataIndices${x}`)};
          let index${x} = offset${x} / 4u;
          let component${x} = offset${x} % 4u;
          ${resStr}[${x}] = ${typeCast}(${data.getByOffset(`index${x}`)}[component${x}]);
        `;
      // `value` must be declared as vec4<u32>: a bare vec4(0) is inferred as
      // vec4<i32>, which cannot take the u32 component values stored below.
      assignment = `
        let outputOffset = global_idx * ${components};
        var value = vec4<u32>(0);
        ${singleAssignment("value", 0, "u32")}
        ${singleAssignment("value", 1, "u32")}
        ${singleAssignment("value", 2, "u32")}
        ${singleAssignment("value", 3, "u32")}
        ${output.setByOffset("global_idx", "value")}
      `;
    } else {
      assignment = `
      let outputIndices = ${output.offsetToIndices("global_idx")};
      ${calcDataIndices("")};
      let value = ${data.getByIndices("dataIndices")};
      ${output.setByOffset("global_idx", "value")};
      `;
    }
    return `
      ${shaderHelper.registerUniform("outputSize", "u32").registerUniform("axisDimLimit", "i32").registerUniform("axis", "u32").declareVariables(data, indices, output)}
      ${shaderHelper.mainStart()}
        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
        ${assignment}
      }`;
  };
  return {
    name: "Gather",
    shaderCache: { hint: attributes.cacheKey, inputDependencies: ["rank", "rank"] },
    getRunData: () => ({
      outputs: [
        { dims: outputShape, dataType: inputs[0].dataType }
      ],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
// Validates the inputs of GatherElements: exactly two inputs, a data tensor of
// rank >= 1, and an indices tensor of the same rank. Throws an Error otherwise.
validateInputs14 = (inputs) => {
  const hasTwoInputs = Boolean(inputs) && inputs.length === 2;
  if (!hasTwoInputs) {
    throw new Error("GatherElements requires 2 inputs.");
  }

  const dataRank = inputs[0].dims.length;
  if (dataRank < 1) {
    throw new Error("GatherElements requires that the data input be rank >= 1.");
  }

  const indicesRank = inputs[1].dims.length;
  if (dataRank !== indicesRank) {
    throw new Error(`GatherElements requires that the data input and
                     indices input tensors be of same rank.`);
  }
};
// Validates the inputs of Gemm and throws an Error on the first violation:
//   - inputs must be present and contain 2 or 3 tensors (A, B and optional C)
//   - C, when given, must have rank <= 2
//   - all given tensors must share the data type of A
validateInputs15 = (inputs) => {
  if (!inputs) {
    throw new Error("Input is missing");
  }
  if (inputs.length < 2 || inputs.length > 3) {
    throw new Error("Invalid input number.");
  }
  if (inputs.length === 3 && inputs[2].dims.length > 2) {
    throw new Error("Invalid input shape of C");
  }
  if (inputs[0].dataType !== inputs[1].dataType || inputs.length === 3 && inputs[0].dataType !== inputs[2].dataType) {
    throw new Error("Input types are mismatched");
  }
};
const bShape = inputs[1].dims.slice(); + const [M, N, K] = GemmUtil.getShapeOfGemmResult( + aShape, + attributes.transA, + bShape, + attributes.transB, + inputs.length === 3 ? inputs[2].dims : void 0 + ); + const outputShape = [M, N]; + if (!outputShape) { + throw new Error("Can't use gemm on the given tensors"); + } + const outputSize = ShapeUtil.size(outputShape); + const programUniforms = [ + { type: 12 /* uint32 */, data: outputSize }, + { type: 12 /* uint32 */, data: M }, + { type: 12 /* uint32 */, data: N }, + { type: 12 /* uint32 */, data: K }, + { type: 1 /* float */, data: attributes.alpha }, + { type: 1 /* float */, data: attributes.beta } + ]; + const inputDependencies = ["type", "type"]; + if (inputs.length === 3) { + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + inputDependencies.push("rank"); + } + programUniforms.push(...createTensorShapeVariables(outputShape)); + const getShaderSource = (shaderHelper) => { + let line = ""; + if (attributes.transA && attributes.transB) { + line = "value += a[k * uniforms.M + m] * b[n * uniforms.K + k];"; + } else if (attributes.transA && !attributes.transB) { + line = "value += a[k * uniforms.M + m] * b[k * uniforms.N + n];"; + } else if (!attributes.transA && attributes.transB) { + line = "value += a[m * uniforms.K + k] * b[n * uniforms.K + k];"; + } else if (!attributes.transA && !attributes.transB) { + line = "value += a[m * uniforms.K + k] * b[k * uniforms.N + n];"; + } + const calculateAlpha = attributes.alpha === 1 ? 
"" : "value *= uniforms.alpha;"; + const a = inputVariable("a", inputs[0].dataType, inputs[0].dims); + const b = inputVariable("b", inputs[1].dataType, inputs[1].dims); + const dataType = a.type.value; + let c = null; + const variables = [a, b]; + if (inputs.length === 3) { + c = inputVariable("c", inputs[2].dataType, inputs[2].dims.length); + variables.push(c); + } + const output = outputVariable("output", inputs[0].dataType, outputShape.length); + variables.push(output); + const uniforms = [ + { name: "output_size", type: "u32" }, + { name: "M", type: "u32" }, + { name: "N", type: "u32" }, + { name: "K", type: "u32" }, + { name: "alpha", type: "f32" }, + { name: "beta", type: "f32" } + ]; + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + + let m = global_idx / uniforms.N; + let n = global_idx % uniforms.N; + + var value = ${dataType}(0); + for (var k: u32 = 0u; k < uniforms.K; k++) { + ${line} + } + + ${calculateAlpha} + ${(() => { + if (c != null) { + return `let cOffset = ${c.broadcastedIndicesToOffset("vec2(m, n)", output)}; value += ${dataType}(uniforms.beta) * ${c.getByOffset("cOffset")};`; + } + return ""; + })()} + output[global_idx] = value; + }`; + }; + return { + name: "Gemm", + shaderCache: { hint: `${attributes.cacheKey}`, inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource + }; + }; + parseGemmAttributes = (attributes) => { + const transA = attributes.transA; + const transB = attributes.transB; + const alpha = attributes.alpha; + const beta = attributes.beta; + return { transA, transB, alpha, beta, cacheKey: `${attributes.transA};${attributes.transB};${attributes.alpha === 1}` }; + }; + gemm = (context, attributes) => { + 
validateInputs15(context.inputs); + context.compute(createGemmProgramInfo(context.inputs, attributes)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/instance-norm.ts +var createInstanceNormProgramInfo, computeMean, createInstanceNormNHWCProgramInfo, instanceNorm; +var init_instance_norm = __esm({ + "web/lib/wasm/jsep/webgpu/ops/instance-norm.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + createInstanceNormProgramInfo = (inputs, attributes) => { + const xShape = inputs[0].dims; + const outputShape = xShape; + const axis = 2; + const normCount = ShapeUtil.sizeToDimension(xShape, axis); + const normSize = ShapeUtil.sizeFromDimension(xShape, axis); + const components = getMaxComponents(normSize); + const normPackedSize = normSize / components; + const inputShape = [xShape[0], xShape[1], normPackedSize]; + const inputDependencies = ["rank", "type", "type"]; + const programUniforms = [{ type: 12 /* uint32 */, data: normSize }, { type: 12 /* uint32 */, data: normPackedSize }]; + programUniforms.push(...createTensorShapeVariables(inputShape, inputShape)); + const getShaderSource = (shaderHelper) => { + const x = inputVariable("x", inputs[0].dataType, inputShape.length, components); + const scale = inputVariable("scale", inputs[1].dataType, inputs[1].dims); + const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims); + const output = outputVariable("output", inputs[0].dataType, inputShape.length, components); + const variables = [x, scale, bias, output]; + const dataType = x.type.value; + const f32Type = components === 1 ? 
"f32" : `vec${components}`; + const workgroupSize = 64; + const uniforms = [{ name: "normSize", type: "u32" }, { name: "normPackedSize", type: "u32" }]; + return ` + var meanShared : f32; + var squaredNormShared : f32; + var workgroupShared : array<${f32Type}, ${workgroupSize}>; + const workgroupSize = ${workgroupSize}u; + ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)} + ${shaderHelper.mainStart(workgroupSize)} + let norm = global_idx / workgroupSize; + let batch = norm / uniforms.x_shape[1]; + let channel = norm % uniforms.x_shape[1]; + let localIndex = local_id.x; + + // initialize workgroup memory + var initial = ${f32Type}(0); + for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) { + initial = initial + ${f32Type}(${x.get("batch", "channel", "h")}); + } + workgroupShared[localIndex] = initial; + workgroupBarrier(); + + // Calculate the mean of current channel data. + for (var currSize = workgroupSize >> 1; currSize > 0; currSize = currSize >> 1) { + if (localIndex < currSize) { + workgroupShared[localIndex] = workgroupShared[localIndex] + workgroupShared[localIndex + currSize]; + } + workgroupBarrier(); + } + if (localIndex == 0) { + meanShared = ${sumVector("workgroupShared[0]", components)} / f32(uniforms.normSize); + } + workgroupBarrier(); + + // reinitialize workgroup memory. + initial = ${f32Type}(0); + for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) { + let deviation = ${f32Type}(${x.get("batch", "channel", "h")}) - ${f32Type}(meanShared); + initial = initial + deviation * deviation; + } + workgroupShared[localIndex] = initial; + workgroupBarrier(); + + // Calculate the sum of square of deviation of current channel data. 
+ for (var currSize = workgroupSize >> 1; currSize > 0; currSize = currSize >> 1) { + if (localIndex < currSize) { + workgroupShared[localIndex] = workgroupShared[localIndex] + workgroupShared[localIndex + currSize]; + } + workgroupBarrier(); + } + if (localIndex == 0) { + squaredNormShared = ${sumVector("workgroupShared[0]", components)}; + } + workgroupBarrier(); + + let invStdDev = inverseSqrt(squaredNormShared / f32(uniforms.normSize) + f32(${attributes.epsilon})); + let channelScale = invStdDev * f32(${scale.getByOffset("channel")}); + let channelShift = f32(${bias.getByOffset("channel")}) - meanShared * channelScale; + for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) { + let value = ${x.get("batch", "channel", "h")} * ${dataType}(${f32Type}(channelScale)) + ${dataType}(${f32Type}(channelShift)); + ${output.set("batch", "channel", "h", "value")}; + } + }`; + }; + return { + ...{ name: "InstanceNormalization" }, + // TODO: use epsilon as uniform. Currently epsilon as uniform fails test_instancenorm_epsilon. + shaderCache: { hint: `${attributes.epsilon};${components}`, inputDependencies }, + getRunData: () => ({ + outputs: [ + { dims: outputShape, dataType: inputs[0].dataType } + ], + dispatchGroup: { x: normCount }, + programUniforms + }), + getShaderSource + }; + }; + computeMean = (context, input, scale, bias, n, h, c, epsilon) => { + const components = getMaxComponents(c); + const WG = 64; + const outputType = components === 1 ? "vec2f" : `mat2x${components}f`; + const sumCastType = components === 1 ? 
"f32" : `vec${components}f`; + const setOutputValue = (var1, var2) => `${outputType}(${var1}, ${var2})`; + const unitsOfWork = n * c / components; + const wgSize = Math.ceil(h / WG); + const meanInputDependencies = ["type"]; + const meanProgramUniforms = [ + { type: 12 /* uint32 */, data: wgSize }, + { type: 12 /* uint32 */, data: h }, + { type: 12 /* uint32 */, data: Math.floor(c / components) }, + { type: 12 /* uint32 */, data: Math.floor(h * c / components) } + ]; + const getMeanShaderSource = (shaderHelper) => { + const inputHelper = inputVariable("input", input.dataType, input.dims, components); + return ` + ${shaderHelper.declareVariables(inputHelper)} + @group(0) @binding(1) var output : array<${outputType}>; + struct Uniforms {wg_size:u32, H:u32, C:u32, image_size:u32}; + @group(0) @binding(2) var uniforms: Uniforms; + + ${shaderHelper.mainStart(WG)} + let currentImageNumber = global_idx / ${WG} / uniforms.C; + let currentChannelNumber = (global_idx / ${WG}) % uniforms.C; + let wgOffset = local_id.x * uniforms.wg_size; + if (wgOffset >= uniforms.H) { + return; + } + let wgMax = min(wgOffset + uniforms.wg_size, uniforms.H); + + let offset = currentImageNumber * uniforms.image_size + currentChannelNumber; + var sum = ${fillVector("f32", components)}; + var squaredSum = ${fillVector("f32", components)}; + for (var i: u32 = wgOffset; i < wgMax; i++) { + let value = ${sumCastType}(input[offset + i * uniforms.C]); + sum += value; + squaredSum += value * value; + } + output[global_idx] = ${setOutputValue("sum", "squaredSum")}; + }`; + }; + const meanValues = context.compute( + { + name: "InstanceNormComputeMean", + shaderCache: { hint: `${components}`, inputDependencies: meanInputDependencies }, + getRunData: () => ({ + outputs: [ + { dims: [n, c, WG, 2], dataType: 1 /* float */ } + ], + dispatchGroup: { x: n * c / components }, + programUniforms: meanProgramUniforms + }), + getShaderSource: getMeanShaderSource + }, + { inputs: [input], outputs: [-1] } + )[0]; + 
const programUniforms = [ + { type: 12 /* uint32 */, data: unitsOfWork }, + { type: 12 /* uint32 */, data: h }, + { type: 12 /* uint32 */, data: Math.floor(c / components) }, + { type: 12 /* uint32 */, data: Math.floor(WG * c / components) } + ]; + const inputDependencies = ["type", "type", "type"]; + const getShaderSource = (shaderHelper) => { + const scaleHelper = inputVariable("scale", scale.dataType, scale.dims, components); + const biasHelper = inputVariable("bias", bias.dataType, bias.dims, components); + return ` + @group(0) @binding(0) var input : array<${outputType}>; + @group(0) @binding(1) var scale : array<${scaleHelper.type.storage}>; + @group(0) @binding(2) var bias : array<${biasHelper.type.storage}>; + @group(0) @binding(3) var output : array<${outputType}>; + struct Uniforms {units_of_work : u32, H: u32, C : u32, image_size : u32}; + @group(0) @binding(4) var uniforms: Uniforms; + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.units_of_work")} + let currentImageNumber = global_idx / uniforms.C; + let currentChannelNumber = global_idx % uniforms.C; + + let offset = currentImageNumber * uniforms.image_size; + var sum = ${fillVector("f32", components)}; + var squaredSum = ${fillVector("f32", components)}; + for (var i: u32 = 0; i < min(${WG}, uniforms.H); i++) { + let value = input[offset + i + currentChannelNumber * ${WG}]; + sum += value[0]; + squaredSum += value[1]; + } + sum = sum / f32(uniforms.H); + squaredSum = squaredSum / f32(uniforms.H); + let invStdDev = inverseSqrt(squaredSum - sum * sum + f32(${epsilon})); + let channelScale = invStdDev * ${sumCastType}(scale[currentChannelNumber]); + let channelShift = ${sumCastType}(bias[currentChannelNumber]) - sum * channelScale; + + output[global_idx] = ${setOutputValue("channelScale", "channelShift")}; + }`; + }; + return context.compute( + { + name: "InstanceNormComputeChannelScaleShift", + // TODO: use epsilon as uniform. 
Currently epsilon as uniform fails test_instancenorm_epsilon. + shaderCache: { hint: `${components};${epsilon}`, inputDependencies }, + getRunData: () => ({ + outputs: [ + { dims: [n, c, 2], dataType: 1 /* float */ } + ], + dispatchGroup: { x: Math.ceil( + unitsOfWork / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource + }, + { inputs: [meanValues, scale, bias], outputs: [-1] } + )[0]; + }; + createInstanceNormNHWCProgramInfo = (context, inputs, attributes) => { + const xShape = inputs[0].dims; + const outputShape = xShape; + const N = xShape[0]; + const C = xShape[xShape.length - 1]; + const H = ShapeUtil.sizeFromDimension(xShape, 1) / C; + const components = getMaxComponents(C); + const outputSize = ShapeUtil.size(outputShape) / components; + const programUniforms = [{ type: 12 /* uint32 */, data: H }, { type: 12 /* uint32 */, data: Math.floor(C / components) }]; + const inputDependencies = ["type", "type"]; + const channelScaleShift = computeMean(context, inputs[0], inputs[1], inputs[2], N, H, C, attributes.epsilon); + const getShaderSource = (shaderHelper) => { + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + const scaleType = components === 1 ? "vec2f" : `mat2x${components}f`; + const scaleCastType = components === 1 ? 
dataType : `vec${components}<${dataType}>`; + const inputHelper = inputVariable("input", inputs[0].dataType, inputs[0].dims, components); + const outputHelper = outputVariable("output", inputs[0].dataType, outputShape, components); + return ` + @group(0) @binding(0) var input : array<${inputHelper.type.storage}>; + @group(0) @binding(1) var scaleInput : array<${scaleType}>; + @group(0) @binding(2) var output : array<${outputHelper.type.storage}>; + struct Uniforms {H: u32, C : u32}; + @group(0) @binding(3) var uniforms: Uniforms; + + ${shaderHelper.mainStart()} + let currentImageNumber = global_idx / (uniforms.C * uniforms.H); + let currentChannelNumber = global_idx % uniforms.C; + + let scaleOffset = currentImageNumber * uniforms.C + currentChannelNumber; + let scale = scaleInput[scaleOffset]; + output[global_idx] = fma(input[global_idx], ${scaleCastType}(scale[0]), ${scaleCastType}(scale[1])); + }`; + }; + context.compute( + { + name: "InstanceNormalizationNHWC", + shaderCache: { hint: `${components}`, inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource + }, + { inputs: [inputs[0], channelScaleShift] } + ); + }; + instanceNorm = (context, attributes) => { + if (attributes.format === "NHWC") { + createInstanceNormNHWCProgramInfo(context, context.inputs, attributes); + } else { + context.compute(createInstanceNormProgramInfo(context.inputs, attributes)); + } + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/layer-norm.ts +var validateInputs16, createLayerNormProgramInfo, layerNorm; +var init_layer_norm = __esm({ + "web/lib/wasm/jsep/webgpu/ops/layer-norm.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + validateInputs16 = (inputs) => { + if (!inputs || inputs.length < 2) { + throw new Error("layerNorm requires at least 2 inputs."); + } + }; + 
createLayerNormProgramInfo = (inputs, attributes, outputCount) => { + const simplified = attributes.simplified; + const xShape = inputs[0].dims; + const scale = inputs[1]; + const bias = !simplified && inputs[2]; + const outputShape = xShape; + const axis = ShapeUtil.normalizeAxis(attributes.axis, xShape.length); + const normCount = ShapeUtil.sizeToDimension(xShape, axis); + const normSize = ShapeUtil.sizeFromDimension(xShape, axis); + const scaleSize = ShapeUtil.size(scale.dims); + const biasSize = bias ? ShapeUtil.size(bias.dims) : 0; + if (scaleSize !== normSize || bias && biasSize !== normSize) { + throw new Error(`Size of X.shape()[axis:] == ${normSize}. + Size of scale and bias (if provided) must match this. + Got scale size of ${scaleSize} and bias size of ${biasSize}`); + } + const meanInvStdDevDim = []; + for (let i = 0; i < xShape.length; ++i) { + if (i < axis) { + meanInvStdDevDim.push(xShape[i]); + } else { + meanInvStdDevDim.push(1); + } + } + const components = getMaxComponents(normSize); + const inputDependencies = ["type", "type"]; + const programUniforms = [ + { type: 12 /* uint32 */, data: normCount }, + { type: 1 /* float */, data: normSize }, + { type: 12 /* uint32 */, data: Math.floor(normSize / components) }, + { type: 1 /* float */, data: attributes.epsilon } + ]; + if (bias) { + inputDependencies.push("type"); + } + const hasMeanDataOutput = outputCount > 1; + const hasInvStdOutput = outputCount > 2; + const getShaderSource = (shaderHelper) => { + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + const variables = [ + inputVariable("x", inputs[0].dataType, inputs[0].dims, components), + inputVariable("scale", scale.dataType, scale.dims, components) + ]; + if (bias) { + variables.push(inputVariable("bias", bias.dataType, bias.dims, components)); + } + variables.push(outputVariable("output", inputs[0].dataType, outputShape, components)); + if (hasMeanDataOutput) { + variables.push(outputVariable("mean_data_output", 1 /* float 
*/, meanInvStdDevDim)); + } + if (hasInvStdOutput) { + variables.push(outputVariable("inv_std_output", 1 /* float */, meanInvStdDevDim)); + } + const uniforms = [ + { name: "norm_count", type: "u32" }, + { name: "norm_size", type: "f32" }, + { name: "norm_size_vectorized", type: "u32" }, + { name: "epsilon", type: "f32" } + ]; + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")} + let offset = global_idx * uniforms.norm_size_vectorized; + var mean_vector = ${fillVector("f32", components)}; + var mean_square_vector = ${fillVector("f32", components)}; + + for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) { + let value = ${castToF32(dataType, components, "x[h + offset]")}; + mean_vector += value; + mean_square_vector += value * value; + } + let mean = ${sumVector("mean_vector", components)} / uniforms.norm_size; + let inv_std_dev = inverseSqrt(${sumVector("mean_square_vector", components)} / uniforms.norm_size ${simplified ? "" : "- mean * mean"} + uniforms.epsilon); + + for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) { + let f32input = ${castToF32(dataType, components, "x[j + offset]")}; + let f32scale = ${castToF32(dataType, components, "scale[j]")}; + output[j + offset] = ${variables[0].type.value}((f32input ${simplified ? "" : "- mean"}) * inv_std_dev * f32scale + ${bias ? `+ ${castToF32(dataType, components, "bias[j]")}` : ""} + ); + } + + ${hasMeanDataOutput ? "mean_data_output[global_idx] = mean" : ""}; + ${hasInvStdOutput ? 
"inv_std_output[global_idx] = inv_std_dev" : ""}; + }`; + }; + const outputs = [{ dims: outputShape, dataType: inputs[0].dataType }]; + if (hasMeanDataOutput) { + outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ }); + } + if (hasInvStdOutput) { + outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ }); + } + return { + name: "LayerNormalization", + shaderCache: { hint: `${components};${outputCount};${simplified}`, inputDependencies }, + getRunData: () => ({ outputs, dispatchGroup: { x: Math.ceil( + normCount / 64 + /* workgroup size */ + ) }, programUniforms }), + getShaderSource + }; + }; + layerNorm = (context, attributes) => { + validateInputs16(context.inputs); + context.compute(createLayerNormProgramInfo(context.inputs, attributes, context.outputCount)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/matmulnbits.ts +var validateInputs17, createMatMulNBitsProgramInfo, matMulNBits, parseMatMulNBitsAttributes; +var init_matmulnbits = __esm({ + "web/lib/wasm/jsep/webgpu/ops/matmulnbits.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + validateInputs17 = (inputs, attributes) => { + if (inputs.length < 3 || inputs.length > 4) { + throw new Error("MatMulNBits requires 3 or 4 inputs"); + } + const a = inputs[0]; + const aRank = a.dims.length; + if (a.dims[aRank - 1] !== attributes.k) { + throw new Error("The last dim of input shape does not match the k value"); + } + const nBlocksPerCol = Math.floor((attributes.k + attributes.blockSize - 1) / attributes.blockSize); + const blobSize = attributes.blockSize / 8 * attributes.bits; + const b = inputs[1]; + if (!ShapeUtil.areEqual(b.dims, [attributes.n, nBlocksPerCol, blobSize])) { + throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize"); + } + const scales = inputs[2]; + const scalesShape = scales.dims; + if (ShapeUtil.size(scalesShape) !== attributes.n * nBlocksPerCol) { + throw new 
Error("scales input size error."); + } + if (inputs.length === 4) { + const zeroPoints = inputs[3]; + const zeroPointsShape = zeroPoints.dims; + const expectedZeroPointsSize = attributes.bits > 4 ? attributes.n * nBlocksPerCol : attributes.n * Math.floor((nBlocksPerCol + 1) / 2); + if (ShapeUtil.size(zeroPointsShape) !== expectedZeroPointsSize) { + throw new Error("zeroPoints input size error."); + } + } + }; + createMatMulNBitsProgramInfo = (inputs, attributes, maxComputeWorkgroupSizes, maxComputeWorkgroupStorageSize) => { + const inputShape = inputs[0].dims; + const aRank = inputShape.length; + const nBlocksPerCol = Math.floor((attributes.k + attributes.blockSize - 1) / attributes.blockSize); + const dimAOuter = inputShape[aRank - 2]; + const dimInner = attributes.k; + const dimBOuter = attributes.n; + const batchDims = inputShape.slice(0, aRank - 2); + const batchSize = ShapeUtil.size(batchDims); + const blobSize = attributes.blockSize / 8 * attributes.bits; + const blobSizeInWords = blobSize / 4; + const dataType = inputs[0].dataType; + const outputNumber = getMaxComponents(dimAOuter); + const aComponents = getMaxComponents(attributes.k); + const bComponents = getMaxComponents(blobSizeInWords); + const elementSize = getTensorElementSize(dataType); + const workgroupOutputSize = dimAOuter * nBlocksPerCol * elementSize; + const maxNumberOfComponents = Math.floor(maxComputeWorkgroupStorageSize / workgroupOutputSize); + const useBlockwiseMatMulNBits = nBlocksPerCol <= maxComputeWorkgroupSizes[0] && maxNumberOfComponents > 0; + const components = !useBlockwiseMatMulNBits || maxNumberOfComponents >= 4 ? getMaxComponents(dimBOuter) : maxNumberOfComponents >= 2 && getMaxComponents(dimBOuter) >= 2 ? 2 : 1; + const outputShape = batchDims.concat([dimAOuter, dimBOuter]); + const outputSize = ShapeUtil.size(outputShape) / components / outputNumber; + const programUniforms = useBlockwiseMatMulNBits ? 
[] : [{ type: 12 /* uint32 */, data: outputSize }, { type: 12 /* uint32 */, data: attributes.blockSize }]; + const inputShapeTemp = [batchSize, dimAOuter, dimInner / aComponents]; + const bShape = ShapeUtil.convertShape(inputs[1].dims).slice(); + bShape.splice(-1, 1, blobSizeInWords / bComponents); + programUniforms.push(...createTensorShapeVariables(inputShapeTemp)); + programUniforms.push(...createTensorShapeVariables(bShape)); + programUniforms.push(...createTensorShapeVariables(inputs[2].dims)); + if (inputs.length === 4) { + programUniforms.push(...createTensorShapeVariables(ShapeUtil.convertShape(inputs[3].dims))); + } + const outputShapeTemp = [batchSize, dimAOuter, dimBOuter / components]; + programUniforms.push(...createTensorShapeVariables(outputShapeTemp)); + const getShaderSource = (shaderHelper) => { + const inputRank = inputShapeTemp.length; + const a = inputVariable("a", inputs[0].dataType, inputRank, aComponents); + const b = inputVariable("b", 12 /* uint32 */, bShape.length, bComponents); + const scales = inputVariable("scales", inputs[2].dataType, inputs[2].dims.length); + const inputVariables = [a, b, scales]; + const zeroPoints = inputs.length === 4 ? 
inputVariable("zero_points", 12 /* uint32 */, inputs[3].dims.length) : void 0; + if (zeroPoints) { + inputVariables.push(zeroPoints); + } + const outputRank = outputShapeTemp.length; + const output = outputVariable("output", inputs[0].dataType, outputRank, components); + const uniforms = [{ name: "output_size", type: "u32" }, { name: "block_size", type: "u32" }]; + const dataType2 = tensorTypeToWsglStorageType(inputs[0].dataType); + const qDqDataType = (() => { + switch (aComponents) { + case 1: + return `array<${dataType2}, 8>`; + case 2: + return `mat4x2<${dataType2}>`; + case 4: + return `mat2x4<${dataType2}>`; + default: + throw new Error(`${aComponents}-component is not supported.`); + } + })(); + const processOneBlock = ` + for (var word: u32 = 0; word < ${blobSizeInWords}; word += ${bComponents}) { + ${b.indicesSet("b_indices", "2", "word")}; + let b_data = ${b.getByIndices("b_indices")}; + for (var i: u32 = 0; i < ${bComponents}; i++) { + let b_value: u32 = ${bComponents === 1 ? "b_data" : "b_data[word + i]"}; + let b_mask: u32 = 0x0F0F0F0Fu; + let b_value_lower: vec4 = unpack4xU8(b_value & b_mask); + let b_value_upper: vec4 = unpack4xU8((b_value >> 4) & b_mask); + let b_quantized_values = ${qDqDataType}(${Array.from({ length: 4 }, (_, i) => `${dataType2}(b_value_lower[${i}]), ${dataType2}(b_value_upper[${i}])`).join(", ")}); + let b_dequantized_values = ${(() => { + if (aComponents === 1) { + return `${qDqDataType}(${Array.from({ length: 8 }, (_, i) => `(b_quantized_values[${i}] - zero_point) * scale`).join(", ")});`; + } else { + return `(b_quantized_values - ${qDqDataType}(${Array(8).fill("zero_point").join(",")})) * scale;`; + } + })()}; + // Number of B elements per 32-bit word is 32/bits = 32/4 = 8 + for (var m: u32 = 0; m < ${useBlockwiseMatMulNBits ? dimAOuter : outputNumber}u; m++) { + ${a.indicesSet("a_indices", inputRank - 2, useBlockwiseMatMulNBits ? 
"m" : `row * ${outputNumber} + m`)}; + ${a.indicesSet("a_indices", inputRank - 1, "word_offset")}; + var input_offset = ${a.indicesToOffset("a_indices")}; + var a_data: ${qDqDataType}; + for (var j: u32 = 0; j < ${8 / aComponents}; j++) { + a_data[j] = ${a.getByOffset("input_offset")}; + input_offset++; + } + ${useBlockwiseMatMulNBits ? "workgroup_shared[workgroup_shared_offset + m]" : "output_values[m]"}${components > 1 ? "[c]" : ""} += ${Array.from( + { length: 8 / aComponents }, + (_, i) => `${aComponents === 1 ? `a_data[${i}] * b_dequantized_values[${i}]` : `dot(a_data[${i}], b_dequantized_values[${i}])`}` + ).join(" + ")}; + } + word_offset += ${8 / aComponents}; + } + }`; + const updateZeroPointIndex = zeroPoints ? ` + zero_point_offset += 4; + if (zero_point_offset == 32) { + zero_point_offset = 0; + zero_point_index++; + zero_point_word = ${zeroPoints.getByOffset("zero_point_index")}; + }` : ""; + return useBlockwiseMatMulNBits ? ` + var workgroup_shared: array<${output.type.value}, ${dimAOuter * nBlocksPerCol}>; + ${shaderHelper.declareVariables(...inputVariables, output)} + ${shaderHelper.mainStart([ + nBlocksPerCol, + 1, + 1 + ])} + var a_indices: ${a.type.indices}; + var block = local_id.x; + var col = workgroup_id.y; + var batch = workgroup_id.z; + ${a.indicesSet("a_indices", "0", "batch")}; + // Two zero points are packed into one byte when uniforms.bits is 4. + for (var c: u32 = 0; c < ${components}; c++) { + let col_times_components_plus_c = col * ${components} + c; + ${zeroPoints ? 
` + var zero_point_bytes_per_col: u32 = (${nBlocksPerCol} + 1) / 2; + var zero_point_byte_count: u32 = col_times_components_plus_c * zero_point_bytes_per_col + (block >> 0x1u); + var zero_point_word_index: u32 = zero_point_byte_count >> 0x2u; + var zero_point_byte_offset: u32 = zero_point_byte_count & 0x3u; + var zero_point_nibble_offset: u32 = block & 0x1u; + var zero_point_bits_offset: u32 = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2); + var zero_point_word: u32 = ${zeroPoints.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;` : ""} + var b_indices: ${b.type.indices}; + ${b.indicesSet("b_indices", "0", "col_times_components_plus_c")}; + // The scale and zero points are computed per block. + var scales_index = col_times_components_plus_c * ${nBlocksPerCol} + block; + let scale = ${scales.getByOffset("scales_index")}; + // The default zero point is 8 for unsigned 4-bit quantization. + let zero_point = ${dataType2}(${zeroPoints ? "(zero_point_word) & 0xFu" : 8}); + ${b.indicesSet("b_indices", "1", "block")}; + var word_offset: u32 = block * ${attributes.blockSize / aComponents}; + var workgroup_shared_offset: u32 = block * ${dimAOuter}; + ${processOneBlock} + } + workgroupBarrier(); + if (local_id.x == 0u) { + var output_indices: ${output.type.indices}; + ${output.indicesSet("output_indices", "0", "batch")}; + ${output.indicesSet("output_indices", outputRank - 1, "col")}; + ${output.indicesSet("output_indices", outputRank - 2, "0")}; + var output_offset = ${output.indicesToOffset("output_indices")}; + for (var m: u32 = 0u; m < ${dimAOuter}u; m++) { + var output_value: ${output.type.value} = ${output.type.value}(0); + var workgroup_shared_offset: u32 = m; + for (var b: u32 = 0u; b < ${nBlocksPerCol}u; b++) { + output_value += workgroup_shared[workgroup_shared_offset]; + workgroup_shared_offset += ${dimAOuter}; + } + ${output.setByOffset("output_offset", "output_value")}; + output_offset += ${dimBOuter / components}; + } + } + }` : ` 
+ ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + var output_values: array<${output.type.value}, ${outputNumber}>; + var output_indices = ${output.offsetToIndices("global_idx")}; + var col = ${output.indicesGet("output_indices", outputRank - 1)}; + var row = ${output.indicesGet("output_indices", outputRank - 2)}; + var a_indices: ${a.type.indices} = output_indices; + // Two zero points are packed into one byte because uniforms.bits <= 4. + // zero_point_offset is either 0 or 4. It is bit offset within one byte. + // TODO support zero_point_offset for bits > 4 + ${zeroPoints ? ` + var zero_point_abs_offset = col * ${components} * ((${nBlocksPerCol} + 1) / 2); + var zero_point_index: u32 = zero_point_abs_offset / 4; + var zero_point_word: u32 = ${zeroPoints.getByOffset("zero_point_index")}; + var zero_point_offset: u32 = (zero_point_abs_offset % 4) * 8;` : ""} + var scale_index = col * ${nBlocksPerCol * components}; + var b_indices: ${b.type.indices}; + for (var c: u32 = 0; c < ${components}; c++) { + ${b.indicesSet("b_indices", "0", `col * ${components} + c`)}; + var block_offset: u32 = 0; + for (var block: u32 = 0; block < ${nBlocksPerCol}; block++) { + // The scale and zero points are computed per block. + let scale = ${scales.getByOffset("scale_index")}; + // The default zero point is 8 for unsigned 4-bit quantization. + let zero_point = ${dataType2}(${zeroPoints ? "extractBits(zero_point_word, zero_point_offset, 4)" : 8}); + ${b.indicesSet("b_indices", "1", "block")}; + var word_offset: u32 = block_offset; + ${processOneBlock} + scale_index++; + ${updateZeroPointIndex} + block_offset += uniforms.block_size / ${aComponents}; + } + // Drop the trailing 4 bits if the zero_poit_offset is not a byte boundary to align with the next byte. + ${zeroPoints ? 
`if (zero_point_offset % 8 > 0) { + ${updateZeroPointIndex} + }` : ""} + } + for (var k: u32 = 0u; k < ${outputNumber}u; k++) { + ${output.indicesSet("output_indices", outputRank - 2, `${outputNumber} * row + k`)}; + ${output.setByIndices("output_indices", "output_values[k]")} + } + }`; + }; + return { + name: useBlockwiseMatMulNBits ? "BlockwiseMatMulNBits" : "MatMulNBits", + shaderCache: { + hint: `${attributes.cacheKey};${dimAOuter};${dataType};${inputs.length}`, + inputDependencies: Array(inputs.length).fill("rank") + }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType }], + name: useBlockwiseMatMulNBits ? "BlockwiseMatMulNBits" : "MatMulNBits", + dispatchGroup: useBlockwiseMatMulNBits ? { x: 1, y: Math.ceil(dimBOuter / components), z: batchSize } : { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource + }; + }; + matMulNBits = (context, attributes) => { + validateInputs17(context.inputs, attributes); + const maxComputeWorkgroupSizes = context.getMaxComputeWorkgroupSizes(); + const maxComputeWorkgroupStorageSize = context.getMaxComputeWorkgroupStoragesize(); + context.compute(createMatMulNBitsProgramInfo( + context.inputs, + attributes, + maxComputeWorkgroupSizes, + maxComputeWorkgroupStorageSize + )); + }; + parseMatMulNBitsAttributes = (attributes) => createAttributeWithCacheKey(attributes); + } +}); + +// web/lib/wasm/jsep/webgpu/ops/multihead-attentiion.ts +var getInput, validateInputs18, parseMultiHeadAttentionAttributes, weightTransposeAttribute2, addBiasTranspose, maybeTransposeToBNSHAndAddBias, multiHeadAttention; +var init_multihead_attentiion = __esm({ + "web/lib/wasm/jsep/webgpu/ops/multihead-attentiion.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_types(); + init_attention(); + init_common(); + init_transpose(); + getInput = (inputs, i) => inputs.length > i && inputs[i].dims.length > 0 && ShapeUtil.size(inputs[i].dims) > 0 ? 
// Validates the MultiHeadAttention inputs and returns the derived attention
// parameters.
//
// Input slots (optional ones are resolved through getInput, which yields
// undefined for a missing/empty tensor):
//   0 query, 1 key, 2 value, 3 bias, 4 key_padding_mask,
//   5 relative_position_bias, 6 past_key, 7 past_value
//
// `attributes` supplies numHeads, maskFilterValue and scale.
// Throws an Error describing the first shape constraint that is violated.
// A key padding mask is always rejected: its shape is checked first so the
// caller gets the more specific message, then "Mask not supported" is thrown.
validateInputs18 = (inputs, attributes) => {
  const query = inputs[0];
  const key = getInput(inputs, 1);
  const value = getInput(inputs, 2);
  const bias = getInput(inputs, 3);
  const keyPaddingMask = getInput(inputs, 4);
  const relativePositionBias = getInput(inputs, 5);
  const pastKey = getInput(inputs, 6);
  const pastValue = getInput(inputs, 7);

  if (query.dims.length !== 3 && query.dims.length !== 5) {
    throw new Error("Input query is expected to have 3 or 5 dimensions");
  }

  const batchSize = query.dims[0];
  const sequenceLength = query.dims[1];
  // 3-D query carries the hidden size directly; the 5-D (packed) layout
  // carries the head size in its last dimension.
  const hiddenSize = query.dims.length === 3 ? query.dims[2] : attributes.numHeads * query.dims[4];
  let kvSequenceLength = sequenceLength;
  let pastSequenceLength = 0;
  let maxSequenceLength = 0;
  const headSize = Math.floor(hiddenSize / attributes.numHeads);

  if (pastKey && pastValue) {
    // Check both ranks before indexing into dims so that a wrong-rank tensor
    // is reported as such rather than as a shape mismatch.
    if (pastKey.dims.length !== 4) {
      throw new Error('Input "past_key" is expected to have 4 dimensions');
    }
    if (pastValue.dims.length !== 4) {
      throw new Error('Input "past_value" is expected to have 4 dimensions');
    }
    if (pastKey.dims[0] !== batchSize || pastKey.dims[1] !== attributes.numHeads || pastKey.dims[3] !== headSize) {
      throw new Error('Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)');
    }
    if (pastValue.dims[0] !== batchSize || pastValue.dims[1] !== attributes.numHeads || pastValue.dims[3] !== headSize) {
      throw new Error('Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)');
    }
    if (pastKey.dims[2] !== pastValue.dims[2]) {
      throw new Error('Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)');
    }
    pastSequenceLength = pastKey.dims[2];
    maxSequenceLength = pastKey.dims[2];
  } else if (pastKey || pastValue) {
    throw new Error('Input "past_key" and "past_value" shall be both present or both absent');
  }

  let qkvFormat;
  if (key) {
    if (query.dims.length !== 3) {
      throw new Error('Input "query" is expected to have 3 dimensions when key is given');
    }
    if (key.dims.length < 3 || key.dims.length > 5) {
      throw new Error('Input "key" is expected to have 3, 4, or 5 dimensions');
    }
    if (query.dims[0] !== key.dims[0]) {
      throw new Error('Input "query" and "key" shall have same dim 0 (batch size)');
    }
    if (key.dims.length === 3) {
      if (key.dims[2] !== query.dims[2]) {
        throw new Error('Input "query" and "key" shall have same dim 2 (hidden_size)');
      }
      qkvFormat = 2 /* qkvBSNH */;
      kvSequenceLength = key.dims[1];
    } else if (key.dims.length === 5) {
      if (key.dims[2] !== attributes.numHeads || key.dims[3] !== 2 || key.dims[4] !== headSize) {
        throw new Error('Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv');
      }
      if (value) {
        throw new Error('Expect "value" be none when "key" has packed kv format.');
      }
      qkvFormat = 5 /* qKvBSNHxBSN2H */;
      kvSequenceLength = key.dims[1];
    } else {
      // 4-D key: already laid out as (batch, heads, kv_seq, head_size).
      if (key.dims[1] !== attributes.numHeads || key.dims[3] !== headSize) {
        throw new Error('Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key');
      }
      qkvFormat = 0 /* unknown */;
      kvSequenceLength = key.dims[2];
    }
  } else {
    // The query rank (3 or 5) was already verified above.
    if (query.dims.length === 5 && (query.dims[2] !== attributes.numHeads || query.dims[3] !== 3)) {
      throw new Error('Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv');
    }
    qkvFormat = 3 /* qkvBSN3H */;
  }

  if (bias) {
    if (bias.dims.length !== 1) {
      throw new Error('Input "bias" is expected to have 1 dimension');
    }
    if (value) {
      if (query.dims.length === 5 && query.dims[3] === 2) {
        throw new Error("bias is not allowed for packed kv.");
      }
    }
  }

  let maskType = 0 /* none */;
  if (keyPaddingMask) {
    maskType = 8 /* maskUnknown */;
    const maskDims = keyPaddingMask.dims;
    if (maskDims.length === 1) {
      if (maskDims[0] === batchSize) {
        maskType = 1 /* mask1dKeySeqLen */;
      } else if (maskDims[0] === 3 * batchSize + 2) {
        maskType = 3 /* mask1DKeySeqLenStart */;
      }
    } else if (maskDims.length === 2 && maskDims[0] === batchSize && maskDims[1] === kvSequenceLength) {
      maskType = 5 /* mask2dKeyPadding */;
    }
    if (maskType === 8 /* maskUnknown */) {
      throw new Error('Input "key_padding_mask" shape shall be (batch_size) or (batch_size, kv_sequence_length)');
    }
    // Even a well-formed mask is rejected.
    throw new Error("Mask not supported");
  }

  let passPastInKv = false;
  let vHiddenSize = hiddenSize;
  if (value) {
    if (value.dims.length !== 3 && value.dims.length !== 4) {
      throw new Error('Input "value" is expected to have 3 or 4 dimensions');
    }
    if (query.dims[0] !== value.dims[0]) {
      throw new Error('Input "query" and "value" shall have same dim 0 (batch_size)');
    }
    if (value.dims.length === 3) {
      if (kvSequenceLength !== value.dims[1]) {
        throw new Error('Input "key" and "value" shall have the same dim 1 (kv_sequence_length)');
      }
      vHiddenSize = value.dims[2];
    } else {
      if (kvSequenceLength !== value.dims[2]) {
        throw new Error('Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)');
      }
      vHiddenSize = value.dims[1] * value.dims[3];
      passPastInKv = true;
    }
  }

  const totalSequenceLength = pastSequenceLength + kvSequenceLength;
  const broadcastResPosBias = false;

  if (relativePositionBias) {
    if (relativePositionBias.dims.length !== 4) {
      throw new Error('Input "relative_position_bias" is expected to have 4 dimensions');
    }
    if (relativePositionBias.dims[0] !== batchSize && relativePositionBias.dims[0] !== 1 || relativePositionBias.dims[1] !== attributes.numHeads || relativePositionBias.dims[2] !== sequenceLength || relativePositionBias.dims[3] !== totalSequenceLength) {
      throw new Error('Input "relative_position_bias" shape (batch_size, 1, sequence_length, kv_sequence_length)');
    }
  }

  return {
    batchSize,
    sequenceLength,
    pastSequenceLength,
    kvSequenceLength,
    totalSequenceLength,
    maxSequenceLength,
    inputHiddenSize: 0,
    hiddenSize,
    vHiddenSize,
    headSize,
    vHeadSize: Math.floor(vHiddenSize / attributes.numHeads),
    numHeads: attributes.numHeads,
    isUnidirectional: false,
    pastPresentShareBuffer: false,
    maskFilterValue: attributes.maskFilterValue,
    maskType,
    scale: attributes.scale,
    broadcastResPosBias,
    passPastInKv,
    qkvFormat
  };
};
// Produces the transposed form of `input` (permutation taken from
// weightTransposeAttribute2.perm), adding `bias` first when one is given.
//
// Without bias, a 3-D input is reshaped to
// [batchSize, sequenceLength, numHeads, headSize] before the transpose; any
// other rank is transposed as-is. With bias, addBiasTranspose is run first
// and its result is reshaped to the same 4-D shape. The biased path is not
// available for sequenceLength === 1 and throws.
maybeTransposeToBNSHAndAddBias = (context, batchSize, numHeads, sequenceLength, headSize, input, bias, biasOffset) => {
  const splitHeadsShape = [batchSize, sequenceLength, numHeads, headSize];
  const runTranspose = (tensor) => context.compute(
    createTransposeProgramInfo(tensor, weightTransposeAttribute2.perm),
    { inputs: [tensor], outputs: [-1] }
  )[0];

  if (!bias) {
    const source = input.dims.length === 3 ? input.reshape(splitHeadsShape) : input;
    return runTranspose(source);
  }

  if (sequenceLength === 1) {
    throw new Error("AddBiasReshape is not implemented. Please export your model with packed QKV or KV");
  }

  const biased = addBiasTranspose(context, input, bias, batchSize, sequenceLength, numHeads * headSize, biasOffset);
  return runTranspose(biased.reshape(splitHeadsShape));
};
// Validates the inputs of the Pad operator.
//   inputs[0]  data tensor; dataType must be 1 (float) or 10 (float16)
//   inputs[1]  optional pads tensor; dims[0] must be twice the number of
//              padded axes
//   inputs[3]  optional axes tensor; consulted only when exactly four inputs
//              are given, in which case its dims[0] is the number of padded
//              axes (otherwise the rank of the data tensor is used)
// Throws an Error when any of these constraints is violated.
validateInputs19 = (inputs) => {
  if (!inputs || inputs.length < 1) {
    throw new Error("Too few inputs");
  }

  const dataType = inputs[0].dataType;
  const isFloatingPoint = dataType === 1 /* float */ || dataType === 10 /* float16 */;
  if (!isFloatingPoint) {
    throw new Error("Input type must be float or float16.");
  }

  if (inputs.length < 2) {
    return;
  }

  const inputRank = inputs[0].dims.length;
  const paddedAxisCount = inputs.length === 4 ? inputs[3].dims[0] : inputRank;
  if (paddedAxisCount * 2 !== inputs[1].dims[0]) {
    throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].");
  }
};
// Generates the WGSL snippet for Pad in "wrap" mode.
//
// For every input dimension (innermost first) the snippet maps the output
// index into the input by subtracting the leading pad, shifts it by the
// dimension size once if it fell outside [0, size), and accumulates the
// resulting element offset. It finally loads `x[offset]` into `value`.
//
//   output      output variable helper; only indicesGet() is used
//   inputRank   number of input dimensions
//   padsLength  length of the `uniforms.pads` array
// Returns the snippet as a string.
getPadWrap = (output, inputRank, padsLength) => {
  let block = "";
  for (let i = inputRank - 1; i >= 0; --i) {
    const dimSize = `i32(${getElementAt("uniforms.x_shape", i, inputRank)})`;
    // Note: the wrap-around adds/subtracts the dimension size exactly once.
    block += `
            k = i32(${output.indicesGet("indices", i)}) - ${getElementAt("uniforms.pads", i, padsLength)};
            if (k < 0)  {
              k += ${dimSize};
            }
            if (k >= ${dimSize}) {
              k -= ${dimSize};
            }
            offset += k * i32(${getElementAt("uniforms.x_strides", i, inputRank)});
        `;
  }
  return `
          var offset = 0;
          var k = 0;
          ${block}
          value = x[offset];
      `;
};
// Builds the effective Pad attributes when pads (and optionally the constant
// value and axes) arrive as input tensors instead of attributes.
//
//   inputs[1]  pads, read as int64
//   inputs[2]  optional constant value, read as float32 (0 when absent)
//   inputs[3]  optional axes, read as int64; pads then holds the begin values
//              for each listed axis followed by the end values
//
// Returns `attributes` unchanged when only the data tensor is supplied,
// otherwise `{ mode, value, pads }` where `pads` has 2 * rank entries
// (all begins, then all ends) and unlisted axes are padded by 0.
// Throws an Error if an axis lies outside [-rank, rank - 1].
createPadAttributesFromInputs = (inputs, attributes) => {
  if (inputs.length <= 1) {
    return attributes;
  }

  const bigInt64Pads = inputs[1].getBigInt64Array();
  const value = inputs.length >= 3 && inputs[2].data ? inputs[2].getFloat32Array()[0] : 0;
  const inputRank = inputs[0].dims.length;
  const updatePads = new Int32Array(2 * inputRank); // zero-initialised

  if (inputs.length >= 4) {
    const axes = inputs[3].getBigInt64Array();
    for (let i = 0; i < axes.length; i++) {
      const rawAxis = Number(axes[i]);
      // Negative axes count from the back. Writing to a negative typed-array
      // index is silently dropped, so they must be normalised first.
      const axis = rawAxis < 0 ? rawAxis + inputRank : rawAxis;
      if (axis < 0 || axis >= inputRank) {
        throw new Error(`Pad axis ${rawAxis} is out of range for an input of rank ${inputRank}.`);
      }
      updatePads[axis] = Number(bigInt64Pads[i]);
      updatePads[axis + inputRank] = Number(bigInt64Pads[i + axes.length]);
    }
  } else {
    bigInt64Pads.forEach((v, i) => {
      updatePads[i] = Number(v);
    });
  }

  return { mode: attributes.mode, value, pads: Array.from(updatePads) };
};
inputShapeAsChannelFirst.splice(1, 0, inputShapeAsChannelFirst.pop()); + } + const hasDilations = Object.hasOwnProperty.call(attributes, "dilations"); + const kernelShape = attributes.kernelShape.slice(); + const strides = attributes.strides.slice(); + const dilations = hasDilations ? attributes.dilations.slice() : []; + const pads = attributes.pads.slice(); + PoolConvUtil.adjustPoolAttributes(isGlobalOperator, inputShapeAsChannelFirst, kernelShape, strides, dilations, pads); + const outputShapeAsChannelFirst = PoolConvUtil.computePoolOutputShape( + isGlobalOperator, + inputShapeAsChannelFirst, + strides, + dilations, + kernelShape, + pads, + attributes.autoPad + ); + const newAttributes = Object.assign({}, attributes); + if (hasDilations) { + Object.assign(newAttributes, { kernelShape, strides, pads, dilations, cacheKey: attributes.cacheKey }); + } else { + Object.assign(newAttributes, { kernelShape, strides, pads, cacheKey: attributes.cacheKey }); + } + const outputShapeAsChannelLast = outputShapeAsChannelFirst.slice(); + outputShapeAsChannelLast.push(outputShapeAsChannelLast.splice(1, 1)[0]); + return [newAttributes, isChannelsLast ? 
// Collects the uniform values and declarations used by the pooling shaders.
//
// Returns [programUniforms, uniforms, hasPads, pwStartEndNotZero,
// phStartEndNotZero]:
//   programUniforms  { type: 12 (uint32), data } entries, in declaration order
//   uniforms         matching { name, type: "u32"[, length] } declarations
//   hasPads          always true for 1-D/2-D kernels; for larger kernels,
//                    whether the pads sum to a non-zero value
//   pw/phStartEndNotZero  whether begin + end padding along the last /
//                    second-to-last kernel axis is non-zero (false for
//                    kernels with more than two dimensions)
// Throws for kernels with more than two dimensions in NHWC format.
getUniformAndPadInfo = (outputShape, attributes) => {
  const channelsLast = attributes.format === "NHWC";
  const { kernelShape, strides, pads } = attributes;
  const programUniforms = [];
  const uniforms = [];

  // Registers one scalar u32 uniform.
  const addScalar = (name, data) => {
    programUniforms.push({ type: 12 /* uint32 */, data });
    uniforms.push({ name, type: "u32" });
  };
  // Registers one u32 array uniform.
  const addArray = (name, data) => {
    programUniforms.push({ type: 12 /* uint32 */, data });
    uniforms.push({ name, type: "u32", length: data.length });
  };

  addScalar("outputSize", ShapeUtil.size(outputShape));
  addScalar("kernelSize", ShapeUtil.size(kernelShape));

  if (kernelShape.length > 2) {
    if (channelsLast) {
      throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");
    }
    addArray("kernelStrides", ShapeUtil.computeStrides(kernelShape));
    addArray("pads", pads);
    addArray("strides", strides);
    const padTotal = pads.reduce((sum, cur) => sum + cur);
    return [programUniforms, uniforms, !!padTotal, false, false];
  }

  // Width axis: last kernel dimension.
  const pwStart = pads[pads.length / 2 - 1];
  const pwEnd = pads[pads.length - 1];
  addScalar("kw", kernelShape[kernelShape.length - 1]);
  addScalar("sw", strides[strides.length - 1]);
  addScalar("pwStart", pwStart);
  addScalar("pwEnd", pwEnd);
  const pwStartEndNotZero = !!(pwStart + pwEnd);

  let phStartEndNotZero = false;
  if (kernelShape.length === 2) {
    // Height axis: second-to-last kernel dimension.
    const phStart = pads[pads.length / 2 - 2];
    const phEnd = pads[pads.length - 2];
    addScalar("kh", kernelShape[kernelShape.length - 2]);
    addScalar("sh", strides[strides.length - 2]);
    addScalar("phStart", phStart);
    addScalar("phEnd", phEnd);
    phStartEndNotZero = !!(phStart + phEnd);
  }
  return [programUniforms, uniforms, true, pwStartEndNotZero, phStartEndNotZero];
};
3 : 2); + if (phStartEndNotZero) { + codeH = ` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${dimIdxH}] = indices[${dimIdxH}] * uniforms.sh - uniforms.phStart + j; + if (xIndices[${dimIdxH}] < 0 || xIndices[${dimIdxH}] >= uniforms.x_shape[${dimIdxH}]) { + pad += i32(uniforms.kw); + continue; + } + `; + } else { + codeH = ` + for (var j: u32 = 0u; j < uniforms.kh; j++) { + xIndices[${dimIdxH}] = indices[${dimIdxH}] * uniforms.sh - uniforms.phStart + j; + `; + } + codeHEnd = ` + } + `; + } + const poolingCode = ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(x, output)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + + let indices = ${output.offsetToIndices("global_idx")}; + var xIndices = ${output.offsetToIndices("global_idx")}; + + var value = ${dataType}(${start}); + var pad = 0; + ${codeH} + ${codeW} + ${codeHEnd} + ${op2} + + output[global_idx] = value; + }`; + return poolingCode; + } else { + if (isChannelsLast) { + throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format."); + } + const stridesRank = attributes.kernelShape.length; + const padsRank = attributes.pads.length; + let padCode = ""; + if (hasPads) { + padCode = ` + if (xIndices[j] >= uniforms.x_shape[j]) { + pad++; + isPad = true; + break; + } + } + if (!isPad) { + let x_val = x[${x.indicesToOffset("xIndices")}]; + ${op1} + }`; + } else { + padCode = ` + } + let x_val = x[${x.indicesToOffset("xIndices")}]; + ${op1} + `; + } + const poolingCode = ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(x, output)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let indices = ${output.offsetToIndices("global_idx")}; + var xIndices = ${output.offsetToIndices("global_idx")}; + + var offsets: array; + + var value = ${dataType}(${start}); + var pad = 0; + var isPad = false; + + for (var i: u32 = 0u; i < 
// Converts the raw pooling attributes (snake_case, auto_pad given as an
// index) into the camelCase shape shared by the pooling ops. auto_pad is
// mapped to its name through the table below; an index outside the table
// yields undefined.
parsePoolCommonAttributes = (attributes) => {
  const autoPadNames = ["NOTSET", "VALID", "SAME_UPPER", "SAME_LOWER"];
  const {
    format,
    auto_pad: autoPadIndex,
    ceil_mode: ceilMode,
    kernel_shape: kernelShape,
    strides,
    pads
  } = attributes;
  return {
    format,
    autoPad: autoPadNames[autoPadIndex],
    ceilMode,
    kernelShape,
    strides,
    pads
  };
};
// Parses the AveragePool attributes: the common pooling attributes plus
// countIncludePad (false only when count_include_pad is exactly 0), and a
// cacheKey derived from them. Throws when ceil_mode is anything other than 0.
parseAveragePoolAttributes = (attributes) => {
  const countIncludePad = attributes.count_include_pad !== 0;
  const common = parsePoolCommonAttributes(attributes);
  if (common.ceilMode !== 0) {
    throw new Error("using ceil() in shape computation is not yet supported for AveragePool");
  }
  // The key is computed from the attributes with an empty placeholder key.
  const parsed = { countIncludePad, ...common, cacheKey: "" };
  const cacheKey = createAveragePoolShaderKeyFromAttributes(parsed);
  return { ...parsed, cacheKey };
};
true, attributes)); + }; + createMaxPoolProgramInfo = (name, input, isGlobalOperator, attributes) => { + const [adjustedAttributes, outputShape] = getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator); + const op1 = ` + value = max(x_val, value); + `; + const op2 = ""; + const x = inputVariable("x", input.dataType, input.dims.length); + const inputDependencies = ["rank"]; + const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] = getUniformAndPadInfo(outputShape, adjustedAttributes); + programUniforms.push(...createTensorShapeVariables(input.dims, outputShape)); + return { + name, + shaderCache: { hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: input.dataType }], + dispatchGroup: { x: Math.ceil( + ShapeUtil.size(outputShape) / 64 + /* workgroup size */ + ) }, + programUniforms + }), + getShaderSource: (shaderHelper) => generatePoolingCode( + shaderHelper, + x, + input.dims.length, + outputShape.length, + adjustedAttributes, + op1, + op2, + input.dataType === 10 /* float16 */ ? 
// Checks that the scalar Range inputs describe a non-empty, finite
// progression. Throws an Error when
//   - start equals limit,
//   - delta is zero (the element count would not be finite),
//   - delta points away from limit (increasing range with a negative step or
//     decreasing range with a positive step).
validateInputsContent = (start, limit, delta) => {
  const sameStartLimit = start === limit;
  const zeroStep = delta === 0;
  const increasingRangeNegativeStep = start < limit && delta < 0;
  const decreasingRangePositiveStep = start > limit && delta > 0;
  if (sameStartLimit || zeroStep || increasingRangeNegativeStep || decreasingRangePositiveStep) {
    throw new Error("Range these inputs' contents are invalid.");
  }
};
/**
 * Entry point of the Range op: reads the scalar `start`, `limit` and `delta`
 * from the first three inputs, optionally validates them, and dispatches the
 * Range program with no tensor inputs bound.
 *
 * Only data types 6 (int32) and 1 (float) are read; for any other type all
 * three scalars stay 0.
 *
 * @param context compute context exposing `inputs` and `compute`
 */
range = (context) => {
  const elementType = context.inputs[0].dataType;
  // Fetches element 0 of a tensor using the accessor matching the first input's type.
  const firstElement = (tensor) => {
    if (elementType === 6 /* int32 */) {
      return tensor.getInt32Array()[0];
    }
    if (elementType === 1 /* float */) {
      return tensor.getFloat32Array()[0];
    }
    return 0;
  };
  const [start, limit, delta] = [0, 1, 2].map((index) => firstElement(context.inputs[index]));
  if (env2.webgpu.validateInputContent) {
    validateInputsContent(start, limit, delta);
  }
  const programInfo = createRangeProgramInfo(start, limit, delta, elementType);
  context.compute(programInfo, { inputs: [] });
};
/**
 * Validates the `scales` input of a Resize op.
 *
 * Every scale must be strictly positive. For a non-empty `scales`, the
 * "linear" and "cubic" modes additionally restrict how many scales may be
 * given and which of them must equal 1.
 *
 * The previous implementation wrote the positivity check as
 * `value > 0 || (() => { throw ... })`. The arrow function was never invoked,
 * so it only acted as a truthy value and the error could not be raised. The
 * check now throws directly (NaN is rejected as well).
 *
 * @param scales scale factors, one per resized dimension
 * @param attributes Resize attributes; only `mode` is read here
 * @throws {Error} when a scale is not positive or the layout is unsupported
 *   for the selected mode
 */
validateScales = (scales, attributes) => {
  if (scales.some((value) => !(value > 0))) {
    throw new Error("Resize requires scales input values to be positive");
  }
  if (scales.length === 0) {
    return;
  }
  const count = scales.length;
  const outerTwoAreOne = scales[0] === 1 && scales[1] === 1;
  const outerAndInnerAreOne = scales[0] === 1 && scales[3] === 1;
  // 4 scales are accepted only when two of them are fixed to 1 (positions 0,1 or 0,3).
  const isSupported4d = count === 4 && (outerTwoAreOne || outerAndInnerAreOne);
  if (attributes.mode === "linear") {
    const supported = count === 2 || count === 3 || isSupported4d || (count === 5 && outerTwoAreOne);
    if (!supported) {
      throw new Error(
        `For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and
            one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`
      );
    }
  } else if (attributes.mode === "cubic") {
    const supported = count === 2 || isSupported4d;
    if (!supported) {
      throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode");
    }
  }
};
opsetVersion > 10 ? [1, 2, 3] : [-1, inputs.length > 1 ? 1 : -1, -1]; + const rank = inputs[0].dims.length; + if (roiInputIndex > 0 && inputs.length > roiInputIndex && inputs[roiInputIndex].dims.length > 0) { + inputs[roiInputIndex].getFloat32Array().forEach((value) => roi.push(value)); + } else if (attributes.coordinateTransformMode === "tf_crop_and_resize") { + throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize"); + } + if (scalesInputIndex > 0 && inputs.length > scalesInputIndex && inputs[scalesInputIndex].dims.length > 0) { + inputs[scalesInputIndex].getFloat32Array().forEach((value) => scales.push(value)); + if (scales.length !== 0 && (scales.length !== rank && (opsetVersion >= 18 && scales.length !== attributes.axes.length))) { + throw new Error( + "Resize requires scales input size to be same as input rank or axes size for opset 18 and up" + ); + } + validateScales(scales, attributes); + if (attributes.axes.length > 0) { + updateScales(scales, attributes.axes, rank).forEach((value, index) => scales[index] = value); + } + } + if (sizesInputIndex > 0 && inputs.length > sizesInputIndex) { + inputs[sizesInputIndex].getBigInt64Array().forEach((value) => sizes.push(Number(value))); + if (sizes.length !== rank || opsetVersion >= 18 && sizes.length === attributes.axes.length) { + throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up"); + } + } + if (attributes.axes.length > 0) { + if (scales.length !== attributes.axes.length) { + throw new Error('Resize requires "scales" input size to be of axes rank when axes attributes is specified'); + } + if (sizes.length !== attributes.axes.length) { + throw new Error( + 'Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified' + ); + } + } + if (typeof scales !== "undefined" && typeof sizes !== "undefined" && scales.length > 0 && sizes.length > rank) { + throw new Error("Resize 
/**
 * Generates the WGSL helper `getOriginalCoordinateFromResizedCoordinate`,
 * which maps a coordinate in the resized tensor back to a (fractional)
 * coordinate in the original tensor for the given coordinate transform mode.
 *
 * Fixes in the "half_pixel_symmetric" branch:
 *  - `${dType}xScale` lacked parentheses and produced an undefined identifier
 *    such as `f32xScale`; it is now the conversion `${dType}(xScale)`.
 *  - The intermediates depend on runtime values and therefore use `let`
 *    instead of `const`.
 *  - `outputWidth` is the un-rounded output length `xScale * lengthOriginal`
 *    (ONNX Resize definition), not `xScale * lengthResized`.
 *
 * @param coordinateTransferMode ONNX `coordinate_transformation_mode` value
 * @param dType WGSL scalar type used for the result (e.g. "f32")
 * @returns WGSL source of the helper function
 * @throws {Error} for an unsupported transform mode
 */
getOriginalCoordinateFromResizedCoordinate = (coordinateTransferMode, dType) => `fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,
     lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${dType} { ` + (() => {
  switch (coordinateTransferMode) {
    case "asymmetric":
      return `return ${dType}(xResized) / ${dType}(xScale);`;
    case "pytorch_half_pixel":
      return `if (lengthResized > 1) {
                    return (${dType}(xResized) + 0.5) / ${dType}(xScale) - 0.5;
                  } else {
                    return 0.0;
                  }`;
    case "tf_half_pixel_for_nn":
      return `return (${dType}(xResized) + 0.5) / ${dType}(xScale);`;
    case "align_corners":
      return `if (lengthResized == 1) {
                    return 0.0;
                  } else {
                    // The whole part and the fractional part are calculated separately due to inaccuracy of floating
                    // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an
                    // offset-by-one error later in floor().
                    let whole = ${dType}(xResized * (lengthOriginal - 1) / (lengthResized - 1));
                    let fract =
                        ${dType}(xResized * (lengthOriginal - 1) % (lengthResized - 1)) / ${dType}(lengthResized - 1);
                    return whole + fract;
                  }`;
    case "tf_crop_and_resize":
      return `if (lengthResized > 1) {
                    return ${dType}(roiStart) * ${dType}(lengthOriginal - 1) +
                        (${dType}(xResized) * ${dType}(roiEnd - roiStart) * ${dType}(lengthOriginal - 1)) /
                        ${dType}(lengthResized - 1);
                  } else {
                    return 0.5 * ${dType}(roiStart + roiEnd) * ${dType}(lengthOriginal - 1);
                  }`;
    case "half_pixel_symmetric":
      return `let outputWidth = ${dType}(xScale) * ${dType}(lengthOriginal);
                  let adjustment = ${dType}(lengthResized) / outputWidth;
                  let center = ${dType}(lengthOriginal) / 2;
                  let offset = center * (1 - adjustment);
                  return offset + ((${dType}(xResized) + 0.5) / ${dType}(xScale)) - 0.5;`;
    case "half_pixel":
      return `return ((${dType}(xResized) + 0.5) / ${dType}(xScale)) - 0.5;`;
    default:
      throw new Error(`Coordinate transform mode ${coordinateTransferMode} is not supported`);
  }
})() + "}";
/**
 * Computes the initial output shape of a Resize op.
 *
 * - With `sizes` and no `axes`: the output shape is a copy of `sizes`.
 * - With `sizes` and `axes`: starts from `inputShape` and overwrites the
 *   listed axes with the matching entries of `sizes`.
 * - Without `sizes`: each input dimension is multiplied by its scale and
 *   rounded.
 *
 * Fix: the bound check used `Math.max(...axes) > inputShape.length`, which
 * accepted an axis equal to the rank and silently appended a dimension. Any
 * axis `>= inputShape.length` is now rejected.
 * NOTE(review): negative axes are still not checked here — confirm whether
 * they are normalized before this call.
 *
 * @param inputShape dimensions of the input tensor
 * @param scales per-dimension scale factors (used when `sizes` is empty)
 * @param sizes requested output sizes (may be empty)
 * @param axes axes that `sizes` refers to (may be empty)
 * @returns the output shape as a new array
 * @throws {Error} when an axis is out of bounds or neither scales nor sizes
 *   are given
 */
initOutputShape = (inputShape, scales, sizes, axes) => {
  if (sizes.length > 0) {
    if (axes.length === 0) {
      return [...sizes];
    }
    if (axes.some((axis) => axis >= inputShape.length)) {
      throw new Error("axes is out of bound");
    }
    const outputShape = [...inputShape];
    axes.forEach((axis, i) => {
      outputShape[axis] = sizes[i];
    });
    return outputShape;
  }
  if (scales.length === 0) {
    throw new Error("Resize requires either scales or sizes.");
  }
  return inputShape.map((value, index) => Math.round(value * scales[index]));
};
/**
 * Generates the WGSL helper `calculateOriginalIndicesFromOutputIndices`.
 *
 * The emitted function loops over every output dimension and, for each one,
 * reads the scale and the RoI low/high bounds from the `uniforms.scales` and
 * `uniforms.roi` arrays (the high bound sits `inputShape.length` entries after
 * the low bound). A dimension whose scale is exactly 1.0 keeps its output
 * index; every other dimension is mapped through the WGSL function
 * `getOriginalCoordinateFromResizedCoordinate`, which must be part of the same
 * shader.
 *
 * @param output output variable helper; `type.indices`, `type.value` and
 *   `indicesGet` are used
 * @param inputShape input dimensions (only the length is used)
 * @param outputShape output dimensions (only the length is used)
 * @param scalesLength number of entries in the `scales` uniform
 * @param roiLength number of entries in the `roi` uniform
 * @returns WGSL source of the helper function
 */
calculateOriginalIndicesFromOutputIndices = (output, inputShape, outputShape, scalesLength, roiLength) => `
    fn calculateOriginalIndicesFromOutputIndices(output_indices: ${output.type.indices}) -> array<${output.type.value}, ${outputShape.length}> {
      var original_indices: array<${output.type.value}, ${outputShape.length}>;
      for (var i:u32 = 0; i < ${outputShape.length}; i++) {
        var output_index = ${output.indicesGet("output_indices", "i")};
        var scale = ${getElementAt("uniforms.scales", "i", scalesLength)};
        var roi_low = ${getElementAt("uniforms.roi", "i", roiLength)};
        var roi_hi = ${getElementAt("uniforms.roi", `i + ${inputShape.length}`, roiLength)};
        if (scale == 1.0) {
          original_indices[i] = ${output.type.value}(output_index);
        } else {
          var input_shape_i = ${getElementAt("uniforms.input_shape", "i", inputShape.length)};
          var output_shape_i = ${getElementAt("uniforms.output_shape", "i", outputShape.length)};
          original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i,
                                                                           input_shape_i, roi_low, roi_hi);
        }
      }
      return original_indices;
    }`;
/**
 * Generates the WGSL helper `checkInputIndices`, which returns `false` as soon
 * as one component of `input_indices` is negative or not smaller than the
 * matching entry of `uniforms.input_shape`, and `true` otherwise.
 *
 * @param input input variable helper; `type.indices` and `indicesGet` are used
 * @param inputShape input dimensions (only the length is used)
 * @returns WGSL source of the helper function
 */
checkInputIndices = (input, inputShape) => `
    fn checkInputIndices(input_indices: ${input.type.indices}) -> bool {
      for (var i:u32 = 0; i < ${inputShape.length}; i++) {
        var input_index = ${input.indicesGet("input_indices", "i")};
        if (input_index < 0 || input_index >= ${getElementAt("uniforms.input_shape", "i", inputShape.length)}) {
          return false;
        }
      }
      return true;
    }`;
` + ${input.indicesSet("input_indices", channelIdx, "channel")}; + ${input.indicesSet("input_indices", batchIdx, "batch")}; +` : ""; + bilinearInterpolation = (input, output, inputShape, useExtrapolation, extrapolationValue) => { + const isNchw = true; + const [batchIdx, heightIdx, widthIdx, channelIdx] = inputShape.length === 2 ? [-1, 0, 1, -1] : isNchw ? [0, 2, 3, 1] : [0, 1, 2, 3]; + const dType = input.type.value; + return ` + fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${dType} { + var input_indices: ${input.type.indices}; + ${input.indicesSet("input_indices", heightIdx, `max(0, min(row, ${inputShape[heightIdx]} - 1))`)}; + ${input.indicesSet("input_indices", widthIdx, `max(0, min(col, ${inputShape[widthIdx]} - 1))`)}; + ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 2)} + return ${input.getByIndices("input_indices")}; + } + + fn bilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var row:${dType} = originalIndices[${heightIdx}]; + var col:${dType} = originalIndices[${widthIdx}]; + ${useExtrapolation ? `if (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > (${inputShape[widthIdx]} - 1)) { + return ${extrapolationValue}; + }` : ""}; + row = max(0, min(row, ${inputShape[heightIdx]} - 1)); + col = max(0, min(col, ${inputShape[widthIdx]} - 1)); + var row1: u32 = u32(row); + var col1: u32 = u32(col); + var row2: u32 = u32(row + 1); + var col2: u32 = u32(col + 1); + var channel: u32 = ${inputShape.length > 2 ? `u32(originalIndices[${channelIdx}])` : "0"}; + var batch: u32 = ${inputShape.length > 2 ? 
/**
 * Generates the WGSL functions for bicubic Resize: `rowCubicInterpolation`,
 * `colCubicInterpolation`, `getCubicInterpolationCoefs`,
 * `cubicInterpolation1D` and the entry point `bicubicInterpolation`.
 *
 * Scales, shapes and RoI bounds are baked into the shader text as literals.
 *
 * Fix: the RoI end passed to `getOriginalCoordinateFromResizedCoordinate` was
 * emitted as `${roi[idx]} + ${inputShape.length}` (the RoI *start* plus the
 * rank). `roi` stores all starts followed by all ends, so the end for
 * dimension `idx` is `roi[idx + inputShape.length]`.
 *
 * @param input input variable helper
 * @param output output variable helper
 * @param inputShape input dimensions (2D, or 4D with height/width at 2/3)
 * @param outputShape output dimensions
 * @param scales per-dimension scale factors
 * @param roi RoI bounds: starts for every dimension followed by ends
 * @param cubicCoeffA the cubic coefficient `a`
 * @param useExtrapolation whether out-of-range coordinates yield
 *   `extrapolationValue`
 * @param extrapolationValue value returned for extrapolated positions
 * @param excludeOutside whether taps outside the input get a zero weight
 * @returns WGSL source containing the functions listed above
 */
bicubicInterpolation = (input, output, inputShape, outputShape, scales, roi, cubicCoeffA, useExtrapolation, extrapolationValue, excludeOutside) => {
  const is2D = inputShape.length === 2;
  const isNchw = true;
  const [heightIdx, widthIdx] = is2D ? [0, 1] : isNchw ? [2, 3] : [1, 2];
  const dType = input.type.value;
  // Emits the 1D interpolation along one spatial dimension. The column pass
  // calls the row pass for each of its four taps.
  const createCubicInterpolationFunction = (idx) => {
    const direction = idx === heightIdx ? "row" : "col";
    return `
      fn ${direction}CubicInterpolation(input_indices: ${input.type.indices}, output_indices: ${output.type.indices}) -> ${dType} {
        var output_index = ${output.indicesGet("output_indices", idx)};
        var originalIdx: ${dType} = getOriginalCoordinateFromResizedCoordinate(output_index, ${scales[idx]},
        ${outputShape[idx]}, ${inputShape[idx]}, ${roi[idx]}, ${roi[idx + inputShape.length]});
        var fractOriginalIdx: ${dType} = originalIdx - floor(originalIdx);
        var coefs = getCubicInterpolationCoefs(fractOriginalIdx);

        if (${useExtrapolation} && (originalIdx < 0 || originalIdx > (${inputShape[idx]} - 1))) {
          return ${extrapolationValue};
        }
        var data: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);
        for (var i: i32 = -1; i < 3; i++) {
          var ${direction}: ${dType} = originalIdx + ${dType}(i);
          if (${direction} < 0 || ${direction} >= ${inputShape[idx]}) {
            ${(() => {
      if (excludeOutside) {
        return `coefs[i + 1] = 0.0;
                        continue;`;
      } else if (useExtrapolation) {
        return `return ${extrapolationValue};`;
      } else {
        return `${direction} = max(0, min(${direction}, ${inputShape[idx]} - 1));`;
      }
    })()};
          }
          var input_indices_copy: ${input.type.indices} = input_indices;
          ${input.indicesSet("input_indices_copy", idx, `u32(${direction})`)};
          data[i + 1] = ${idx === heightIdx ? input.getByIndices("input_indices_copy") : "rowCubicInterpolation(input_indices_copy, output_indices)"};
        }
        return cubicInterpolation1D(data, coefs);
      }`;
  };
  return `
    ${createCubicInterpolationFunction(heightIdx)};
    ${createCubicInterpolationFunction(widthIdx)};
  fn getCubicInterpolationCoefs(s: ${dType}) -> array<${dType}, 4> {
    var absS = abs(s);
    var coeffs: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);
    var oneMinusAbsS: ${dType} = 1.0 - absS;
    var twoMinusAbsS: ${dType} = 2.0 - absS;
    var onePlusAbsS: ${dType} = 1.0 + absS;
    coeffs[0] = ((${cubicCoeffA} * onePlusAbsS - 5 * ${cubicCoeffA}) * onePlusAbsS + 8 * ${cubicCoeffA}) * onePlusAbsS - 4 * ${cubicCoeffA};
    coeffs[1] = ((${cubicCoeffA} + 2) * absS - (${cubicCoeffA} + 3)) * absS * absS + 1;
    coeffs[2] = ((${cubicCoeffA} + 2) * oneMinusAbsS - (${cubicCoeffA} + 3)) * oneMinusAbsS * oneMinusAbsS + 1;
    coeffs[3] = ((${cubicCoeffA} * twoMinusAbsS - 5 * ${cubicCoeffA}) * twoMinusAbsS + 8 * ${cubicCoeffA}) * twoMinusAbsS - 4 * ${cubicCoeffA};
    return coeffs;
  }

  fn cubicInterpolation1D(x: array<${dType}, 4>, coefs: array<${dType}, 4>) -> ${dType} {
    var coefsSum: ${dType} = coefs[0] + coefs[1] + coefs[2] + coefs[3];
    return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum;
  }

  fn bicubicInterpolation(output_indices: ${output.type.indices}) -> ${dType} {
    var input_indices: ${input.type.indices} = output_indices;
    return colCubicInterpolation(input_indices, output_indices);
  }
    `;
};
[0, 2, 3, 4, 1] : [0, 1, 2, 3, 4]; + const dType = input.type.value; + return ` + fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${dType} { + var input_indices: ${input.type.indices}; + ${input.indicesSet("input_indices", depthIdx, `max(0, min(depth, ${inputShape[depthIdx]} - 1))`)}; + ${input.indicesSet("input_indices", heightIdx, `max(0, min(height, ${inputShape[heightIdx]} - 1))`)}; + ${input.indicesSet("input_indices", widthIdx, `max(0, min(width, ${inputShape[widthIdx]} - 1))`)}; + ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 3)} + return ${input.getByIndices("input_indices")}; + } + + fn trilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); + var depth:${dType} = originalIndices[${depthIdx}]; + var height:${dType} = originalIndices[${heightIdx}]; + var width:${dType} = originalIndices[${widthIdx}]; + ${useExtrapolation ? `if (depth < 0 || depth > (${inputShape[depthIdx]} - 1) || height < 0 || height > (${inputShape[heightIdx]} - 1) || width < 0 || (width > ${inputShape[widthIdx]} - 1)) { + return ${extrapolationValue}; + }` : ""}; + + depth = max(0, min(depth, ${inputShape[depthIdx]} - 1)); + height = max(0, min(height, ${inputShape[heightIdx]} - 1)); + width = max(0, min(width, ${inputShape[widthIdx]} - 1)); + var depth1: u32 = u32(depth); + var height1: u32 = u32(height); + var width1: u32 = u32(width); + var depth2: u32 = u32(depth + 1); + var height2: u32 = u32(height + 1); + var width2: u32 = u32(width + 1); + var channel: u32 = ${inputShape.length > 3 ? `u32(originalIndices[${channelIdx}])` : "0"}; + var batch: u32 = ${inputShape.length > 3 ? 
`u32(originalIndices[${batchIdx}])` : "0"}; + + var x111: ${dType} = getInputValue(batch, channel, depth1, height1, width1); + var x112: ${dType} = getInputValue(batch, channel, depth1, height1, width2); + var x121: ${dType} = getInputValue(batch, channel, depth1, height2, width1); + var x122: ${dType} = getInputValue(batch, channel, depth1, height2, width2); + var x211: ${dType} = getInputValue(batch, channel, depth2, height1, width1); + var x212: ${dType} = getInputValue(batch, channel, depth2, height1, width2); + var x221: ${dType} = getInputValue(batch, channel, depth2, height2, width1); + var x222: ${dType} = getInputValue(batch, channel, depth2, height2, width2); + var dx1: ${dType} = abs(depth - ${dType}(depth1)); + var dx2: ${dType} = abs(${dType}(depth2) - depth); + var dy1: ${dType} = abs(height - ${dType}(height1)); + var dy2: ${dType} = abs(${dType}(height2) - height); + var dz1: ${dType} = abs(width - ${dType}(width1)); + var dz2: ${dType} = abs(${dType}(width2) - width); + if (depth1 == depth2) { + dx1 = 0.5; + dx2 = 0.5; + } + if (height1 == height2) { + dy1 = 0.5; + dy2 = 0.5; + } + if (width1 == width2) { + dz1 = 0.5; + dz2 = 0.5; + } + return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 + + x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1); + }`; + }; + createResizeProgramInfo = (inputTensor, attributes, opsetVersion, scalesInput, sizes, roiInput) => { + const inputShape = inputTensor.dims; + const roi = updateRoI(roiInput, attributes.axes, inputShape.length); + let outputShape = initOutputShape(inputShape, scalesInput, sizes, attributes.axes); + let scales = scalesInput.slice(); + if (scalesInput.length === 0) { + scales = inputShape.map((value, index) => value === 0 ? 
1 : outputShape[index] / value); + if (attributes.keepAspectRatioPolicy !== "stretch") { + outputShape = adjustOutputShape(inputShape, scales, attributes); + } + } + const output = outputVariable("output", inputTensor.dataType, outputShape.length); + const input = inputVariable("input", inputTensor.dataType, inputShape.length); + const outputSize = ShapeUtil.size(outputShape); + const noScale = inputShape.length === outputShape.length && inputShape.every((d, i) => d === outputShape[i]); + const useExtrapolation = attributes.coordinateTransformMode === "tf_crop_and_resize"; + const extrapolationValue = attributes.extrapolationValue; + const dataType = input.type.value; + const getShaderSource = (shaderHelper) => ` + ${noScale ? "" : ` + ${getOriginalCoordinateFromResizedCoordinate(attributes.coordinateTransformMode, dataType)}; + ${(() => { + switch (attributes.mode) { + case "nearest": + return ` + ${checkInputIndices(input, inputShape)}; + ${getNearestPixelFromOriginal(attributes.nearestMode, opsetVersion, dataType)}; + ${calculateInputIndicesFromOutputIndices( + input, + output, + inputShape, + outputShape, + scales.length, + roi.length, + useExtrapolation + )}; + `; + case "linear": + return ` + ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales.length, roi.length)}; + ${(() => { + if (inputShape.length === 2 || inputShape.length === 4) { + return `${bilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`; + } else if (inputShape.length === 3 || inputShape.length === 5) { + return `${trilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`; + } else { + throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode."); + } + })()}; + `; + case "cubic": + return ` + ${(() => { + if (inputShape.length === 2 || inputShape.length === 4) { + return `${bicubicInterpolation( + input, + output, + inputShape, + outputShape, + scales, + roi, + 
attributes.cubicCoeffA, + useExtrapolation, + attributes.extrapolationValue, + attributes.excludeOutside + )}`; + } else { + throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode."); + } + })()}; + `; + default: + throw Error("Invalid resize mode"); + } + })()}; + `} + ${shaderHelper.registerUniform("output_size", "u32").registerUniform("scales", "f32", scales.length).registerUniform("roi", "f32", roi.length).declareVariables(input, output)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + ${noScale ? "output[global_idx] = input[global_idx];" : ` + let output_indices = ${output.offsetToIndices("global_idx")}; + var input_indices: ${input.type.indices}; + ${(() => { + switch (attributes.mode) { + case "nearest": + return `input_indices = calculateInputIndicesFromOutputIndices(output_indices); + if (checkInputIndices(input_indices)) { + output[global_idx] = ${input.getByIndices("input_indices")}; + } else { + output[global_idx] = ${attributes.extrapolationValue}; + }`; + case "linear": + return `output[global_idx] = ${inputShape.length === 2 || inputShape.length === 4 ? "bilinearInterpolation" : "trilinearInterpolation"}(output_indices);`; + case "cubic": + return "output[global_idx] = bicubicInterpolation(output_indices);"; + default: + throw Error(`Unsupported resize mode: ${attributes.mode}`); + } + })()}; +`} + }`; + return { + name: "Resize", + shaderCache: { + hint: `${attributes.cacheKey}|${opsetVersion}|${scales.length > 0 ? scales : ""}|${sizes.length > 0 ? sizes : ""}|${roi.length > 0 ? 
/**
 * Reads the opset version stored as a little-endian unsigned 32-bit integer
 * at the start of `context.customDataBuffer`.
 *
 * Fix: the previous code called
 * `new Uint32Array(customDataBuffer, customDataBuffer.byteOffset, 1)`. When
 * the first argument is itself a typed array, that constructor copies its
 * elements and ignores the offset/length arguments, so the result was only
 * the first *byte* of the buffer (correct only for values below 256). The
 * bytes are now assembled explicitly, which also avoids the 4-byte alignment
 * requirement of a `Uint32Array` view.
 *
 * @param context compute context; `customDataBuffer` may be a typed array /
 *   DataView over the data or a plain ArrayBuffer
 * @returns the opset version (0 when the buffer is empty)
 */
getOpsetVersionFromCustomDataBuffer = (context) => {
  const customDataBuffer = context.customDataBuffer;
  const bytes = ArrayBuffer.isView(customDataBuffer)
    ? new Uint8Array(customDataBuffer.buffer, customDataBuffer.byteOffset, customDataBuffer.byteLength)
    : new Uint8Array(customDataBuffer);
  // Little-endian: the byte at the highest offset is the most significant.
  let opsetVersion = 0;
  for (let i = Math.min(bytes.length, 4) - 1; i >= 0; i--) {
    opsetVersion = opsetVersion * 256 + bytes[i];
  }
  return opsetVersion;
};
/**
 * Validates the inputs of the RotaryEmbedding op.
 *
 * Expects `inputs` to be `[input, position_ids, cos_cache, sin_cache]` and
 * checks their ranks, the agreement of the two caches, the consistency of
 * `numHeads` / `rotaryEmbeddingDim` with the derived head size, and that the
 * sequence length does not exceed the cached maximum.
 *
 * @param inputs the four input tensors (only `dims` is read)
 * @param attributes op attributes; `numHeads` and `rotaryEmbeddingDim` are read
 * @throws {Error} on the first violated constraint
 */
validateInputs22 = (inputs, attributes) => {
  const [input, positionIds, cosCache, sinCache] = inputs;
  const { numHeads, rotaryEmbeddingDim } = attributes;

  const inputRank = input.dims.length;
  if (!(inputRank === 3 || inputRank === 4)) {
    throw new Error(`Input 'x' is expected to have 3 or 4 dimensions, got ${input.dims.length}`);
  }

  const positionIdsIsScalarLike = ShapeUtil.areEqual(positionIds.dims, []) || ShapeUtil.areEqual(positionIds.dims, [1]);
  if (!positionIdsIsScalarLike && positionIds.dims.length !== 2) {
    throw new Error(`Input 'position_ids' is expected to have 0, 1, or 2 dimensions, got ${positionIds.dims.length}`);
  }

  if (cosCache.dims.length !== 2) {
    throw new Error(`Input 'cos_cache' is expected to have 2 dimensions, got ${cosCache.dims.length}`);
  }
  if (sinCache.dims.length !== 2) {
    throw new Error(`Input 'sin_cache' is expected to have 2 dimensions, got ${sinCache.dims.length}`);
  }
  if (!ShapeUtil.areEqual(cosCache.dims, sinCache.dims)) {
    throw new Error("Inputs 'cos_cache' and 'sin_cache' are expected to have the same shape");
  }

  if (rotaryEmbeddingDim > 0 && numHeads === 0) {
    throw new Error("num_heads must be provided if rotary_embedding_dim is specified");
  }

  // Derived sizes: the sequence axis is the second-to-last input dimension.
  const batchSize = input.dims[0];
  const sequenceLength = input.dims[input.dims.length - 2];
  const maxSequenceLength = cosCache.dims[0];
  const hiddenSize = ShapeUtil.sizeFromDimension(input.dims, 1) / sequenceLength;
  let headSize;
  if (rotaryEmbeddingDim === 0) {
    headSize = cosCache.dims[1] * 2;
  } else {
    headSize = hiddenSize / numHeads;
  }
  if (rotaryEmbeddingDim > headSize) {
    throw new Error("rotary_embedding_dim must be less than or equal to head_size");
  }

  if (positionIds.dims.length === 2) {
    const [idsBatch, idsSequence] = [positionIds.dims[0], positionIds.dims[1]];
    if (batchSize !== idsBatch) {
      throw new Error(`Input 'position_ids' dimension 0 should be of size batch_size, got ${positionIds.dims[0]}`);
    }
    if (sequenceLength !== idsSequence) {
      throw new Error(`Input 'position_ids' dimension 1 should be of size sequence_length, got ${positionIds.dims[1]}`);
    }
  }

  const cacheWidth = cosCache.dims[1];
  const matchesHalfHead = headSize / 2 === cacheWidth;
  const matchesHalfRotary = rotaryEmbeddingDim / 2 === cacheWidth;
  if (!(matchesHalfHead || matchesHalfRotary)) {
    throw new Error(`Input 'cos_cache' dimension 1 should be same as head_size / 2 or rotary_embedding_dim / 2, got ${cosCache.dims[1]}`);
  }

  if (sequenceLength > maxSequenceLength) {
    throw new Error("Updating cos_cache and sin_cache in RotaryEmbedding is not currently supported");
  }
};
new Array({ type: 12 /* uint32 */, data: [batchStride, hiddenSize, headSize, 1] }) : [], + ...inputs[0].dims.length === 4 ? new Array( + { type: 12 /* uint32 */, data: [batchStride, headSize, sequenceLength * headSize, 1] } + ) : [], + ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims, inputs[2].dims, inputs[3].dims, inputs[0].dims) + ]; + const getShaderSource = (shaderHelper) => { + const input = inputVariable("input", inputs[0].dataType, inputs[0].dims.length); + const positionIds = inputVariable("position_ids", inputs[1].dataType, inputs[1].dims.length); + const cosCache = inputVariable("cos_cache", inputs[2].dataType, inputs[2].dims.length); + const sinCache = inputVariable("sin_cache", inputs[3].dataType, inputs[3].dims.length); + const output = outputVariable("output", inputs[0].dataType, inputs[0].dims.length); + shaderHelper.registerUniforms([ + { name: "scale", type: "f32" }, + { name: "global_shape", type: "u32", length: globalShape.length }, + { name: "global_strides", type: "u32", length: globalStrides.length }, + { name: "input_output_strides", type: "u32", length: globalStrides.length } + ]); + return ` + ${shaderHelper.declareVariables(input, positionIds, cosCache, sinCache, output)} + + ${shaderHelper.mainStart(WORKGROUP_SIZE)} + let half_rotary_emb_dim = uniforms.${cosCache.name}_shape[1]; + let bsnh = global_idx / uniforms.global_strides % uniforms.global_shape; + let size = uniforms.global_shape[0] * uniforms.global_strides[0]; + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("size")} + + if (bsnh[3] < half_rotary_emb_dim) { + let position_ids_idx = + ${positionIds.broadcastedIndicesToOffset("bsnh.xy", outputVariable("", positionIds.type.tensor, 2))}; + let position_id = + u32(${positionIds.getByOffset("position_ids_idx")}) + select(0, bsnh[1], position_ids_idx == 0); + let i = dot(bsnh, uniforms.input_output_strides) + select(0, bsnh[3], ${interleaved}); + let j = i + select(half_rotary_emb_dim, 1, ${interleaved}); + let re = 
${input.getByOffset("i")} * ${cosCache.get("position_id", "bsnh[3]")} - + ${input.getByOffset("j")} * ${sinCache.get("position_id", "bsnh[3]")}; + ${output.setByOffset("i", "re")} + let im = ${input.getByOffset("i")} * ${sinCache.get("position_id", "bsnh[3]")} + + ${input.getByOffset("j")} * ${cosCache.get("position_id", "bsnh[3]")}; + ${output.setByOffset("j", "im")} + } else { + let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim; + ${output.setByOffset("k", input.getByOffset("k"))} + } + }`; + }; + return { + name: "RotaryEmbedding", + shaderCache: { + hint: createAttributeWithCacheKey({ + interleaved + }).cacheKey, + inputDependencies: ["rank", "rank", "rank", "rank"] + }, + getShaderSource, + getRunData: () => ({ + outputs: [{ dims: inputs[0].dims, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil(ShapeUtil.size(globalShape) / WORKGROUP_SIZE) }, + programUniforms + }) + }; + }; + rotaryEmbedding = (context, attributes) => { + validateInputs22(context.inputs, attributes); + context.compute(createRotaryEmbeddingProgramInfo(context.inputs, attributes)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts +var validateInputs23, createSkipLayerNormProgramInfo, skipLayerNorm; +var init_skip_layer_norm = __esm({ + "web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + validateInputs23 = (inputs) => { + if (!inputs || inputs.length < 3) { + throw new Error("layerNorm requires at least 3 inputs."); + } + const input = inputs[0]; + const skip = inputs[1]; + const gamma = inputs[2]; + if (input.dataType !== skip.dataType || input.dataType !== gamma.dataType) { + throw new Error("All inputs must have the same data type"); + } + if (input.dims.length !== 3 && input.dims.length !== 2) { + throw new Error("Input must be 2D or 3D"); + } + if (skip.dims.length !== 3 && skip.dims.length !== 2) { + throw new Error("Skip must be 2D or 3D"); + } + const 
hiddenSize = input.dims[input.dims.length - 1]; + const sequenceLength = input.dims[input.dims.length - 2]; + if (skip.dims[skip.dims.length - 1] !== hiddenSize) { + throw new Error("Skip must have the same hidden size as input"); + } + if (skip.dims[skip.dims.length - 2] !== sequenceLength) { + throw new Error("Skip must have the same sequence length as input"); + } + if (gamma.dims.length !== 1) { + throw new Error("Gamma must be 1D"); + } + if (gamma.dims[gamma.dims.length - 1] !== hiddenSize) { + throw new Error("Gamma must have the same hidden size as input"); + } + if (inputs.length > 3) { + const beta = inputs[3]; + if (beta.dims.length !== 1) { + throw new Error("Beta must be 1D"); + } + if (beta.dims[beta.dims.length - 1] !== hiddenSize) { + throw new Error("Beta must have the same hidden size as input"); + } + } + if (inputs.length > 4) { + const bias = inputs[4]; + if (bias.dims.length !== 1) { + throw new Error("Bias must be 1D"); + } + if (bias.dims[bias.dims.length - 1] !== hiddenSize) { + throw new Error("Bias must have the same hidden size as input"); + } + } + }; + createSkipLayerNormProgramInfo = (inputs, attributes, outputCount, isTraining) => { + const simplified = attributes.simplified; + const inputShape = inputs[0].dims; + const inputSize = ShapeUtil.size(inputShape); + const outputShape = inputShape; + const outputSize = inputSize; + const hiddenSize = inputShape.slice(-1)[0]; + const meanInvStdDevDim = isTraining ? 
inputShape.slice(0, -1).concat(1) : []; + const hasBetaInput = !simplified && inputs.length > 3; + const hasBiasInput = inputs.length > 4; + const hasMeanOutput = isTraining && outputCount > 1; + const hasInvStdDevOutput = isTraining && outputCount > 2; + const hasInputSkipBiasSumOutput = outputCount > 3; + const components = getMaxComponents(hiddenSize); + const programUniforms = [ + { type: 12 /* uint32 */, data: outputSize }, + { type: 12 /* uint32 */, data: components }, + { type: 12 /* uint32 */, data: hiddenSize }, + { type: 1 /* float */, data: attributes.epsilon } + ]; + const getShaderSource = (shaderHelper) => { + const uniformsArray = [ + { name: "output_size", type: "u32" }, + { name: "components", type: "u32" }, + { name: "hidden_size", type: "u32" }, + { name: "epsilon", type: "f32" } + ]; + const variables = [ + inputVariable("x", inputs[0].dataType, inputs[0].dims, components), + inputVariable("skip", inputs[1].dataType, inputs[1].dims, components), + inputVariable("gamma", inputs[2].dataType, inputs[2].dims, components) + ]; + if (hasBetaInput) { + variables.push(inputVariable("beta", inputs[3].dataType, inputs[3].dims, components)); + } + if (hasBiasInput) { + variables.push(inputVariable("bias", inputs[4].dataType, inputs[4].dims, components)); + } + variables.push(outputVariable("output", inputs[0].dataType, outputShape, components)); + if (hasMeanOutput) { + variables.push(outputVariable("mean_output", 1 /* float */, meanInvStdDevDim)); + } + if (hasInvStdDevOutput) { + variables.push(outputVariable("inv_std_output", 1 /* float */, meanInvStdDevDim)); + } + if (hasInputSkipBiasSumOutput) { + variables.push(outputVariable("input_skip_bias_sum", inputs[0].dataType, outputShape, components)); + } + const dataType = tensorTypeToWsglStorageType(inputs[0].dataType); + return ` + + ${shaderHelper.registerUniforms(uniformsArray).declareVariables(...variables)} + + ${shaderHelper.mainStart()} + 
${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size / uniforms.hidden_size")} + let hidden_size_vectorized: u32 = uniforms.hidden_size / uniforms.components; + let offset = global_idx * hidden_size_vectorized; + var sum = ${fillVector("f32", components)}; + var squareSum = ${fillVector("f32", components)}; + for (var i: u32 = 0; i < hidden_size_vectorized; i++) { + let skip_value = skip[offset + i]; + let bias_value = ${hasBiasInput ? "bias[i]" : dataType + "(0.0)"}; + let input_value = x[offset + i]; + let value = input_value + skip_value + bias_value; + ${hasInputSkipBiasSumOutput ? "input_skip_bias_sum[offset + i] = value;" : ""} + output[offset + i] = value; + let f32_value = ${castToF32(dataType, components, "value")}; + sum += f32_value; + squareSum += f32_value * f32_value; + } + let mean = ${sumVector("sum", components)} / f32(uniforms.hidden_size); + let inv_std_dev = inverseSqrt(${sumVector("squareSum", components)} / f32(uniforms.hidden_size) ${simplified ? "" : "- mean * mean"} + uniforms.epsilon); + ${hasMeanOutput ? "mean_output[global_idx] = mean;" : ""} + ${hasInvStdDevOutput ? "inv_std_output[global_idx] = inv_std_dev;" : ""} + for (var i: u32 = 0; i < hidden_size_vectorized; i++) { + output[offset + i] = (output[offset + i] ${simplified ? "" : `- ${dataType}(mean)`}) * ${dataType}(inv_std_dev) * gamma[i] ${hasBetaInput ? 
"+ beta[i]" : ""}; + } + }`; + }; + const outputs = [{ dims: outputShape, dataType: inputs[0].dataType }]; + if (outputCount > 1) { + outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ }); + } + if (outputCount > 2) { + outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ }); + } + if (outputCount > 3) { + outputs.push({ dims: inputShape, dataType: inputs[0].dataType }); + } + return { + name: "SkipLayerNormalization", + shaderCache: { + hint: `${components};${hasMeanOutput};${hasInvStdDevOutput};${hasInputSkipBiasSumOutput}`, + inputDependencies: inputs.map((_input, _index) => "type") + }, + getShaderSource, + getRunData: () => ({ outputs, dispatchGroup: { x: Math.ceil(outputSize / hiddenSize / 64) }, programUniforms }) + }; + }; + skipLayerNorm = (context, attributes) => { + const isTraining = false; + validateInputs23(context.inputs); + const outputs = [0]; + if (context.outputCount > 1) { + outputs.push(isTraining ? 1 : -3); + } + if (context.outputCount > 2) { + outputs.push(isTraining ? 
2 : -3); + } + if (context.outputCount > 3) { + outputs.push(3); + } + context.compute( + createSkipLayerNormProgramInfo(context.inputs, attributes, context.outputCount, isTraining), + { outputs } + ); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/slice.ts +var validateInputs24, readInput, createSliceAttributesFromInputs, fixStartEndValues, calculateInputIndicesImpl, createSliceProgramInfo, slice, parseSliceAttributes; +var init_slice = __esm({ + "web/lib/wasm/jsep/webgpu/ops/slice.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + validateInputs24 = (inputs, attributes) => { + if (!inputs || inputs.length < 1) { + throw new Error("too few inputs"); + } + if (attributes.axes.length !== 0) { + if (attributes.axes.length !== attributes.starts.length || attributes.axes.length !== attributes.ends.length) { + throw new Error("axes, starts and ends must have the same length"); + } + } else if (attributes.starts.length !== attributes.ends.length) { + throw new Error("starts and ends must have the same length"); + } + inputs.slice(1).forEach((_, idx) => { + if (inputs[idx + 1].dataType !== 6 /* int32 */ && inputs[idx + 1].dataType !== 7 /* int64 */) { + throw new Error(`Input ${idx} must be an array of int32 or int64`); + } + }); + }; + readInput = (inputs, idx) => { + const input = []; + if (inputs.length > idx) { + if (inputs[idx].dataType === 7 /* int64 */) { + inputs[idx].getBigInt64Array().forEach((v) => input.push(Number(v))); + } else if (inputs[idx].dataType === 6 /* int32 */) { + inputs[idx].getInt32Array().forEach((v) => input.push(Number(v))); + } else { + throw new Error(`Input ${idx} must be an array of int32 or int64`); + } + } + return input; + }; + createSliceAttributesFromInputs = (inputs, attributes) => { + if (inputs.length > 1) { + const starts = readInput(inputs, 1); + const ends = readInput(inputs, 2); + let axes = readInput(inputs, 3); + if (axes.length === 0) { + axes = 
[...Array(inputs[0].dims.length).keys()]; + } + return createAttributeWithCacheKey({ starts, ends, axes }); + } else { + return attributes; + } + }; + fixStartEndValues = (value, index, inputShape, axes, steps) => { + let newValue = value; + if (value < 0) { + newValue += inputShape[axes[index]]; + } + if (steps[index] < 0) { + return Math.max(0, Math.min(newValue, inputShape[axes[index]] - 1)); + } else { + return Math.max(0, Math.min(newValue, inputShape[axes[index]])); + } + }; + calculateInputIndicesImpl = (input, output, inputShape) => `fn calculateInputIndices(output_indices: ${output.type.indices}) -> ${input.type.indices} { + var input_indices: ${input.type.indices}; + var carry = 0u; + for (var i = ${inputShape.length}; i >= 0; i--) { + let input_shape_i = ${getElementAt("uniforms.input_shape", "i", inputShape.length)}; + let steps_i = ${getElementAt("uniforms.steps", "i", inputShape.length)}; + let signs_i = ${getElementAt("uniforms.signs", "i", inputShape.length)}; + let starts_i = ${getElementAt("uniforms.starts", "i", inputShape.length)}; + var output_index = ${output.indicesGet("output_indices", "i")}; + var input_index = output_index * steps_i + starts_i + carry; + carry = input_index / input_shape_i; + input_index = input_index % input_shape_i; + if (signs_i < 0) { + input_index = input_shape_i - input_index - 1u + starts_i; + } + ${input.indicesSet("input_indices", "i", "input_index")}; + } + return input_indices; + }`; + createSliceProgramInfo = (inputs, attributes) => { + const inputShape = inputs[0].dims; + const inputSize = ShapeUtil.size(inputShape); + const axes = attributes.axes.length > 0 ? 
ShapeUtil.normalizeAxes(attributes.axes, inputShape.length) : [...Array(inputShape.length).keys()]; + let steps = readInput(inputs, 4); + steps.forEach((step) => step !== 0 || (() => { + throw new Error("step cannot be 0"); + })); + if (steps.length === 0) { + steps = Array(axes.length).fill(1); + } + const starts = attributes.starts.map((start, i) => fixStartEndValues(start, i, inputShape, axes, steps)); + const ends = attributes.ends.map((end, i) => fixStartEndValues(end, i, inputShape, axes, steps)); + if (axes.length !== starts.length || axes.length !== ends.length) { + throw new Error("start, ends and axes should have the same number of elements"); + } + if (axes.length !== inputShape.length) { + for (let i = 0; i < inputShape.length; ++i) { + if (!axes.includes(i)) { + starts.splice(i, 0, 0); + ends.splice(i, 0, inputShape[i]); + steps.splice(i, 0, 1); + } + } + } + const signs = steps.map((step) => Math.sign(step)); + steps.forEach((step, i, array) => { + if (step < 0) { + const numSteps = (ends[i] - starts[i]) / step; + const newEnd = starts[i]; + const newStart = newEnd + numSteps * steps[i]; + starts[i] = newStart; + ends[i] = newEnd; + array[i] = -step; + } + }); + const outputShape = inputShape.slice(0); + axes.forEach((axis, _) => { + outputShape[axis] = Math.ceil((ends[axis] - starts[axis]) / steps[axis]); + }); + const outputTensorInfo = { dims: outputShape, dataType: inputs[0].dataType }; + const output = outputVariable("output", inputs[0].dataType, outputShape.length); + const input = inputVariable("input", inputs[0].dataType, inputs[0].dims.length); + const outputSize = ShapeUtil.size(outputShape); + const uniforms = [ + { name: "outputSize", type: "u32" }, + { name: "starts", type: "u32", length: starts.length }, + { name: "signs", type: "i32", length: signs.length }, + { name: "steps", type: "u32", length: steps.length } + ]; + const programUniforms = [ + { type: 12 /* uint32 */, data: outputSize }, + { type: 12 /* uint32 */, data: starts }, + { 
type: 6 /* int32 */, data: signs }, + { type: 12 /* uint32 */, data: steps }, + ...createTensorShapeVariables(inputs[0].dims, outputShape) + ]; + const getShaderSource = (shaderHelper) => ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(input, output)} + ${calculateInputIndicesImpl(input, output, inputShape)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")} + let output_indices = ${output.offsetToIndices("global_idx")}; + let input_indices = calculateInputIndices(output_indices); + ${output.setByOffset("global_idx", input.getByIndices("input_indices"))} + }`; + return { + name: "Slice", + shaderCache: { hint: `${signs.length}_${starts.length}_${steps.length}`, inputDependencies: ["rank"] }, + getShaderSource, + getRunData: () => ({ + outputs: [outputTensorInfo], + dispatchGroup: { x: Math.ceil( + inputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }) + }; + }; + slice = (context, attributes) => { + validateInputs24(context.inputs, attributes); + const updatedAttributes = createSliceAttributesFromInputs(context.inputs, attributes); + context.compute(createSliceProgramInfo(context.inputs, updatedAttributes), { inputs: [0] }); + }; + parseSliceAttributes = (attributes) => { + const starts = attributes.starts; + const ends = attributes.ends; + const axes = attributes.axes; + return createAttributeWithCacheKey({ starts, ends, axes }); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/softmax.ts +var validateInputs25, createSoftmaxProgramInfo, softmax, parseSoftmaxAttributes; +var init_softmax = __esm({ + "web/lib/wasm/jsep/webgpu/ops/softmax.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + validateInputs25 = (inputs) => { + if (!inputs || inputs.length !== 1) { + throw new Error("Softmax op requires 1 input."); + } + }; + createSoftmaxProgramInfo = (input, attributes) => { + const shape = input.dims; + const outputSize = 
ShapeUtil.size(shape); + const WG = 64; + let axis = attributes.axis; + if (axis < 0) { + axis = shape.length + axis; + } + if (axis < shape.length - 1) { + throw new Error("softmax only supports last axis for now."); + } + const cols = shape[axis]; + const rows = outputSize / cols; + const components = getMaxComponents(cols); + const packedCols = cols / components; + const maxVector = (name, components2) => { + if (components2 === 4) { + return `max(max(${name}.x, ${name}.y), max(${name}.z, ${name}.w))`; + } else if (components2 === 2) { + return `max(${name}.x, ${name}.y)`; + } else if (components2 === 3) { + return `max(max(${name}.x, ${name}.y), ${name}.z)`; + } + return name; + }; + const x = inputVariable("x", input.dataType, input.dims, components); + const output = outputVariable("result", input.dataType, input.dims, components); + const valueType = x.type.value; + const threadMaxDecl = tensorTypeToWsglStorageType(input.dataType) === "f32" ? `var threadMax = ${valueType}(-3.402823e+38f);` : `var threadMax = ${valueType}(-65504.0h);`; + const getShaderSource = (shaderHelper) => ` + var rowMaxShared : ${valueType}; + var rowSumShared : ${valueType}; + var threadShared : array<${valueType}, ${WG}>; + + fn getValue(row: i32, col: i32, row_stride: i32) -> ${valueType} { + let index = row * row_stride + col; + return x[index]; + } + + fn setValue(row: i32, col: i32, row_stride: i32, value: ${valueType}) { + let index = row * row_stride + col; + result[index] = value; + } + ${shaderHelper.registerUniform("packedCols", "i32").declareVariables(x, output)} + ${shaderHelper.mainStart()} + let gindex = i32(global_idx); + let lindex = i32(local_idx); + const wg = ${WG}; + let row = gindex / wg; + let cols = uniforms.packedCols; + let row_stride : i32 = uniforms.packedCols; + + // find the rows max + ${threadMaxDecl} + for (var col = lindex; col < cols; col += wg) { + let value = getValue(row, col, row_stride); + threadMax = max(threadMax, value); + } + if (lindex < 
cols) { + threadShared[lindex] = threadMax; + } + workgroupBarrier(); + + var reduceSize = min(cols, wg); + for (var currSize = reduceSize >> 1; currSize > 0; currSize = reduceSize >> 1) { + reduceSize = currSize + (reduceSize & 1); + if (lindex < currSize) { + threadShared[lindex] = max(threadShared[lindex], threadShared[lindex + reduceSize]); + } + workgroupBarrier(); + } + if (lindex == 0) { + rowMaxShared = ${valueType}(${maxVector("threadShared[0]", components)}); + } + workgroupBarrier(); + + // find the rows sum + var threadSum = ${valueType}(0.0); + for (var col = lindex; col < cols; col += wg) { + let subExp = exp(getValue(row, col, row_stride) - rowMaxShared); + threadSum += subExp; + } + threadShared[lindex] = threadSum; + workgroupBarrier(); + + for (var currSize = wg >> 1; currSize > 0; currSize = currSize >> 1) { + if (lindex < currSize) { + threadShared[lindex] = threadShared[lindex] + threadShared[lindex + currSize]; + } + workgroupBarrier(); + } + if (lindex == 0) { + rowSumShared = ${valueType}(${sumVector("threadShared[0]", components)}); + } + workgroupBarrier(); + + // calculate final value for each element in the row + for (var col = lindex; col < cols; col += wg) { + let value = exp(getValue(row, col, row_stride) - rowMaxShared) / rowSumShared; + setValue(row, col, row_stride, value); + } + }`; + return { + name: "Softmax", + shaderCache: { hint: `${components}`, inputDependencies: ["type"] }, + getRunData: () => ({ + outputs: [{ dims: shape, dataType: input.dataType }], + dispatchGroup: { x: rows }, + programUniforms: [{ type: 6 /* int32 */, data: packedCols }] + }), + getShaderSource + }; + }; + softmax = (context, attributes) => { + validateInputs25(context.inputs); + context.compute(createSoftmaxProgramInfo(context.inputs[0], attributes)); + }; + parseSoftmaxAttributes = (attributes) => createAttributeWithCacheKey({ axis: attributes.axis }); + } +}); + +// web/lib/wasm/jsep/webgpu/ops/split.ts +var validateInputs26, 
createSplitAttributesFromInputs, calculateOutputIndexImpl, writeBufferDataImpl, createSplitProgramInfo, split, parseSplitAttributes; +var init_split = __esm({ + "web/lib/wasm/jsep/webgpu/ops/split.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_attribute_with_cache_key(); + init_common(); + validateInputs26 = (inputs) => { + if (!inputs || inputs.length < 1) { + throw new Error("too few inputs"); + } + }; + createSplitAttributesFromInputs = (inputs, attributes) => { + const splitSizes = []; + let numOutputs = attributes.numOutputs; + if (inputs[1].dims[0] > 0) { + inputs[1].getBigInt64Array().forEach((v) => splitSizes.push(Number(v))); + numOutputs = splitSizes.length; + } + return createAttributeWithCacheKey({ numOutputs, axis: attributes.axis, splitSizes }); + }; + calculateOutputIndexImpl = (numberOfTensors) => ` +fn calculateOutputIndex(index: u32) -> u32 { + for (var i: u32 = 0u; i < ${numberOfTensors}u; i += 1u ) { + if (index < ${getElementAt("uniforms.size_in_split_axis", "i", numberOfTensors)}) { + return i; + } + } + return ${numberOfTensors}u; +}`; + writeBufferDataImpl = (outputs) => { + const numberOfTensors = outputs.length; + const codeLines = []; + for (let i = 0; i < numberOfTensors; ++i) { + const returnSnippet = outputs[i].setByIndices("indices", "input[global_idx]"); + if (numberOfTensors === 1) { + codeLines.push(returnSnippet); + } else if (i === 0) { + codeLines.push(`if (output_number == ${i}u) { ${returnSnippet} }`); + } else if (i === numberOfTensors - 1) { + codeLines.push(`else { ${returnSnippet} }`); + } else { + codeLines.push(`else if (output_number == ${i}) { ${returnSnippet} }`); + } + } + return ` + fn writeBufferData(output_number: u32, indices: ${outputs[0].type.indices}, global_idx: u32) { + ${codeLines.join("\n")} + }`; + }; + createSplitProgramInfo = (inputs, attributes) => { + const inputShape = inputs[0].dims; + const inputSize = ShapeUtil.size(inputShape); + const dataType = inputs[0].dataType; + const 
axis = ShapeUtil.normalizeAxis(attributes.axis, inputShape.length); + const outputs = new Array(attributes.numOutputs); + const input = inputVariable("input", dataType, inputShape.length); + const sizeInSplitAxis = new Array(attributes.numOutputs); + const outputsTensorInfo = []; + const outputShapes = []; + let previousSum = 0; + const programUniforms = [{ type: 12 /* uint32 */, data: inputSize }]; + for (let i = 0; i < attributes.numOutputs; i++) { + previousSum += attributes.splitSizes[i]; + sizeInSplitAxis[i] = previousSum; + const outputShape = inputShape.slice(); + outputShape[attributes.axis] = attributes.splitSizes[i]; + outputShapes.push(outputShape); + outputs[i] = outputVariable(`output${i}`, dataType, outputShape.length); + outputsTensorInfo.push({ dims: outputShapes[i], dataType: inputs[0].dataType }); + } + programUniforms.push( + { type: 12 /* uint32 */, data: sizeInSplitAxis }, + ...createTensorShapeVariables(inputShape, ...outputShapes) + ); + const getShaderSource = (shaderHelper) => ` + ${shaderHelper.registerUniform("input_size", "u32").registerUniform("size_in_split_axis", "u32", sizeInSplitAxis.length).declareVariables(input, ...outputs)} + ${calculateOutputIndexImpl(sizeInSplitAxis.length)} + ${writeBufferDataImpl(outputs)} + + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")} + + var indices = ${input.offsetToIndices("global_idx")}; + var index = ${input.indicesGet("indices", axis)}; + let output_number = calculateOutputIndex(index); + if (output_number != 0) { + index -= ${getElementAt("uniforms.size_in_split_axis", "output_number - 1u", sizeInSplitAxis.length)}; + ${input.indicesSet("indices", axis, "index")}; + } + writeBufferData(output_number, indices, global_idx); + }`; + return { + name: "Split", + shaderCache: { hint: attributes.cacheKey, inputDependencies: ["rank"] }, + getShaderSource, + getRunData: () => ({ + outputs: outputsTensorInfo, + dispatchGroup: { x: Math.ceil( + 
inputSize / 64 + /* workgroup size */ + ) }, + programUniforms + }) + }; + }; + split = (context, attributes) => { + validateInputs26(context.inputs); + const updatedAttributes = context.inputs.length === 1 ? attributes : createSplitAttributesFromInputs(context.inputs, attributes); + context.compute(createSplitProgramInfo(context.inputs, updatedAttributes), { inputs: [0] }); + }; + parseSplitAttributes = (attributes) => { + const axis = attributes.axis; + const splitSizes = attributes.splitSizes; + const numOutputs = attributes.numOutputs < 0 ? splitSizes.length : attributes.numOutputs; + if (numOutputs !== splitSizes.length) { + throw new Error("numOutputs and splitSizes lengh must be equal"); + } + return createAttributeWithCacheKey({ axis, numOutputs, splitSizes }); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/tile.ts +var getRepeats, validateInputs27, getOutputShape2, createTileProgramInfo, tile; +var init_tile = __esm({ + "web/lib/wasm/jsep/webgpu/ops/tile.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + getRepeats = (repeatsTensorView) => Array.from(repeatsTensorView.getBigInt64Array(), Number); + validateInputs27 = (inputs) => { + if (!inputs || inputs.length !== 2) { + throw new Error("Tile requires 2 inputs."); + } + if (inputs[0].dataType !== 1 /* float */ && inputs[0].dataType !== 6 /* int32 */ && inputs[0].dataType !== 12 /* uint32 */) { + throw new Error("Tile only support float, int32, and uint32 data types"); + } + if (inputs[1].dataType !== 7 /* int64 */) { + throw new Error("Tile `repeats` input should be of int64 data type"); + } + if (inputs[1].dims.length !== 1) { + throw new Error("Tile `repeats` input should be 1-D"); + } + const repeats = getRepeats(inputs[1]); + if (repeats.length !== inputs[0].dims.length) { + throw new Error("Tile `repeats` input should have same number of elements as rank of input data tensor"); + } + }; + getOutputShape2 = (inputShape, repeats) => { + const outputShape = []; + for (let 
i = 0; i < inputShape.length; ++i) { + outputShape.push(inputShape[i] * repeats[i]); + } + return outputShape; + }; + createTileProgramInfo = (inputs) => { + const inputShape = inputs[0].dims; + const repeats = getRepeats(inputs[1]); + const outputShape = getOutputShape2(inputShape, repeats); + const outputSize = ShapeUtil.size(outputShape); + const dataType = inputs[0].dataType; + const input = inputVariable("input", dataType, inputShape.length); + const output = outputVariable("output", dataType, outputShape.length); + const getShaderSource = (shaderHelper) => ` + const inputShape = ${input.indices(...inputShape)}; + ${shaderHelper.registerUniform("output_size", "u32").declareVariables(input, output)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")} + let output_indices = ${output.offsetToIndices("global_idx")}; + var input_indices: ${input.type.indices}; + for (var i = 0; i < ${inputShape.length}; i++) { + let input_dim_i = ${input.indicesGet("uniforms.input_shape", "i")}; + let input_dim_value = ${output.indicesGet("output_indices", "i")} % input_dim_i; + + ${input.indicesSet("input_indices", "i", "input_dim_value")} + } + ${output.setByOffset("global_idx", input.getByIndices("input_indices"))} + }`; + return { + name: "Tile", + shaderCache: { hint: `${repeats}`, inputDependencies: ["rank"] }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: inputs[0].dataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 + /* workgroup size */ + ) }, + programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputs[0].dims, outputShape)] + }), + getShaderSource + }; + }; + tile = (context) => { + validateInputs27(context.inputs); + context.compute(createTileProgramInfo(context.inputs), { inputs: [0] }); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/ops/where.ts +var createWhereOpProgramShader, createWhereOpProgramInfo, where; +var init_where = __esm({ + 
"web/lib/wasm/jsep/webgpu/ops/where.ts"() { + "use strict"; + init_wasm_common(); + init_util(); + init_common(); + createWhereOpProgramShader = (shaderHelper, inputs, dimsOutput, isBroadcast, typeOutput) => { + const output = outputVariable("output_data", typeOutput, dimsOutput.length, 4); + const a = inputVariable("a_data", inputs[1].dataType, inputs[1].dims.length, 4); + const b = inputVariable("b_data", inputs[2].dataType, inputs[2].dims.length, 4); + const c = inputVariable("c_data", inputs[0].dataType, inputs[0].dims.length, 4); + let assignment; + const expression = (a2, b2, c2) => `select(${b2}, ${a2}, ${c2})`; + if (!isBroadcast) { + assignment = output.setByOffset( + "global_idx", + expression(a.getByOffset("global_idx"), b.getByOffset("global_idx"), c.getByOffset("global_idx")) + ); + } else { + const singleAssignment = (resStr, x, typeCast = "") => { + const expressionA = `a_data[index_a${x}][component_a${x}]`; + const expressionB = `b_data[index_b${x}][component_b${x}]`; + const expressionC = `bool(c_data[index_c${x}] & (0xffu << (component_c${x} * 8)))`; + return ` + let output_indices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)}; + let offset_a${x} = ${a.broadcastedIndicesToOffset(`output_indices${x}`, output)}; + let offset_b${x} = ${b.broadcastedIndicesToOffset(`output_indices${x}`, output)}; + let offset_c${x} = ${c.broadcastedIndicesToOffset(`output_indices${x}`, output)}; + let index_a${x} = offset_a${x} / 4u; + let index_b${x} = offset_b${x} / 4u; + let index_c${x} = offset_c${x} / 4u; + let component_a${x} = offset_a${x} % 4u; + let component_b${x} = offset_b${x} % 4u; + let component_c${x} = offset_c${x} % 4u; + ${resStr}[${x}] = ${typeCast}(${expression(expressionA, expressionB, expressionC)}); + `; + }; + if (typeOutput === 9 /* bool */) { + assignment = ` + var data = vec4(0); + ${singleAssignment("data", 0, "u32")} + ${singleAssignment("data", 1, "u32")} + ${singleAssignment("data", 2, "u32")} + ${singleAssignment("data", 3, 
"u32")} + output_data[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`; + } else { + assignment = ` + ${singleAssignment("output_data[global_idx]", 0)} + ${singleAssignment("output_data[global_idx]", 1)} + ${singleAssignment("output_data[global_idx]", 2)} + ${singleAssignment("output_data[global_idx]", 3)} + `; + } + } + return ` + ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(c, a, b, output)} + ${shaderHelper.mainStart()} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")} + ${assignment} + }`; + }; + createWhereOpProgramInfo = (inputs) => { + const dimsA = inputs[1].dims; + const dimsB = inputs[2].dims; + const dimsC = inputs[0].dims; + const outputDataType = inputs[1].dataType; + const isBroadcast = !(ShapeUtil.areEqual(dimsA, dimsB) && ShapeUtil.areEqual(dimsB, dimsC)); + let outputShape = dimsA; + let outputSize = ShapeUtil.size(dimsA); + if (isBroadcast) { + const calculatedShape = BroadcastUtil.calcShape(BroadcastUtil.calcShape(dimsA, dimsB, false), dimsC, false); + if (!calculatedShape) { + throw new Error("Can't perform where op on the given tensors"); + } + outputShape = calculatedShape; + outputSize = ShapeUtil.size(outputShape); + } + const vecSize = Math.ceil(outputSize / 4); + return { + name: "Where", + shaderCache: { inputDependencies: ["rank", "rank", "rank"] }, + getShaderSource: (shaderHelper) => createWhereOpProgramShader(shaderHelper, inputs, outputShape, isBroadcast, outputDataType), + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: outputDataType }], + dispatchGroup: { x: Math.ceil( + outputSize / 64 / 4 + /* vec size */ + ) }, + programUniforms: [{ type: 12 /* uint32 */, data: vecSize }, ...createTensorShapeVariables(dimsC, dimsA, dimsB, outputShape)] + }) + }; + }; + where = (context) => { + context.compute(createWhereOpProgramInfo(context.inputs)); + }; + } +}); + +// web/lib/wasm/jsep/webgpu/op-resolve-rules.ts +var WEBGPU_OP_RESOLVE_RULES; +var 
init_op_resolve_rules = __esm({ + "web/lib/wasm/jsep/webgpu/op-resolve-rules.ts"() { + "use strict"; + init_argminmax(); + init_attention(); + init_batch_norm(); + init_bias_add(); + init_bias_split_gelu(); + init_binary_op(); + init_concat(); + init_conv(); + init_conv_transpose(); + init_cumsum(); + init_depth_to_space(); + init_einsum(); + init_expand(); + init_fast_gelu(); + init_gather(); + init_gather_elements(); + init_gemm(); + init_instance_norm(); + init_layer_norm(); + init_matmul(); + init_matmulnbits(); + init_multihead_attentiion(); + init_pad(); + init_pool(); + init_range(); + init_reduce(); + init_resize(); + init_rotary_embedding(); + init_skip_layer_norm(); + init_slice(); + init_softmax(); + init_split(); + init_tile(); + init_transpose(); + init_unary_op(); + init_where(); + WEBGPU_OP_RESOLVE_RULES = /* @__PURE__ */ new Map([ + ["Abs", [abs]], + ["Acos", [acos]], + ["Acosh", [acosh]], + ["Add", [add]], + ["ArgMax", [argMax, parseArgMinMaxAttributes]], + ["ArgMin", [argMin, parseArgMinMaxAttributes]], + ["Asin", [asin]], + ["Asinh", [asinh]], + ["Atan", [atan]], + ["Atanh", [atanh]], + ["Attention", [attention]], + // TODO: support new attributes for AveragePool-10 + ["AveragePool", [averagePool, parseAveragePoolAttributes]], + ["BatchNormalization", [batchNorm]], + ["BiasAdd", [biasAdd]], + ["BiasSplitGelu", [biasSplitGelu]], + ["Cast", [cast, parseCastAttributes]], + ["Ceil", [ceil]], + ["Clip", [clip]], + ["Concat", [concat, parseConcatAttributes]], + ["Conv", [conv, parseConvAttributes]], + ["ConvTranspose", [convTranspose, parseConvTransposeAttributes]], + ["Cos", [cos]], + ["Cosh", [cosh]], + ["CumSum", [cumsum, parseCumSumAttributes]], + ["DepthToSpace", [depthToSpace, parseDepthToSpaceAttributes]], + ["Div", [div]], + ["Einsum", [einsum, parseEinsumAttributes]], + ["Elu", [elu, parseAlphaAttributes]], + ["Equal", [equal]], + ["Erf", [erf]], + ["Exp", [exp]], + ["Expand", [expand]], + ["FastGelu", [fastGelu2]], + ["Floor", [floor]], + 
["FusedConv", [conv, parseConvAttributes]], + ["Gather", [gather, parseGatherAttributes]], + ["GatherElements", [gatherElements, parseGatherElementsAttributes]], + ["Gelu", [gelu]], + ["Gemm", [gemm, parseGemmAttributes]], + ["GlobalAveragePool", [globalAveragePool, parseGlobalAveragePoolAttributes]], + ["GlobalMaxPool", [globalMaxPool, parseGlobalMaxPoolAttributes]], + ["Greater", [greater]], + ["GreaterOrEqual", [greaterOrEqual]], + ["HardSigmoid", [hardSigmoid, parseHardSigmoidAttributes]], + ["InstanceNormalization", [instanceNorm]], + ["LayerNormalization", [layerNorm]], + ["LeakyRelu", [leakyRelu, parseAlphaAttributes]], + ["Less", [less]], + ["LessOrEqual", [lessOrEqual]], + ["Log", [log]], + ["MatMul", [matMul]], + ["MatMulNBits", [matMulNBits, parseMatMulNBitsAttributes]], + // TODO: support new attributes for MaxPool-8 and MaxPool-10 + ["MaxPool", [maxPool, parseMaxPoolAttributes]], + ["Mul", [mul]], + ["MultiHeadAttention", [multiHeadAttention, parseMultiHeadAttentionAttributes]], + ["Neg", [neg]], + ["Not", [not]], + ["Pad", [pad]], + ["Pow", [pow]], + ["Range", [range]], + ["Reciprocal", [reciprocal]], + ["ReduceMin", [reduceMin]], + ["ReduceMean", [reduceMean]], + ["ReduceMax", [reduceMax]], + ["ReduceSum", [reduceSum]], + ["ReduceProd", [reduceProd]], + ["ReduceL1", [reduceL1]], + ["ReduceL2", [reduceL2]], + ["ReduceLogSum", [reduceLogSum]], + ["ReduceLogSumExp", [reduceLogSumExp]], + ["ReduceSumSquare", [reduceSumSquare]], + ["Relu", [relu]], + ["Resize", [resize, parseResizeAttributes]], + ["RotaryEmbedding", [rotaryEmbedding]], + ["Sigmoid", [sigmoid]], + ["Sin", [sin]], + ["Sinh", [sinh]], + ["Slice", [slice, parseSliceAttributes]], + ["SkipLayerNormalization", [skipLayerNorm]], + ["Split", [split, parseSplitAttributes]], + ["Sqrt", [sqrt]], + ["Softmax", [softmax, parseSoftmaxAttributes]], + ["Sub", [sub]], + ["Tan", [tan]], + ["Tanh", [tanh]], + ["ThresholdedRelu", [thresholdedRelu, parseAlphaAttributes]], + ["Tile", [tile]], + ["Transpose", 
[transpose, parseTransposeAttributes]], + ["Where", [where]] + ]); + } +}); + +// web/lib/wasm/jsep/webgpu/program-manager.ts +var ProgramManager; +var init_program_manager = __esm({ + "web/lib/wasm/jsep/webgpu/program-manager.ts"() { + "use strict"; + init_esm(); + init_log(); + init_common(); + ProgramManager = class { + constructor(backend) { + this.backend = backend; + this.repo = /* @__PURE__ */ new Map(); + this.attributesBound = false; + } + getArtifact(key) { + return this.repo.get(key); + } + setArtifact(key, artifact) { + this.repo.set(key, artifact); + } + run(buildArtifact, inputs, outputs, dispatchGroup, uniformBufferBinding) { + TRACE_FUNC_BEGIN(buildArtifact.programInfo.name); + const device = this.backend.device; + const computePassEncoder = this.backend.getComputePassEncoder(); + this.backend.writeTimestamp(this.backend.pendingDispatchNumber * 2); + const entries = []; + for (const input of inputs) { + entries.push({ binding: entries.length, resource: { buffer: input.buffer } }); + } + for (const output of outputs) { + entries.push({ binding: entries.length, resource: { buffer: output.buffer } }); + } + if (uniformBufferBinding) { + entries.push({ binding: entries.length, resource: uniformBufferBinding }); + } + const bindGroup = device.createBindGroup( + { layout: buildArtifact.computePipeline.getBindGroupLayout(0), entries, label: buildArtifact.programInfo.name } + ); + if (this.backend.sessionStatus === "capturing") { + const commandInfo = { + kernelId: this.backend.currentKernelId, + computePipeline: buildArtifact.computePipeline, + bindGroup, + dispatchGroup + }; + const sessionCommandList = this.backend.capturedCommandList.get(this.backend.currentSessionId); + sessionCommandList.push(commandInfo); + } + computePassEncoder.setPipeline(buildArtifact.computePipeline); + computePassEncoder.setBindGroup(0, bindGroup); + computePassEncoder.dispatchWorkgroups(...dispatchGroup); + this.backend.writeTimestamp(this.backend.pendingDispatchNumber * 2 + 
1); + this.backend.pendingDispatchNumber++; + if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber || this.backend.queryType === "at-passes") { + this.backend.endComputePass(); + } + if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber) { + this.backend.flush(); + } + TRACE_FUNC_END(buildArtifact.programInfo.name); + } + dispose() { + } + build(programInfo, normalizedDispatchGroupSize) { + TRACE_FUNC_BEGIN(programInfo.name); + const device = this.backend.device; + const extensions = []; + if (device.features.has("shader-f16")) { + extensions.push("enable f16;"); + } + const shaderHelper = createShaderHelper(normalizedDispatchGroupSize, this.backend.device.limits); + const userCode = programInfo.getShaderSource(shaderHelper); + const code = `${extensions.join("\n")} +${shaderHelper.additionalImplementations} +${userCode}`; + const shaderModule = device.createShaderModule({ code, label: programInfo.name }); + LOG_DEBUG("verbose", () => `[WebGPU] ${programInfo.name} shader code: ${code}`); + const computePipeline = device.createComputePipeline( + { compute: { module: shaderModule, entryPoint: "main" }, layout: "auto", label: programInfo.name } + ); + TRACE_FUNC_END(programInfo.name); + return { programInfo, computePipeline, uniformVariablesInfo: shaderHelper.variablesInfo }; + } + normalizeDispatchGroupSize(dispatchGroup) { + const x = typeof dispatchGroup === "number" ? dispatchGroup : dispatchGroup.x; + const y = typeof dispatchGroup === "number" ? 1 : dispatchGroup.y || 1; + const z = typeof dispatchGroup === "number" ? 
1 : dispatchGroup.z || 1; + const limitPerDimension = this.backend.device.limits.maxComputeWorkgroupsPerDimension; + if (x <= limitPerDimension && y <= limitPerDimension && z <= limitPerDimension) { + return [x, y, z]; + } + const size = x * y * z; + let dispatchAverage = Math.ceil(Math.sqrt(size)); + if (dispatchAverage > limitPerDimension) { + dispatchAverage = Math.ceil(Math.cbrt(size)); + if (dispatchAverage > limitPerDimension) { + throw new Error("Total dispatch size exceeds WebGPU maximum."); + } + return [dispatchAverage, dispatchAverage, dispatchAverage]; + } else { + return [dispatchAverage, dispatchAverage, 1]; + } + } + }; + } +}); + +// web/lib/wasm/jsep/backend-webgpu.ts +var getProgramInputTensorInfoDependencyKey, getProgramInfoUniqueKey, AdapterInfoImpl, WebGpuBackend; +var init_backend_webgpu = __esm({ + "web/lib/wasm/jsep/backend-webgpu.ts"() { + "use strict"; + init_esm(); + init_wasm_common(); + init_log(); + init_tensor_view(); + init_gpu_data_manager(); + init_op_resolve_rules(); + init_program_manager(); + getProgramInputTensorInfoDependencyKey = (inputTensors, inputDependencies) => { + if (inputDependencies.length !== inputTensors.length) { + throw new Error(`inputDependencies length ${inputDependencies.length} is not equal to inputTensors length ${inputTensors.length}.`); + } + const inputInfos = []; + for (let i = 0; i < inputTensors.length; ++i) { + const type = inputTensors[i].dataType; + switch (inputDependencies[i]) { + case "none": { + inputInfos.push(""); + break; + } + case "type": { + inputInfos.push(`${type}`); + break; + } + case "rank": { + const rank = inputTensors[i].dims.length; + inputInfos.push(`${type};${rank}`); + break; + } + case "dims": { + const dims = inputTensors[i].dims.join(","); + inputInfos.push(`${type};${dims}`); + break; + } + default: + throw new Error(`unsupported input dependency: ${inputDependencies[i]}`); + } + } + return inputInfos.join("|"); + }; + getProgramInfoUniqueKey = (programInfo, inputTensors, 
is1DimensionDispatch) => { + let key = programInfo.name; + if (programInfo.shaderCache?.hint) { + key += "[" + programInfo.shaderCache.hint + "]"; + } + key += ":" + is1DimensionDispatch + `:${getProgramInputTensorInfoDependencyKey( + inputTensors, + programInfo.shaderCache?.inputDependencies ?? new Array(inputTensors.length).fill("dims") + )}`; + return key; + }; + AdapterInfoImpl = class { + constructor(adapterInfo) { + if (adapterInfo) { + this.architecture = adapterInfo.architecture; + this.vendor = adapterInfo.vendor; + } + } + isArchitecture(architecture) { + return this.architecture === architecture; + } + isVendor(vendor) { + return this.vendor === vendor; + } + }; + WebGpuBackend = class { + constructor() { + /** + * representing the session ID of which is currently being run. + * `null` means no session is being run. + * only valid when session.run is executed. + */ + this.currentSessionId = null; + /** + * representing the kernel ID of which is currently being computed (CPU code perspective). + * `null` means no kernel is being computed. + * only one kernel can be computed at a moment. + */ + this.currentKernelId = null; + this.commandEncoder = null; + this.computePassEncoder = null; + this.maxDispatchNumber = 16; + this.pendingDispatchNumber = 0; + // info of kernels pending submission for a single batch + this.pendingKernels = []; + // queryReadBuffer -> pendingKernels mapping for all the batches + this.pendingQueries = /* @__PURE__ */ new Map(); + this.sessionStatus = "default"; + /** + * a SessionID -> CommandInfo[] mapping. It's used to record all GPU commands for corresponding session. + */ + this.capturedCommandList = /* @__PURE__ */ new Map(); + /** + * a SessionID -> PendingKernelInfo[] mapping for profiling. + */ + this.capturedPendingKernels = /* @__PURE__ */ new Map(); + /** + * a SessionID -> a Map of (InputOutputIndex -> [ID, GPUBuffer]) mapping. 
+ */ + this.sessionExternalDataMapping = /* @__PURE__ */ new Map(); + } + /** + * get the custom data of the current kernel + */ + get currentKernelCustomData() { + if (this.currentKernelId === null) { + throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)"); + } + let data = this.kernelCustomData.get(this.currentKernelId); + if (!data) { + data = {}; + this.kernelCustomData.set(this.currentKernelId, data); + } + return data; + } + async initialize(env3, adapter) { + this.env = env3; + const requiredFeatures = []; + const deviceDescriptor = { + requiredLimits: { + maxComputeWorkgroupStorageSize: adapter.limits.maxComputeWorkgroupStorageSize, + maxComputeWorkgroupsPerDimension: adapter.limits.maxComputeWorkgroupsPerDimension, + maxStorageBufferBindingSize: adapter.limits.maxStorageBufferBindingSize, + maxBufferSize: adapter.limits.maxBufferSize, + maxComputeInvocationsPerWorkgroup: adapter.limits.maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX: adapter.limits.maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY: adapter.limits.maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ: adapter.limits.maxComputeWorkgroupSizeZ + }, + requiredFeatures + }; + if (adapter.features.has("chromium-experimental-timestamp-query-inside-passes")) { + requiredFeatures.push("chromium-experimental-timestamp-query-inside-passes"); + } else if (adapter.features.has("timestamp-query")) { + requiredFeatures.push("timestamp-query"); + } + if (adapter.features.has("shader-f16")) { + requiredFeatures.push("shader-f16"); + } + this.device = await adapter.requestDevice(deviceDescriptor); + this.adapterInfo = new AdapterInfoImpl(await adapter.requestAdapterInfo()); + this.gpuDataManager = createGpuDataManager(this); + this.programManager = new ProgramManager(this); + this.kernels = /* @__PURE__ */ new Map(); + this.kernelPersistentData = /* @__PURE__ */ new Map(); + this.kernelCustomData = /* @__PURE__ */ new Map(); + 
configureLogger(env3.logLevel, !!env3.debug); + this.device.onuncapturederror = (ev) => { + if (ev.error instanceof GPUValidationError) { + console.error(`An uncaught WebGPU validation error was raised: ${ev.error.message}`); + } + }; + Object.defineProperty( + this.env.webgpu, + "device", + { value: this.device, writable: false, enumerable: true, configurable: false } + ); + Object.defineProperty( + this.env.webgpu, + "adapter", + { value: adapter, writable: false, enumerable: true, configurable: false } + ); + this.setQueryType(); + } + dispose() { + if (typeof this.querySet !== "undefined") { + this.querySet.destroy(); + } + this.gpuDataManager.dispose(); + } + getCommandEncoder() { + if (!this.commandEncoder) { + this.commandEncoder = this.device.createCommandEncoder(); + } + return this.commandEncoder; + } + getComputePassEncoder() { + if (!this.computePassEncoder) { + const commandEncoder = this.getCommandEncoder(); + const computePassDescriptor = {}; + if (this.queryType === "at-passes") { + computePassDescriptor.timestampWrites = { + querySet: this.querySet, + beginningOfPassWriteIndex: this.pendingDispatchNumber * 2, + endOfPassWriteIndex: this.pendingDispatchNumber * 2 + 1 + }; + } + this.computePassEncoder = commandEncoder.beginComputePass(computePassDescriptor); + } + return this.computePassEncoder; + } + endComputePass() { + if (this.computePassEncoder) { + this.computePassEncoder.end(); + this.computePassEncoder = null; + } + } + flush() { + if (!this.commandEncoder) { + return; + } + TRACE_FUNC_BEGIN(); + this.endComputePass(); + let queryReadBuffer; + if (this.queryType !== "none") { + this.commandEncoder.resolveQuerySet( + this.querySet, + 0, + this.pendingDispatchNumber * 2, + this.queryResolveBuffer, + 0 + ); + queryReadBuffer = this.device.createBuffer( + // eslint-disable-next-line no-bitwise + { size: this.pendingDispatchNumber * 2 * 8, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST } + ); + this.pendingQueries.set(queryReadBuffer, 
this.pendingKernels); + this.pendingKernels = []; + this.commandEncoder.copyBufferToBuffer( + this.queryResolveBuffer, + 0, + queryReadBuffer, + 0, + this.pendingDispatchNumber * 2 * 8 + ); + } + this.device.queue.submit([this.commandEncoder.finish()]); + this.gpuDataManager.refreshPendingBuffers(); + this.commandEncoder = null; + this.pendingDispatchNumber = 0; + if (this.queryType !== "none") { + void queryReadBuffer.mapAsync(GPUMapMode.READ).then(() => { + const mappedData = new BigUint64Array(queryReadBuffer.getMappedRange()); + const pendingKernels = this.pendingQueries.get(queryReadBuffer); + for (let i = 0; i < mappedData.length / 2; i++) { + const pendingKernelInfo = pendingKernels[i]; + const kernelId = pendingKernelInfo.kernelId; + const kernelInfo = this.kernels.get(kernelId); + const kernelType = kernelInfo.kernelType; + const kernelName = kernelInfo.kernelName; + const programName = pendingKernelInfo.programName; + const inputTensorViews = pendingKernelInfo.inputTensorViews; + const outputTensorViews = pendingKernelInfo.outputTensorViews; + const startTimeU64 = mappedData[i * 2]; + const endTimeU64 = mappedData[i * 2 + 1]; + if (typeof this.queryTimeBase === "undefined") { + this.queryTimeBase = startTimeU64; + } + const startTime = Number(startTimeU64 - this.queryTimeBase); + const endTime = Number(endTimeU64 - this.queryTimeBase); + if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) { + throw new RangeError("incorrect timestamp range"); + } + if (this.env.webgpu.profiling?.ondata) { + this.env.webgpu.profiling.ondata({ + version: 1, + inputsMetadata: inputTensorViews.map( + (value) => ({ dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType) }) + ), + outputsMetadata: outputTensorViews.map( + (value) => ({ dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType) }) + ), + kernelId, + kernelType, + kernelName, + programName, + startTime, + endTime + }); + } else { + let inputShapes = ""; + 
inputTensorViews.forEach((value, i2) => { + inputShapes += `input[${i2}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `; + }); + let outputShapes = ""; + outputTensorViews.forEach((value, i2) => { + outputShapes += `output[${i2}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `; + }); + console.log(`[profiling] kernel "${kernelId}|${kernelType}|${kernelName}|${programName}" ${inputShapes}${outputShapes}execution time: ${endTime - startTime} ns`); + } + TRACE("GPU", `${programName}::${startTimeU64}::${endTimeU64}`); + } + queryReadBuffer.unmap(); + this.pendingQueries.delete(queryReadBuffer); + }); + } + TRACE_FUNC_END(); + } + /** + * run a WebGPU program. + * @param program a ProgramInfo instance + * @param inputTensorViews a TensorView array. each element represents a value already exists in GPU. + * @param outputIndices an indices array. each element can be either -1 (temporary data), -2 (persistent data) or an + * index to the kernel's output. + * @param createKernelOutput a callback function that create a value to kernel's output with the given index + * @param createIntermediateOutput a callback function that create a value as a intermediate value, either temporary + * or persistent (owned by the current kernel) + * @returns a TensorView array representing the result. + */ + run(program, inputTensorViews, outputIndices, createKernelOutput, createIntermediateOutput, outputCount) { + TRACE_FUNC_BEGIN(program.name); + const inputDatas = []; + for (let i = 0; i < inputTensorViews.length; ++i) { + const data = inputTensorViews[i].data; + if (data === 0) { + continue; + } + const gpuData = this.gpuDataManager.get(data); + if (!gpuData) { + throw new Error(`no GPU data for input: ${data}`); + } + inputDatas.push(gpuData); + } + const { outputs, dispatchGroup, programUniforms } = program.getRunData(inputTensorViews); + const validatedOutputIndices = outputIndices.length === 0 ? 
outputs.map((_, i) => i) : outputIndices; + if (validatedOutputIndices.length !== outputs.length) { + throw new Error(`Output size ${validatedOutputIndices.length} must be equal to ${outputs.length}.`); + } + const outputTensorViews = []; + const outputDatas = []; + for (let i = 0; i < outputs.length; ++i) { + if (!Number.isInteger(validatedOutputIndices[i]) || validatedOutputIndices[i] < -3 || validatedOutputIndices[i] >= outputCount) { + throw new Error(`Invalid output index: ${validatedOutputIndices[i]}`); + } + if (validatedOutputIndices[i] === -3) { + continue; + } + const isTemporary = validatedOutputIndices[i] === -1; + const isPersistent = validatedOutputIndices[i] === -2; + const tensorView = isTemporary || isPersistent ? createIntermediateOutput(outputs[i].dataType, outputs[i].dims) : createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims); + outputTensorViews.push(tensorView); + if (tensorView.data === 0) { + continue; + } + const gpuData = this.gpuDataManager.get(tensorView.data); + if (!gpuData) { + throw new Error(`no GPU data for output: ${tensorView.data}`); + } + if (isTemporary) { + this.temporaryData.push(gpuData); + } + if (isPersistent) { + let persistentData = this.kernelPersistentData.get(this.currentKernelId); + if (!persistentData) { + persistentData = []; + this.kernelPersistentData.set(this.currentKernelId, persistentData); + } + persistentData.push(gpuData); + } + outputDatas.push(gpuData); + } + if (inputDatas.length !== inputTensorViews.length || outputDatas.length !== outputTensorViews.length) { + if (outputDatas.length === 0) { + TRACE_FUNC_END(program.name); + return outputTensorViews; + } + throw new Error( + `Program ${program.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.` + ); + } + let uniformBufferBinding; + if (programUniforms) { + let currentOffset = 0; + const offsets = []; + programUniforms.forEach((v) => { + const data = typeof v.data === "number" ? 
[v.data] : v.data; + if (data.length === 0) { + return; + } + const sizeOfElement = v.type === 10 /* float16 */ ? 2 : 4; + let sizeOfVecOrMat; + let baseAlignment; + if (v.type === 10 /* float16 */) { + baseAlignment = data.length > 4 ? 16 : data.length > 2 ? 8 : data.length * sizeOfElement; + sizeOfVecOrMat = data.length > 4 ? 16 : sizeOfElement * data.length; + } else { + baseAlignment = data.length <= 2 ? data.length * sizeOfElement : 16; + sizeOfVecOrMat = 16; + } + currentOffset = Math.ceil(currentOffset / baseAlignment) * baseAlignment; + offsets.push(currentOffset); + const elementPerVecOrMat = v.type === 10 /* float16 */ ? 8 : 4; + currentOffset += data.length > 4 ? Math.ceil(data.length / elementPerVecOrMat) * sizeOfVecOrMat : data.length * sizeOfElement; + }); + const maxAlignmentOfField = 16; + currentOffset = Math.ceil(currentOffset / maxAlignmentOfField) * maxAlignmentOfField; + const arrayBuffer = new ArrayBuffer(currentOffset); + programUniforms.forEach((v, i) => { + const offset = offsets[i]; + const data = typeof v.data === "number" ? 
[v.data] : v.data; + if (v.type === 6 /* int32 */) { + new Int32Array(arrayBuffer, offset, data.length).set(data); + } else if (v.type === 12 /* uint32 */) { + new Uint32Array(arrayBuffer, offset, data.length).set(data); + } else if (v.type === 10 /* float16 */) { + new Uint16Array(arrayBuffer, offset, data.length).set(data); + } else if (v.type === 1 /* float */) { + new Float32Array(arrayBuffer, offset, data.length).set(data); + } else { + throw new Error(`Unsupported uniform type: ${tensorDataTypeEnumToString(v.type)}`); + } + }); + const uniformBufferData = ( + // eslint-disable-next-line no-bitwise + this.gpuDataManager.create(currentOffset, GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM) + ); + this.device.queue.writeBuffer(uniformBufferData.buffer, 0, arrayBuffer, 0, currentOffset); + this.gpuDataManager.release(uniformBufferData.id); + uniformBufferBinding = { offset: 0, size: currentOffset, buffer: uniformBufferData.buffer }; + } + const normalizedDispatchGroup = this.programManager.normalizeDispatchGroupSize(dispatchGroup); + const is1DimensionDispatch = normalizedDispatchGroup[1] === 1 && normalizedDispatchGroup[2] === 1; + const key = getProgramInfoUniqueKey(program, inputTensorViews, is1DimensionDispatch); + let artifact = this.programManager.getArtifact(key); + if (!artifact) { + artifact = this.programManager.build(program, normalizedDispatchGroup); + this.programManager.setArtifact(key, artifact); + LOG_DEBUG("info", () => `[artifact] key: ${key}, programName: ${program.name}`); + } + if (programUniforms && artifact.uniformVariablesInfo) { + if (programUniforms.length !== artifact.uniformVariablesInfo.length) { + throw new Error(`Uniform variables count mismatch: expect ${artifact.uniformVariablesInfo.length}, got ${programUniforms.length} in program "${artifact.programInfo.name}".`); + } + for (let i = 0; i < programUniforms.length; i++) { + const uniform = programUniforms[i]; + const actualType = uniform.type; + const actualLength = typeof 
uniform.data === "number" ? 1 : uniform.data.length; + const [type, length] = artifact.uniformVariablesInfo[i]; + if (actualType !== type || actualLength !== length) { + throw new Error(`Uniform variable ${i} mismatch: expect type ${type} with size ${length}, got type ${actualType} with size ${actualLength} in program "${artifact.programInfo.name}".`); + } + } + } + LOG_DEBUG( + "info", + () => `[ProgramManager] run "${program.name}" (key=${key}) with ${normalizedDispatchGroup[0]}x${normalizedDispatchGroup[1]}x${normalizedDispatchGroup[2]}` + ); + if (this.queryType !== "none" || this.sessionStatus === "capturing") { + const pendingKernelInfo = { + kernelId: this.currentKernelId, + programName: artifact.programInfo.name, + inputTensorViews, + outputTensorViews + }; + this.pendingKernels.push(pendingKernelInfo); + if (this.sessionStatus === "capturing") { + const sessionPendingKernels = this.capturedPendingKernels.get(this.currentSessionId); + sessionPendingKernels.push(pendingKernelInfo); + } + } + this.programManager.run(artifact, inputDatas, outputDatas, normalizedDispatchGroup, uniformBufferBinding); + TRACE_FUNC_END(program.name); + return outputTensorViews; + } + upload(gpuDataId, data) { + this.gpuDataManager.upload(gpuDataId, data); + } + memcpy(src, dst) { + this.gpuDataManager.memcpy(src, dst); + } + async download(gpuDataId, getTargetBuffer) { + await this.gpuDataManager.download(gpuDataId, getTargetBuffer); + } + alloc(size) { + return this.gpuDataManager.create(size).id; + } + free(ptr) { + return this.gpuDataManager.release(ptr); + } + createKernel(kernelType, kernelId, attribute, kernelName) { + const op = WEBGPU_OP_RESOLVE_RULES.get(kernelType); + if (!op) { + throw new Error(`kernel not implemented: ${kernelType}`); + } + const kernelInfo = { + kernelType, + kernelName, + kernelEntry: op[0], + attributes: [op[1], attribute] + }; + this.kernels.set(kernelId, kernelInfo); + } + releaseKernel(kernelId) { + const persistentData = 
this.kernelPersistentData.get(kernelId); + if (persistentData) { + for (const data of persistentData) { + this.gpuDataManager.release(data.id); + } + this.kernelPersistentData.delete(kernelId); + } + this.kernelCustomData.delete(kernelId); + this.kernels.delete(kernelId); + } + computeKernel(kernelId, context, errors) { + const kernel = this.kernels.get(kernelId); + if (!kernel) { + throw new Error(`kernel not created: ${kernelId}`); + } + const kernelType = kernel.kernelType; + const kernelName = kernel.kernelName; + const kernelEntry = kernel.kernelEntry; + const attributes = kernel.attributes; + if (this.currentKernelId !== null) { + throw new Error(`kernel "[${kernelType}] ${kernelName}" is not allowed to be called recursively`); + } + this.currentKernelId = kernelId; + if (attributes[0]) { + attributes[1] = attributes[0](attributes[1]); + attributes[0] = void 0; + } + LOG_DEBUG("info", () => `[WebGPU] Start to run kernel "[${kernelType}] ${kernelName}"...`); + const useErrorScope = this.env.debug; + this.temporaryData = []; + try { + if (useErrorScope) { + this.device.pushErrorScope("validation"); + } + kernelEntry(context, attributes[1]); + return 0; + } catch (e) { + errors.push(Promise.resolve(`[WebGPU] Kernel "[${kernelType}] ${kernelName}" failed. ${e}`)); + return 1; + } finally { + if (useErrorScope) { + errors.push(this.device.popErrorScope().then( + (err) => err ? 
`GPU validation error for kernel "[${kernelType}] ${kernelName}": ${err.message}` : null + )); + } + for (const data of this.temporaryData) { + this.gpuDataManager.release(data.id); + } + this.temporaryData = []; + this.currentKernelId = null; + } + } + // #region external buffer + registerBuffer(sessionId, index, buffer, size) { + let sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId); + if (!sessionInputOutputMapping) { + sessionInputOutputMapping = /* @__PURE__ */ new Map(); + this.sessionExternalDataMapping.set(sessionId, sessionInputOutputMapping); + } + const previousBuffer = sessionInputOutputMapping.get(index); + const id = this.gpuDataManager.registerExternalBuffer(buffer, size, previousBuffer?.[1]); + sessionInputOutputMapping.set(index, [id, buffer]); + return id; + } + unregisterBuffers(sessionId) { + const sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId); + if (sessionInputOutputMapping) { + sessionInputOutputMapping.forEach((bufferInfo) => this.gpuDataManager.unregisterExternalBuffer(bufferInfo[1])); + this.sessionExternalDataMapping.delete(sessionId); + } + } + getBuffer(gpuDataId) { + const gpuData = this.gpuDataManager.get(gpuDataId); + if (!gpuData) { + throw new Error(`no GPU data for buffer: ${gpuDataId}`); + } + return gpuData.buffer; + } + createDownloader(gpuBuffer, size, type) { + return async () => { + const data = await downloadGpuData(this, gpuBuffer, size); + return createView(data.buffer, type); + }; + } + // #endregion + writeTimestamp(index) { + if (this.queryType !== "inside-passes") { + return; + } + this.computePassEncoder.writeTimestamp(this.querySet, index); + } + setQueryType() { + this.queryType = "none"; + if (this.env.webgpu.profiling?.mode === "default" || (typeof this.env.trace === "undefined" ? 
this.env.wasm.trace : this.env.trace)) { + if (this.device.features.has("chromium-experimental-timestamp-query-inside-passes")) { + this.queryType = "inside-passes"; + } else if (this.device.features.has("timestamp-query")) { + this.queryType = "at-passes"; + } + if (this.queryType !== "none" && typeof this.querySet === "undefined") { + this.querySet = this.device.createQuerySet({ + type: "timestamp", + count: this.maxDispatchNumber * 2 + }); + this.queryResolveBuffer = this.device.createBuffer( + // eslint-disable-next-line no-bitwise + { size: this.maxDispatchNumber * 2 * 8, usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE } + ); + } + } + } + captureBegin() { + LOG_DEBUG("info", "captureBegin"); + if (!this.capturedCommandList.get(this.currentSessionId)) { + this.capturedCommandList.set(this.currentSessionId, []); + } + if (!this.capturedPendingKernels.get(this.currentSessionId)) { + this.capturedPendingKernels.set(this.currentSessionId, []); + } + this.flush(); + this.sessionStatus = "capturing"; + } + captureEnd() { + LOG_DEBUG("info", "captureEnd"); + this.flush(); + this.sessionStatus = "default"; + } + replay() { + LOG_DEBUG("info", "replay"); + this.sessionStatus = "replaying"; + const sessionCommandList = this.capturedCommandList.get(this.currentSessionId); + const sessionPendingKernels = this.capturedPendingKernels.get(this.currentSessionId); + const length = sessionCommandList.length; + this.pendingKernels = []; + for (let i = 0; i < length; i++) { + const computePassEncoder = this.getComputePassEncoder(); + const command = sessionCommandList[i]; + this.writeTimestamp(this.pendingDispatchNumber * 2); + computePassEncoder.setPipeline(command.computePipeline); + computePassEncoder.setBindGroup(0, command.bindGroup); + computePassEncoder.dispatchWorkgroups(...command.dispatchGroup); + this.writeTimestamp(this.pendingDispatchNumber * 2 + 1); + this.pendingDispatchNumber++; + if (this.queryType !== "none") { + 
this.pendingKernels.push(sessionPendingKernels[i]); + } + if (this.pendingDispatchNumber >= this.maxDispatchNumber || this.queryType === "at-passes") { + this.endComputePass(); + } + if (this.pendingDispatchNumber >= this.maxDispatchNumber) { + this.flush(); + } + } + this.flush(); + this.sessionStatus = "default"; + } + onReleaseSession(sessionId) { + this.unregisterBuffers(sessionId); + if (this.capturedCommandList.has(sessionId)) { + this.capturedCommandList.delete(sessionId); + } + if (this.capturedPendingKernels.has(sessionId)) { + this.capturedPendingKernels.delete(sessionId); + } + this.gpuDataManager.onReleaseSession(sessionId); + } + onRunStart(sessionId) { + this.currentSessionId = sessionId; + this.setQueryType(); + } + }; + } +}); + +// web/lib/wasm/jsep/init.ts +var init_exports = {}; +__export(init_exports, { + init: () => init +}); +var TensorViewImpl, ComputeContextImpl, init; +var init_init = __esm({ + "web/lib/wasm/jsep/init.ts"() { + "use strict"; + init_wasm_common(); + init_backend_webgpu(); + init_log(); + init_util(); + TensorViewImpl = class _TensorViewImpl { + constructor(module, dataType, data, dims) { + this.module = module; + this.dataType = dataType; + this.data = data; + this.dims = dims; + } + getFloat32Array() { + if (this.dataType !== 1 /* float */) { + throw new Error("Invalid data type"); + } + const elementCount = ShapeUtil.size(this.dims); + return elementCount === 0 ? new Float32Array() : new Float32Array(this.module.HEAP8.buffer, this.data, elementCount); + } + getBigInt64Array() { + if (this.dataType !== 7 /* int64 */) { + throw new Error("Invalid data type"); + } + const elementCount = ShapeUtil.size(this.dims); + return elementCount === 0 ? new BigInt64Array() : new BigInt64Array(this.module.HEAP8.buffer, this.data, elementCount); + } + getInt32Array() { + if (this.dataType !== 6 /* int32 */) { + throw new Error("Invalid data type"); + } + const elementCount = ShapeUtil.size(this.dims); + return elementCount === 0 ? 
/**
 * Per-kernel compute context handed to JSEP kernels.
 *
 * The constructor decodes a block of 32-bit words from the WebAssembly heap
 * starting at `contextDataOffset`:
 *   [opKernelContext, inputCount, outputCount, customDataOffset, customDataSize,
 *    then for each input: dataType, data, rank, dims...]
 */
ComputeContextImpl = class {
  /**
   * @param module WebAssembly module object exposing HEAPU32/HEAPU8 and stack helpers.
   * @param backend Backend object; `adapterInfo` is copied from it.
   * @param contextDataOffset Byte offset of the serialized context in the heap.
   */
  constructor(module, backend, contextDataOffset) {
    this.module = module;
    this.backend = backend;
    this.customDataOffset = 0;
    this.customDataSize = 0;
    this.adapterInfo = backend.adapterInfo;
    const heapU32 = module.HEAPU32;
    // Unsigned shift keeps the index positive for offsets at or above 2 GiB.
    let dataIndex = contextDataOffset >>> 2;
    this.opKernelContext = heapU32[dataIndex++];
    const inputCount = heapU32[dataIndex++];
    this.outputCount = heapU32[dataIndex++];
    this.customDataOffset = heapU32[dataIndex++];
    this.customDataSize = heapU32[dataIndex++];
    const inputs = [];
    for (let i = 0; i < inputCount; i++) {
      const dataType = heapU32[dataIndex++];
      const data = heapU32[dataIndex++];
      const dim = heapU32[dataIndex++];
      const dims = [];
      for (let d = 0; d < dim; d++) {
        dims.push(heapU32[dataIndex++]);
      }
      inputs.push(new TensorViewImpl(module, dataType, data, dims));
    }
    this.inputs = inputs;
  }

  /** Custom data of the kernel currently tracked by the backend. */
  get kernelCustomData() {
    return this.backend.currentKernelCustomData;
  }

  /** View over the custom data bytes referenced by the serialized context. */
  get customDataBuffer() {
    return this.module.HEAPU8.subarray(this.customDataOffset, this.customDataOffset + this.customDataSize);
  }

  /** @returns {[number, number, number]} device limits for workgroup size X/Y/Z. */
  getMaxComputeWorkgroupSizes() {
    return [
      this.backend.device.limits.maxComputeWorkgroupSizeX,
      this.backend.device.limits.maxComputeWorkgroupSizeY,
      this.backend.device.limits.maxComputeWorkgroupSizeZ
    ];
  }

  /** @returns device limit `maxComputeWorkgroupStorageSize`. */
  getMaxComputeWorkgroupStoragesize() {
    return this.backend.device.limits.maxComputeWorkgroupStorageSize;
  }

  /**
   * Runs `program` through the backend.
   *
   * @param program Program description forwarded to `backend.run`.
   * @param inputsOutputsMapping Optional `{ inputs, outputs }`. Numeric entries
   *   in `inputs` are resolved against this context's inputs, anything else is
   *   passed through. `outputs` defaults to an empty list.
   * @returns whatever `backend.run` returns.
   */
  compute(program, inputsOutputsMapping) {
    const mappedInputs = inputsOutputsMapping?.inputs?.map((i) => typeof i === "number" ? this.inputs[i] : i) ?? this.inputs;
    const outputIndices = inputsOutputsMapping?.outputs ?? [];
    const createKernelOutput = (index, dataType, dims) => new TensorViewImpl(this.module, dataType, this.output(index, dims), dims);
    const createTemporaryOutput = (dataType, dims) => {
      const elementSize = getTensorElementSize(dataType);
      if (!elementSize) {
        throw new Error(`Unsupported data type: ${dataType}`);
      }
      const bufferSize = elementSize * ShapeUtil.size(dims);
      // Zero-sized tensors get no GPU allocation; id 0 is used instead.
      const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;
      return new TensorViewImpl(this.module, dataType, gpuDataId, dims);
    };
    return this.backend.run(
      program,
      mappedInputs,
      outputIndices,
      createKernelOutput,
      createTemporaryOutput,
      this.outputCount
    );
  }

  /**
   * Asks the runtime for kernel output `index` with shape `dims`.
   * The shape is written to the WebAssembly stack as `[rank, dim0, dim1, ...]`
   * (32-bit words) and passed to `_JsepOutput`.
   *
   * @returns the value returned by `_JsepOutput`.
   * @throws {Error} wrapping any failure; the stack is always restored.
   */
  output(index, dims) {
    const stack = this.module.stackSave();
    try {
      const data = this.module.stackAlloc(
        (1 + dims.length) * 4
        /* sizeof(size_t) */
      );
      // Must be an unsigned shift (as in the constructor): with `>>` a pointer
      // at or above 2 GiB becomes a negative index and the writes below are lost.
      let offset = data >>> 2;
      this.module.HEAPU32[offset++] = dims.length;
      for (let i = 0; i < dims.length; i++) {
        this.module.HEAPU32[offset++] = dims[i];
      }
      return this.module._JsepOutput(this.opKernelContext, index, data);
    } catch (e) {
      throw new Error(
        `Failed to generate kernel's output[${index}] with dims [${dims}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${e}`
      );
    } finally {
      this.module.stackRestore(stack);
    }
  }
};

/**
 * Initializes JSEP on a WebAssembly module.
 *
 * For "webgpu" a backend is created and initialized, and `module.jsepInit` is
 * called with the backend followed by its callbacks in a fixed order.
 * For any other name `module.jsepInit("webnn")` is called.
 *
 * @param name Execution provider name.
 * @param module WebAssembly module; must expose `jsepInit`.
 * @param env3 Environment object forwarded to the backend.
 * @param gpuAdapter GPU adapter forwarded to the backend (webgpu only).
 * @throws {Error} when the module has no `jsepInit`.
 */
init = async (name, module, env3, gpuAdapter) => {
  const jsepInit = module.jsepInit;
  if (!jsepInit) {
    throw new Error("Failed to initialize JSEP. The WebAssembly module is not built with JSEP support.");
  }
  if (name === "webgpu") {
    const backend = new WebGpuBackend();
    await backend.initialize(env3, gpuAdapter);
    jsepInit("webgpu", [
      // backend
      backend,
      // jsepAlloc()
      (size) => backend.alloc(size),
      // jsepFree()
      (ptr) => backend.free(ptr),
      // jsepCopy(src, dst, size, isSourceGpu)
      (src, dst, size, isSourceGpu = false) => {
        if (isSourceGpu) {
          LOG_DEBUG("verbose", () => `[WebGPU] jsepCopyGpuToGpu: src=${src}, dst=${dst}, size=${size}`);
          backend.memcpy(src, dst);
        } else {
          LOG_DEBUG("verbose", () => `[WebGPU] jsepCopyCpuToGpu: dataOffset=${src}, gpuDataId=${dst}, size=${size}`);
          // `>>> 0` reinterprets the offset as an unsigned 32-bit value.
          const data = module.HEAPU8.subarray(src >>> 0, (src >>> 0) + size);
          backend.upload(dst, data);
        }
      },
      // jsepCopyAsync(src, dst, size)
      async (gpuDataId, dataOffset, size) => {
        LOG_DEBUG(
          "verbose",
          () => `[WebGPU] jsepCopyGpuToCpu: gpuDataId=${gpuDataId}, dataOffset=${dataOffset}, size=${size}`
        );
        await backend.download(
          gpuDataId,
          () => module.HEAPU8.subarray(dataOffset >>> 0, (dataOffset >>> 0) + size)
        );
      },
      // jsepCreateKernel
      (kernelType, kernelId, attribute) => backend.createKernel(
        kernelType,
        kernelId,
        attribute,
        module.UTF8ToString(module._JsepGetNodeName(kernelId))
      ),
      // jsepReleaseKernel
      (kernel) => backend.releaseKernel(kernel),
      // jsepRun
      (kernel, contextDataOffset, sessionHandle, errors) => {
        LOG_DEBUG(
          "verbose",
          () => `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${contextDataOffset}`
        );
        const context = new ComputeContextImpl(module, backend, contextDataOffset);
        return backend.computeKernel(kernel, context, errors);
      },
      // jsepCaptureBegin
      () => backend.captureBegin(),
      // jsepCaptureEnd
      () => backend.captureEnd(),
      // jsepReplay
      () => backend.replay()
    ]);
  } else {
    jsepInit("webnn");
  }
};
/**
 * Calls `_OrtInit` on the WebAssembly instance and reports a non-zero
 * status through `checkLastError`.
 */
initOrt = (numThreads, loggingLevel) => {
  const status = getInstance()._OrtInit(numThreads, loggingLevel);
  if (status === 0) {
    return;
  }
  checkLastError("Can't initialize onnxruntime.");
};

/**
 * Initializes the runtime from the environment's thread count and log level.
 */
initRuntime = async (env3) => {
  const threadCount = env3.wasm.numThreads;
  const logLevel = logLevelStringToEnum(env3.logLevel);
  initOrt(threadCount, logLevel);
};

/**
 * Initializes the JSEP execution provider named `epName`.
 *
 * "webgpu": requires `navigator.gpu`. Uses `env3.webgpu.adapter` when set
 * (after a shape check), otherwise requests an adapter using the validated
 * `powerPreference` / `forceFallbackAdapter` settings. Also requires
 * `env3.wasm.simd`.
 * "webnn": requires `navigator.ml`.
 * Any other name is a no-op apart from resolving the JSEP module.
 *
 * @throws {Error} on unsupported environments or invalid settings.
 */
initEp = async (env3, epName) => {
  // Resolved up front for every EP name, exactly as the bundled module does.
  const { init: initJsep } = (init_init(), __toCommonJS(init_exports));

  if (epName === "webgpu") {
    const gpuAvailable = typeof navigator !== "undefined" && Boolean(navigator.gpu);
    if (!gpuAvailable) {
      throw new Error("WebGPU is not supported in current environment");
    }

    let adapter = env3.webgpu.adapter;
    if (adapter) {
      const looksLikeAdapter = typeof adapter.limits === "object" && typeof adapter.features === "object" && typeof adapter.requestDevice === "function";
      if (!looksLikeAdapter) {
        throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.");
      }
    } else {
      const powerPreference = env3.webgpu.powerPreference;
      if (powerPreference !== undefined && !["low-power", "high-performance"].includes(powerPreference)) {
        throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);
      }
      const forceFallbackAdapter = env3.webgpu.forceFallbackAdapter;
      if (forceFallbackAdapter !== undefined && typeof forceFallbackAdapter !== "boolean") {
        throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);
      }
      adapter = await navigator.gpu.requestAdapter({ powerPreference, forceFallbackAdapter });
      if (!adapter) {
        throw new Error(
          'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.'
        );
      }
    }

    if (!env3.wasm.simd) {
      throw new Error(
        "Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP"
      );
    }
    await initJsep("webgpu", getInstance(), env3, adapter);
  } else if (epName === "webnn") {
    const mlAvailable = typeof navigator !== "undefined" && Boolean(navigator.ml);
    if (!mlAvailable) {
      throw new Error("WebNN is not supported in current environment");
    }
    await initJsep("webnn", getInstance(), env3);
  }
};

// Session registry keyed by session id.
activeSessions = /* @__PURE__ */ new Map();

/**
 * Reads the input and output counts of a session.
 * Two adjacent 32-bit slots are reserved on the WebAssembly stack and filled
 * by `_OrtGetInputOutputCount`; the stack is restored afterwards.
 *
 * @returns {[number, number]} `[inputCount, outputCount]`
 */
getSessionInputOutputCount = (sessionHandle) => {
  const wasm = getInstance();
  const savedStack = wasm.stackSave();
  try {
    const inputCountPtr = wasm.stackAlloc(8);
    const outputCountPtr = inputCountPtr + 4;
    const status = wasm._OrtGetInputOutputCount(sessionHandle, inputCountPtr, outputCountPtr);
    if (status !== 0) {
      checkLastError("Can't get session input/output count.");
    }
    const slot = inputCountPtr / 4;
    return [wasm.HEAP32[slot], wasm.HEAP32[slot + 1]];
  } finally {
    wasm.stackRestore(savedStack);
  }
};
/**
 * Creates an inference session from model bytes.
 *
 * `modelData` may be an `[offset, length]` pair, a view that already lives in
 * the WebAssembly heap, or an external buffer that is copied in through
 * `copyFromExternalBuffer`.
 *
 * On success the session is registered in `activeSessions` as
 * `[handle, inputNamePtrs, outputNamePtrs, bindingState, enableGraphCapture, false]`.
 *
 * @returns {Promise<[number, string[], string[]]>} `[sessionHandle, inputNames, outputNames]`
 * @throws rethrows any failure after releasing names, binding and session.
 */
createSession = async (modelData, options) => {
  const wasm = getInstance();

  let modelDataOffset;
  let modelDataLength;
  if (Array.isArray(modelData)) {
    [modelDataOffset, modelDataLength] = modelData;
  } else if (modelData.buffer === wasm.HEAPU8.buffer) {
    modelDataOffset = modelData.byteOffset;
    modelDataLength = modelData.byteLength;
  } else {
    [modelDataOffset, modelDataLength] = copyFromExternalBuffer(modelData);
  }

  let sessionHandle = 0;
  let sessionOptionsHandle = 0;
  let ioBindingHandle = 0;
  let allocs = [];
  const inputNamePointers = [];
  const outputNamePointers = [];

  try {
    [sessionOptionsHandle, allocs] = setSessionOptions(options);

    if (options?.externalData && wasm.mountExternalData) {
      const pendingMounts = Array.from(options.externalData, (file) => {
        const isPlainPath = typeof file === "string";
        const path = isPlainPath ? file : file.path;
        const source = isPlainPath ? file : file.data;
        return loadFile(source).then((data) => {
          wasm.mountExternalData(path, data);
        });
      });
      await Promise.all(pendingMounts);
    }

    sessionHandle = await wasm._OrtCreateSession(modelDataOffset, modelDataLength, sessionOptionsHandle);
    if (sessionHandle === 0) {
      checkLastError("Can't create a session.");
    }

    const [inputCount, outputCount] = getSessionInputOutputCount(sessionHandle);
    const enableGraphCapture = Boolean(options?.enableGraphCapture);
    const inputNames = [];
    const outputNames = [];
    const outputPreferredLocations = [];

    for (let i = 0; i < inputCount; i += 1) {
      const namePtr = wasm._OrtGetInputName(sessionHandle, i);
      if (namePtr === 0) {
        checkLastError("Can't get an input name.");
      }
      inputNamePointers.push(namePtr);
      inputNames.push(wasm.UTF8ToString(namePtr));
    }

    for (let i = 0; i < outputCount; i += 1) {
      const namePtr = wasm._OrtGetOutputName(sessionHandle, i);
      if (namePtr === 0) {
        checkLastError("Can't get an output name.");
      }
      outputNamePointers.push(namePtr);
      const outputName = wasm.UTF8ToString(namePtr);
      outputNames.push(outputName);

      const preference = options?.preferredOutputLocation;
      // With graph capture and no explicit preference every output stays on the GPU.
      if (enableGraphCapture && preference === undefined) {
        outputPreferredLocations.push("gpu-buffer");
        continue;
      }

      let location;
      if (typeof preference === "string") {
        location = preference;
      } else {
        location = preference?.[outputName] ?? "cpu";
      }
      if (!["cpu", "cpu-pinned", "gpu-buffer"].includes(location)) {
        throw new Error(`Not supported preferred output location: ${location}.`);
      }
      if (enableGraphCapture && location !== "gpu-buffer") {
        throw new Error(`Not supported preferred output location: ${location}. Only 'gpu-buffer' location is supported when enableGraphCapture is true.`);
      }
      outputPreferredLocations.push(location);
    }

    // An IO binding is only needed when at least one output stays on the GPU.
    let bindingState = null;
    if (outputPreferredLocations.includes("gpu-buffer")) {
      ioBindingHandle = wasm._OrtCreateBinding(sessionHandle);
      if (ioBindingHandle === 0) {
        checkLastError("Can't create IO binding.");
      }
      bindingState = {
        handle: ioBindingHandle,
        outputPreferredLocations,
        outputPreferredLocationsEncoded: outputPreferredLocations.map((l) => dataLocationStringToEnum(l))
      };
    }

    activeSessions.set(
      sessionHandle,
      [sessionHandle, inputNamePointers, outputNamePointers, bindingState, enableGraphCapture, false]
    );
    return [sessionHandle, inputNames, outputNames];
  } catch (e) {
    for (const namePtr of inputNamePointers) {
      wasm._OrtFree(namePtr);
    }
    for (const namePtr of outputNamePointers) {
      wasm._OrtFree(namePtr);
    }
    if (ioBindingHandle !== 0) {
      wasm._OrtReleaseBinding(ioBindingHandle);
    }
    if (sessionHandle !== 0) {
      wasm._OrtReleaseSession(sessionHandle);
    }
    throw e;
  } finally {
    // The model bytes and option allocations are released on every path.
    wasm._free(modelDataOffset);
    if (sessionOptionsHandle !== 0) {
      wasm._OrtReleaseSessionOptions(sessionOptionsHandle);
    }
    for (const ptr of allocs) {
      wasm._free(ptr);
    }
    wasm.unmountExternalData?.();
  }
};

/**
 * Releases a session registered in `activeSessions`: its IO binding (clearing
 * bound outputs first when graph capture is enabled), JSEP state, name
 * strings and finally the session itself.
 *
 * @throws {Error} when `sessionId` is not registered.
 */
releaseSession = (sessionId) => {
  const wasm = getInstance();
  const entry = activeSessions.get(sessionId);
  if (!entry) {
    throw new Error(`cannot release session. invalid session id: ${sessionId}`);
  }
  const [sessionHandle, inputNamePointers, outputNamePointers, bindingState, enableGraphCapture] = entry;

  if (bindingState) {
    if (enableGraphCapture) {
      wasm._OrtClearBoundOutputs(bindingState.handle);
    }
    wasm._OrtReleaseBinding(bindingState.handle);
  }
  wasm.jsepOnReleaseSession?.(sessionId);

  for (const namePtr of inputNamePointers) {
    wasm._OrtFree(namePtr);
  }
  for (const namePtr of outputNamePointers) {
    wasm._OrtFree(namePtr);
  }
  wasm._OrtReleaseSession(sessionHandle);
  activeSessions.delete(sessionId);
};
/**
 * Runs inference on a registered session.
 *
 * @param sessionId Key of the session in `activeSessions`.
 * @param inputIndices Model input index for each entry of `inputTensors`.
 * @param inputTensors Input tensors as `[type, dims, data, location]` tuples.
 * @param outputIndices Model output index for each entry of `outputTensors`.
 * @param outputTensors Pre-allocated outputs (same tuple shape) or null/undefined
 *   to let the runtime allocate the output.
 * @param options Run options forwarded to `setRunOptions`.
 * @returns {Promise<Array>} one `[type, dims, data, location]` tuple per requested
 *   output. Pre-allocated outputs are returned as passed in.
 * @throws {Error} for an unknown session id, binding failures or run failures.
 */
run = async (sessionId, inputIndices, inputTensors, outputIndices, outputTensors, options) => {
  const wasm2 = getInstance();
  const session = activeSessions.get(sessionId);
  if (!session) {
    throw new Error(`cannot run inference. invalid session id: ${sessionId}`);
  }
  const sessionHandle = session[0];
  const inputNamesUTF8Encoded = session[1];
  const outputNamesUTF8Encoded = session[2];
  const ioBindingState = session[3];
  const enableGraphCapture = session[4];
  const inputOutputBound = session[5];
  const inputCount = inputIndices.length;
  const outputCount = outputIndices.length;
  let runOptionsHandle = 0;
  let runOptionsAllocs = [];
  const inputTensorHandles = [];
  const outputTensorHandles = [];
  const inputOutputAllocs = [];
  // Four pointer tables (values and names, for inputs and outputs) live on the
  // WebAssembly stack for the duration of the call.
  const beforeRunStack = wasm2.stackSave();
  const inputValuesOffset = wasm2.stackAlloc(inputCount * 4);
  const inputNamesOffset = wasm2.stackAlloc(inputCount * 4);
  const outputValuesOffset = wasm2.stackAlloc(outputCount * 4);
  const outputNamesOffset = wasm2.stackAlloc(outputCount * 4);
  try {
    [runOptionsHandle, runOptionsAllocs] = setRunOptions(options);
    for (let i = 0; i < inputCount; i++) {
      prepareInputOutputTensor(
        inputTensors[i],
        inputTensorHandles,
        inputOutputAllocs,
        sessionId,
        inputIndices[i],
        enableGraphCapture
      );
    }
    for (let i = 0; i < outputCount; i++) {
      prepareInputOutputTensor(
        outputTensors[i],
        outputTensorHandles,
        inputOutputAllocs,
        sessionId,
        inputCount + outputIndices[i],
        enableGraphCapture
      );
    }
    let inputValuesIndex = inputValuesOffset / 4;
    let inputNamesIndex = inputNamesOffset / 4;
    let outputValuesIndex = outputValuesOffset / 4;
    let outputNamesIndex = outputNamesOffset / 4;
    for (let i = 0; i < inputCount; i++) {
      wasm2.HEAPU32[inputValuesIndex++] = inputTensorHandles[i];
      wasm2.HEAPU32[inputNamesIndex++] = inputNamesUTF8Encoded[inputIndices[i]];
    }
    for (let i = 0; i < outputCount; i++) {
      wasm2.HEAPU32[outputValuesIndex++] = outputTensorHandles[i];
      wasm2.HEAPU32[outputNamesIndex++] = outputNamesUTF8Encoded[outputIndices[i]];
    }
    if (ioBindingState && !inputOutputBound) {
      const { handle, outputPreferredLocations, outputPreferredLocationsEncoded } = ioBindingState;
      if (inputNamesUTF8Encoded.length !== inputCount) {
        throw new Error(`input count from feeds (${inputCount}) is expected to be always equal to model's input count (${inputNamesUTF8Encoded.length}).`);
      }
      for (let i = 0; i < inputCount; i++) {
        const index = inputIndices[i];
        const errorCode2 = await wasm2._OrtBindInput(handle, inputNamesUTF8Encoded[index], inputTensorHandles[i]);
        if (errorCode2 !== 0) {
          checkLastError(`Can't bind input[${i}] for session=${sessionId}.`);
        }
      }
      for (let i = 0; i < outputCount; i++) {
        const index = outputIndices[i];
        const location = outputTensors[i]?.[3];
        if (location) {
          // Pre-allocated output: bind the caller's tensor.
          const errorCode2 = wasm2._OrtBindOutput(handle, outputNamesUTF8Encoded[index], outputTensorHandles[i], 0);
          if (errorCode2 !== 0) {
            checkLastError(`Can't bind pre-allocated output[${i}] for session=${sessionId}.`);
          }
        } else {
          // Runtime-allocated output: bind by preferred location of the MODEL output.
          const errorCode2 = wasm2._OrtBindOutput(handle, outputNamesUTF8Encoded[index], 0, outputPreferredLocationsEncoded[index]);
          if (errorCode2 !== 0) {
            // Report the location that was actually used for binding, i.e. the one
            // at the model output index, not at the position in the request.
            checkLastError(`Can't bind output[${i}] to ${outputPreferredLocations[index]} for session=${sessionId}.`);
          }
        }
      }
      activeSessions.set(
        sessionId,
        [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState, enableGraphCapture, true]
      );
    }
    wasm2.jsepOnRunStart?.(sessionHandle);
    let errorCode;
    if (ioBindingState) {
      errorCode = await wasm2._OrtRunWithBinding(
        sessionHandle,
        ioBindingState.handle,
        outputCount,
        outputValuesOffset,
        runOptionsHandle
      );
    } else {
      errorCode = await wasm2._OrtRun(
        sessionHandle,
        inputNamesOffset,
        inputValuesOffset,
        inputCount,
        outputNamesOffset,
        outputCount,
        outputValuesOffset,
        runOptionsHandle
      );
    }
    if (errorCode !== 0) {
      checkLastError("failed to call OrtRun().");
    }
    const output = [];
    for (let i = 0; i < outputCount; i++) {
      const tensor = wasm2.HEAPU32[outputValuesOffset / 4 + i];
      if (tensor === outputTensorHandles[i]) {
        // The runtime wrote into the caller's pre-allocated tensor.
        output.push(outputTensors[i]);
        continue;
      }
      const beforeGetTensorDataStack = wasm2.stackSave();
      // Four 32-bit out-parameters: data type, data offset, dims offset, dims length.
      const tensorDataOffset = wasm2.stackAlloc(4 * 4);
      let keepOutputTensor = false;
      let type, dataOffset = 0;
      try {
        const errorCode2 = wasm2._OrtGetTensorData(
          tensor,
          tensorDataOffset,
          tensorDataOffset + 4,
          tensorDataOffset + 8,
          tensorDataOffset + 12
        );
        if (errorCode2 !== 0) {
          checkLastError(`Can't access output tensor data on index ${i}.`);
        }
        let tensorDataIndex = tensorDataOffset / 4;
        const dataType = wasm2.HEAPU32[tensorDataIndex++];
        dataOffset = wasm2.HEAPU32[tensorDataIndex++];
        const dimsOffset = wasm2.HEAPU32[tensorDataIndex++];
        const dimsLength = wasm2.HEAPU32[tensorDataIndex++];
        const dims = [];
        for (let i2 = 0; i2 < dimsLength; i2++) {
          dims.push(wasm2.HEAPU32[dimsOffset / 4 + i2]);
        }
        wasm2._OrtFree(dimsOffset);
        const size = dims.reduce((a, b) => a * b, 1);
        type = tensorDataTypeEnumToString(dataType);
        const preferredLocation = ioBindingState?.outputPreferredLocations[outputIndices[i]];
        if (type === "string") {
          if (preferredLocation === "gpu-buffer") {
            throw new Error("String tensor is not supported on GPU.");
          }
          const stringData = [];
          let dataIndex = dataOffset / 4;
          for (let i2 = 0; i2 < size; i2++) {
            const offset = wasm2.HEAPU32[dataIndex++];
            // Every string except the last is bounded by the start of the next one.
            const maxBytesToRead = i2 === size - 1 ? void 0 : wasm2.HEAPU32[dataIndex] - offset;
            stringData.push(wasm2.UTF8ToString(offset, maxBytesToRead));
          }
          output.push([type, dims, stringData, "cpu"]);
        } else {
          if (preferredLocation === "gpu-buffer" && size > 0) {
            const getBuffer = wasm2.jsepGetBuffer;
            if (!getBuffer) {
              throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');
            }
            const gpuBuffer = getBuffer(dataOffset);
            const elementSize = getTensorElementSize(dataType);
            if (elementSize === void 0 || !isGpuBufferSupportedType(type)) {
              throw new Error(`Unsupported data type: ${type}`);
            }
            // The tensor must outlive this call; it is released by `dispose()`.
            keepOutputTensor = true;
            output.push([
              type,
              dims,
              {
                gpuBuffer,
                download: wasm2.jsepCreateDownloader(gpuBuffer, size * elementSize, type),
                dispose: () => {
                  wasm2._OrtReleaseTensor(tensor);
                }
              },
              "gpu-buffer"
            ]);
          } else {
            // Copy the bytes out of the WebAssembly heap into a fresh typed array.
            const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type);
            const data = new typedArrayConstructor(size);
            new Uint8Array(data.buffer, data.byteOffset, data.byteLength).set(wasm2.HEAPU8.subarray(dataOffset, dataOffset + data.byteLength));
            output.push([type, dims, data, "cpu"]);
          }
        }
      } finally {
        wasm2.stackRestore(beforeGetTensorDataStack);
        if (type === "string" && dataOffset) {
          wasm2._free(dataOffset);
        }
        if (!keepOutputTensor) {
          wasm2._OrtReleaseTensor(tensor);
        }
      }
    }
    if (ioBindingState && !enableGraphCapture) {
      // Without graph capture the bindings are rebuilt on the next run.
      wasm2._OrtClearBoundOutputs(ioBindingState.handle);
      activeSessions.set(
        sessionId,
        [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState, enableGraphCapture, false]
      );
    }
    return output;
  } finally {
    wasm2.stackRestore(beforeRunStack);
    inputTensorHandles.forEach((v) => wasm2._OrtReleaseTensor(v));
    outputTensorHandles.forEach((v) => wasm2._OrtReleaseTensor(v));
    inputOutputAllocs.forEach((p) => wasm2._free(p));
    if (runOptionsHandle !== 0) {
      wasm2._OrtReleaseRunOptions(runOptionsHandle);
    }
    runOptionsAllocs.forEach((p) => wasm2._free(p));
  }
};

/**
 * Ends profiling for a session and frees the returned profile file name.
 *
 * @throws {Error} when `sessionId` is not registered.
 */
endProfiling = (sessionId) => {
  const wasm2 = getInstance();
  const session = activeSessions.get(sessionId);
  if (!session) {
    throw new Error("invalid session id");
  }
  const sessionHandle = session[0];
  const profileFileName = wasm2._OrtEndProfiling(sessionHandle);
  if (profileFileName === 0) {
    checkLastError("Can't get an profile file name.");
  }
  wasm2._OrtFree(profileFileName);
};

/**
 * Collects the `buffer` of every tensor whose data (tuple slot 2) is a
 * non-array object exposing a `buffer` property.
 *
 * @param tensors Iterable of `[type, dims, data, location]` tuples.
 * @returns {Array} the collected buffers, in input order.
 */
extractTransferableBuffers = (tensors) => {
  const buffers = [];
  for (const tensor of tensors) {
    const data = tensor[2];
    if (!Array.isArray(data) && "buffer" in data) {
      buffers.push(data.buffer);
    }
  }
  return buffers;
};
__copyProps(__defProp({}, "__esModule", { value: true }), mod);\n\n // nodejs-ignore:fs\n var fs_exports = {};\n __export(fs_exports, {\n createReadStream: () => createReadStream,\n readFile: () => readFile,\n readFileSync: () => readFileSync\n });\n var readFile, readFileSync, createReadStream;\n var init_fs = __esm({\n "nodejs-ignore:fs"() {\n readFile = void 0;\n readFileSync = void 0;\n createReadStream = void 0;\n }\n });\n\n // nodejs-ignore:path\n var path_exports = {};\n __export(path_exports, {\n join: () => join2\n });\n var join2;\n var init_path = __esm({\n "nodejs-ignore:path"() {\n join2 = void 0;\n }\n });\n\n // web/lib/wasm/binding/ort-wasm-simd.jsep.js\n var require_ort_wasm_simd_jsep = __commonJS({\n "web/lib/wasm/binding/ort-wasm-simd.jsep.js"(exports, module) {\n "use strict";\n var ortWasm = (() => {\n var _scriptDir = typeof document != "undefined" ? document.currentScript?.src : void 0;\n if (typeof __filename != "undefined")\n _scriptDir ||= __filename;\n return function(moduleArg = {}) {\n var g = moduleArg, aa, ba, readyPromise = new Promise((a, b) => {\n aa = a;\n ba = b;\n });\n "use strict";\n g.mountExternalData = (a, b) => {\n (g.Ph || (g.Ph = /* @__PURE__ */ new Map())).set(a, b);\n };\n g.unmountExternalData = () => {\n delete g.Ph;\n };\n let da = () => {\n const a = (c, d, e) => (...f) => {\n const h = t, k = d?.();\n f = c(...f);\n const l = d?.();\n k !== l && (c = l, e(k), d = e = null);\n return t != h ? 
ca() : f;\n }, b = (c) => async (...d) => {\n try {\n if (g.Oh)\n throw Error("Session already started");\n const e = g.Oh = { gi: d[0], errors: [] }, f = await c(...d);\n if (g.Oh !== e)\n throw Error("Session mismatch");\n g.Uh?.flush();\n const h = e.errors;\n if (0 < h.length) {\n let k = await Promise.all(h);\n k = k.filter((l) => l);\n if (0 < k.length)\n throw Error(k.join("\\n"));\n }\n return f;\n } finally {\n g.Oh = null;\n }\n };\n g._OrtCreateSession = a(\n g._OrtCreateSession,\n () => g._OrtCreateSession,\n (c) => g._OrtCreateSession = c\n );\n g._OrtRun = b(a(g._OrtRun, () => g._OrtRun, (c) => g._OrtRun = c));\n g._OrtRunWithBinding = b(a(g._OrtRunWithBinding, () => g._OrtRunWithBinding, (c) => g._OrtRunWithBinding = c));\n g._OrtBindInput = a(g._OrtBindInput, () => g._OrtBindInput, (c) => g._OrtBindInput = c);\n da = void 0;\n };\n g.jsepInit = (a, b) => {\n da?.();\n if ("webgpu" === a) {\n [g.Uh, g.Zh, g.ci, g.Vh, g.bi, g.je, g.di, g.fi, g.$h, g.ai, g.ei] = b;\n const c = g.Uh;\n g.jsepRegisterBuffer = (d, e, f, h) => c.registerBuffer(d, e, f, h);\n g.jsepGetBuffer = (d) => c.getBuffer(d);\n g.jsepCreateDownloader = (d, e, f) => c.createDownloader(d, e, f);\n g.jsepOnReleaseSession = (d) => {\n c.onReleaseSession(d);\n };\n g.jsepOnRunStart = (d) => c.onRunStart(d);\n }\n };\n var ea = Object.assign({}, g), fa = "./this.program", ha = (a, b) => {\n throw b;\n }, ia = "object" == typeof window, ja = "function" == typeof importScripts, ka = "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node, v = "", la, ma, na;\n if (ka) {\n var fs = (init_fs(), __toCommonJS(fs_exports)), oa = (init_path(), __toCommonJS(path_exports));\n v = ja ? oa.dirname(v) + "/" : __dirname + "/";\n la = (a, b) => {\n a = pa(a) ? new URL(a) : oa.normalize(a);\n return fs.readFileSync(a, b ? 
void 0 : "utf8");\n };\n na = (a) => {\n a = la(a, true);\n a.buffer || (a = new Uint8Array(a));\n return a;\n };\n ma = (a, b, c, d = true) => {\n a = pa(a) ? new URL(a) : oa.normalize(a);\n fs.readFile(a, d ? void 0 : "utf8", (e, f) => {\n e ? c(e) : b(d ? f.buffer : f);\n });\n };\n !g.thisProgram && 1 < process.argv.length && (fa = process.argv[1].replace(/\\\\/g, "/"));\n process.argv.slice(2);\n ha = (a, b) => {\n process.exitCode = a;\n throw b;\n };\n } else if (ia || ja)\n ja ? v = self.location.href : "undefined" != typeof document && document.currentScript && (v = document.currentScript.src), _scriptDir && (v = _scriptDir), v.startsWith("blob:") ? v = "" : v = v.substr(0, v.replace(/[?#].*/, "").lastIndexOf("/") + 1), la = (a) => {\n var b = new XMLHttpRequest();\n b.open("GET", a, false);\n b.send(null);\n return b.responseText;\n }, ja && (na = (a) => {\n var b = new XMLHttpRequest();\n b.open("GET", a, false);\n b.responseType = "arraybuffer";\n b.send(null);\n return new Uint8Array(b.response);\n }), ma = (a, b, c) => {\n var d = new XMLHttpRequest();\n d.open("GET", a, true);\n d.responseType = "arraybuffer";\n d.onload = () => {\n 200 == d.status || 0 == d.status && d.response ? 
b(d.response) : c();\n };\n d.onerror = c;\n d.send(null);\n };\n var qa = console.log.bind(console), w = console.error.bind(console);\n Object.assign(g, ea);\n ea = null;\n var ra, x = false, sa, z, E, ta, ua, G, I, va, wa, xa, ya;\n function za() {\n var a = ra.buffer;\n g.HEAP8 = z = new Int8Array(a);\n g.HEAP16 = ta = new Int16Array(a);\n g.HEAPU8 = E = new Uint8Array(a);\n g.HEAPU16 = ua = new Uint16Array(a);\n g.HEAP32 = G = new Int32Array(a);\n g.HEAPU32 = I = new Uint32Array(a);\n g.HEAPF32 = va = new Float32Array(a);\n g.HEAPF64 = ya = new Float64Array(a);\n g.HEAP64 = wa = new BigInt64Array(a);\n g.HEAPU64 = xa = new BigUint64Array(a);\n }\n var Aa = [], Ba = [], Ca = [], Da = 0, Ea = null, Fa = null;\n function Ga(a) {\n a = "Aborted(" + a + ")";\n w(a);\n x = true;\n sa = 1;\n a = new WebAssembly.RuntimeError(a + ". Build with -sASSERTIONS for more info.");\n ba(a);\n throw a;\n }\n var Ha = (a) => a.startsWith("data:application/octet-stream;base64,"), pa = (a) => a.startsWith("file://"), Ia;\n Ia = "ort-wasm-simd.wasm";\n if (!Ha(Ia)) {\n var Ja = Ia;\n Ia = g.locateFile ? g.locateFile(Ja, v) : v + Ja;\n }\n function Ka(a) {\n if (na)\n return na(a);\n throw "both async and sync fetching of the wasm failed";\n }\n function La(a) {\n if (ia || ja) {\n if ("function" == typeof fetch && !pa(a))\n return fetch(a, { credentials: "same-origin" }).then((b) => {\n if (!b.ok)\n throw `failed to load wasm binary file at \'${a}\'`;\n return b.arrayBuffer();\n }).catch(() => Ka(a));\n if (ma)\n return new Promise((b, c) => {\n ma(a, (d) => b(new Uint8Array(d)), c);\n });\n }\n return Promise.resolve().then(() => Ka(a));\n }\n function Ma(a, b, c) {\n return La(a).then((d) => WebAssembly.instantiate(d, b)).then(c, (d) => {\n w(`failed to asynchronously prepare wasm: ${d}`);\n Ga(d);\n });\n }\n function Na(a, b) {\n var c = Ia;\n return "function" != typeof WebAssembly.instantiateStreaming || Ha(c) || pa(c) || ka || "function" != typeof fetch ? 
Ma(c, a, b) : fetch(c, { credentials: "same-origin" }).then((d) => WebAssembly.instantiateStreaming(d, a).then(b, function(e) {\n w(`wasm streaming compile failed: ${e}`);\n w("falling back to ArrayBuffer instantiation");\n return Ma(c, a, b);\n }));\n }\n var Oa = {\n 1261504: (a, b, c, d) => {\n if ("undefined" == typeof g || !g.Ph)\n return 1;\n a = J(a >>> 0);\n a.startsWith("./") && (a = a.substring(2));\n a = g.Ph.get(a);\n if (!a)\n return 2;\n b >>>= 0;\n c >>>= 0;\n if (b + c > a.byteLength)\n return 3;\n try {\n return E.set(a.subarray(b, b + c), d >>> 0 >>> 0), 0;\n } catch {\n return 4;\n }\n },\n 1262005: (a, b, c) => {\n c = J(c);\n const d = new Uint8Array(b);\n d.set(E.subarray(a >>> 0, a + b >>> 0));\n "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node ? (init_fs(), __toCommonJS(fs_exports)).writeFileSync(c, d) : (a = new File([d], c, { type: "application/octet-stream" }), a = URL.createObjectURL(a), window.open(a, "_blank"));\n },\n 1262513: () => {\n g.$h();\n },\n 1262544: () => {\n g.ai();\n },\n 1262573: () => {\n g.ei();\n },\n 1262598: (a) => g.Zh(a),\n 1262631: (a) => g.ci(a),\n 1262663: (a, b, c) => {\n g.Vh(a, b, c, true);\n },\n 1262702: (a, b, c) => {\n g.Vh(a, b, c);\n },\n 1262735: (a) => {\n g.je("Abs", a, void 0);\n },\n 1262786: (a) => {\n g.je("Neg", a, void 0);\n },\n 1262837: (a) => {\n g.je("Floor", a, void 0);\n },\n 1262890: (a) => {\n g.je("Ceil", a, void 0);\n },\n 1262942: (a) => {\n g.je("Reciprocal", a, void 0);\n },\n 1263e3: (a) => {\n g.je("Sqrt", a, void 0);\n },\n 1263052: (a) => {\n g.je("Exp", a, void 0);\n },\n 1263103: (a) => {\n g.je("Erf", a, void 0);\n },\n 1263154: (a) => {\n g.je("Sigmoid", a, void 0);\n },\n 1263209: (a, b, c) => {\n g.je("HardSigmoid", a, { alpha: b, beta: c });\n },\n 1263288: (a) => {\n g.je("Log", a, void 0);\n },\n 1263339: (a) => {\n g.je("Sin", a, void 0);\n },\n 1263390: (a) => {\n g.je("Cos", a, void 0);\n },\n 1263441: 
(a) => {\n g.je("Tan", a, void 0);\n },\n 1263492: (a) => {\n g.je("Asin", a, void 0);\n },\n 1263544: (a) => {\n g.je("Acos", a, void 0);\n },\n 1263596: (a) => {\n g.je("Atan", a, void 0);\n },\n 1263648: (a) => {\n g.je("Sinh", a, void 0);\n },\n 1263700: (a) => {\n g.je("Cosh", a, void 0);\n },\n 1263752: (a) => {\n g.je("Asinh", a, void 0);\n },\n 1263805: (a) => {\n g.je("Acosh", a, void 0);\n },\n 1263858: (a) => {\n g.je("Atanh", a, void 0);\n },\n 1263911: (a) => {\n g.je("Tanh", a, void 0);\n },\n 1263963: (a) => {\n g.je("Not", a, void 0);\n },\n 1264014: (a, b, c) => {\n g.je("Clip", a, { min: b, max: c });\n },\n 1264083: (a) => {\n g.je("Clip", a, void 0);\n },\n 1264135: (a, b) => {\n g.je("Elu", a, { alpha: b });\n },\n 1264193: (a) => {\n g.je("Relu", a, void 0);\n },\n 1264245: (a, b) => {\n g.je("LeakyRelu", a, { alpha: b });\n },\n 1264309: (a, b) => {\n g.je("ThresholdedRelu", a, { alpha: b });\n },\n 1264379: (a, b) => {\n g.je("Cast", a, { to: b });\n },\n 1264437: (a) => {\n g.je("Add", a, void 0);\n },\n 1264488: (a) => {\n g.je("Sub", a, void 0);\n },\n 1264539: (a) => {\n g.je("Mul", a, void 0);\n },\n 1264590: (a) => {\n g.je("Div", a, void 0);\n },\n 1264641: (a) => {\n g.je("Pow", a, void 0);\n },\n 1264692: (a) => {\n g.je("Equal", a, void 0);\n },\n 1264745: (a) => {\n g.je("Greater", a, void 0);\n },\n 1264800: (a) => {\n g.je("GreaterOrEqual", a, void 0);\n },\n 1264862: (a) => {\n g.je("Less", a, void 0);\n },\n 1264914: (a) => {\n g.je("LessOrEqual", a, void 0);\n },\n 1264973: (a, b, c, d, e) => {\n g.je("ReduceMean", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265132: (a, b, c, d, e) => {\n g.je("ReduceMax", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265290: (a, b, c, d, e) => {\n g.je("ReduceMin", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? 
Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265448: (a, b, c, d, e) => {\n g.je("ReduceProd", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265607: (a, b, c, d, e) => {\n g.je("ReduceSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265765: (a, b, c, d, e) => {\n g.je("ReduceL1", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1265922: (a, b, c, d, e) => {\n g.je("ReduceL2", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1266079: (a, b, c, d, e) => {\n g.je("ReduceLogSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1266240: (a, b, c, d, e) => {\n g.je("ReduceSumSquare", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1266404: (a, b, c, d, e) => {\n g.je("ReduceLogSumExp", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1266568: (a) => {\n g.je("Where", a, void 0);\n },\n 1266621: (a, b, c) => {\n g.je("Transpose", a, { perm: b ? Array.from(G.subarray(b >>> 0, c >>> 0)) : [] });\n },\n 1266729: (a, b, c, d) => {\n g.je("DepthToSpace", a, { blocksize: b, mode: J(c), format: d ? "NHWC" : "NCHW" });\n },\n 1266862: (a, b, c, d) => {\n g.je("DepthToSpace", a, { blocksize: b, mode: J(c), format: d ? "NHWC" : "NCHW" });\n },\n 1266995: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => {\n g.je("ConvTranspose", a, { format: l ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: d, kernelShape: [e], pads: [f, h], strides: [k], wIsConst: () => !!z[m >>> 0], outputPadding: n ? Array.from(G.subarray(n >>> 0, q >>> 0)) : [], outputShape: r ? 
Array.from(G.subarray(r >>> 0, p >>> 0)) : [], activation: J(u) });\n },\n 1267396: (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => {\n g.je("ConvTranspose", a, { format: k ? "NHWC" : "NCHW", autoPad: b, dilations: Array.from(G.subarray(c >>> 0, (c >>> 0) + 2 >>> 0)), group: d, kernelShape: Array.from(G.subarray(e >>> 0, (e >>> 0) + 2 >>> 0)), pads: Array.from(G.subarray(f >>> 0, (f >>> 0) + 4 >>> 0)), strides: Array.from(G.subarray(h >>> 0, (h >>> 0) + 2 >>> 0)), wIsConst: () => !!z[l >>> 0], outputPadding: m ? Array.from(G.subarray(m >>> 0, n >>> 0)) : [], outputShape: q ? Array.from(G.subarray(q >>> 0, r >>> 0)) : [], activation: J(p) });\n },\n 1267961: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => {\n g.je("ConvTranspose", a, { format: l ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: d, kernelShape: [e], pads: [f, h], strides: [k], wIsConst: () => !!z[m >>> 0], outputPadding: n ? Array.from(G.subarray(n >>> 0, q >>> 0)) : [], outputShape: r ? Array.from(G.subarray(r >>> 0, p >>> 0)) : [], activation: J(u) });\n },\n 1268362: (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => {\n g.je("ConvTranspose", a, {\n format: k ? "NHWC" : "NCHW",\n autoPad: b,\n dilations: Array.from(G.subarray(c >>> 0, (c >>> 0) + 2 >>> 0)),\n group: d,\n kernelShape: Array.from(G.subarray(e >>> 0, (e >>> 0) + 2 >>> 0)),\n pads: Array.from(G.subarray(f >>> 0, (f >>> 0) + 4 >>> 0)),\n strides: Array.from(G.subarray(h >>> 0, (h >>> 0) + 2 >>> 0)),\n wIsConst: () => !!z[l >>> 0],\n outputPadding: m ? Array.from(G.subarray(m >>> 0, n >>> 0)) : [],\n outputShape: q ? Array.from(G.subarray(q >>> 0, r >>> 0)) : [],\n activation: J(p)\n });\n },\n 1268927: (a, b) => {\n g.je("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 1269018: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => {\n g.je("AveragePool", a, {\n format: y ? 
"NHWC" : "NCHW",\n auto_pad: b,\n ceil_mode: c,\n count_include_pad: d,\n storage_order: e,\n dilations: [f, h],\n kernel_shape: [k, l],\n pads: [m, n, q, r],\n strides: [p, u]\n });\n },\n 1269302: (a, b) => {\n g.je("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 1269393: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => {\n g.je("AveragePool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] });\n },\n 1269677: (a, b) => {\n g.je("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 1269764: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => {\n g.je("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] });\n },\n 1270044: (a, b) => {\n g.je("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 1270131: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => {\n g.je("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: d, storage_order: e, dilations: [f, h], kernel_shape: [k, l], pads: [m, n, q, r], strides: [p, u] });\n },\n 1270411: (a, b, c, d, e) => {\n g.je("Gemm", a, { alpha: b, beta: c, transA: d, transB: e });\n },\n 1270515: (a) => {\n g.je("MatMul", a, void 0);\n },\n 1270569: (a, b, c, d) => {\n g.je("ArgMax", a, { keepDims: !!b, selectLastIndex: !!c, axis: d });\n },\n 1270677: (a, b, c, d) => {\n g.je("ArgMin", a, { keepDims: !!b, selectLastIndex: !!c, axis: d });\n },\n 1270785: (a, b) => {\n g.je("Softmax", a, { axis: b });\n },\n 1270848: (a, b) => {\n g.je("Concat", a, { axis: b });\n },\n 1270908: (a, b, c, d, e) => {\n g.je("Split", a, { axis: b, numOutputs: c, splitSizes: d ? 
Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1271048: (a) => {\n g.je("Expand", a, void 0);\n },\n 1271102: (a, b) => {\n g.je("Gather", a, { axis: Number(b) });\n },\n 1271173: (a, b) => {\n g.je(\n "GatherElements",\n a,\n { axis: Number(b) }\n );\n },\n 1271252: (a, b, c, d, e, f, h, k, l, m, n) => {\n g.je("Resize", a, { antialias: b, axes: c ? Array.from(G.subarray(c >>> 0, d >>> 0)) : [], coordinateTransformMode: J(e), cubicCoeffA: f, excludeOutside: h, extrapolationValue: k, keepAspectRatioPolicy: J(l), mode: J(m), nearestMode: J(n) });\n },\n 1271598: (a, b, c, d, e, f, h) => {\n g.je("Slice", a, { starts: b ? Array.from(G.subarray(b >>> 0, c >>> 0)) : [], ends: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [], axes: f ? Array.from(G.subarray(f >>> 0, h >>> 0)) : [] });\n },\n 1271814: (a) => {\n g.je("Tile", a, void 0);\n },\n 1271866: (a, b, c, d) => {\n g.je("LayerNormalization", a, { axis: b, epsilon: c, simplified: !!d });\n },\n 1271977: (a, b, c) => {\n g.je("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" });\n },\n 1272091: (a, b, c) => {\n g.je("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" });\n },\n 1272205: (a) => {\n g.je("Range", a, void 0);\n },\n 1272258: (a, b) => {\n g.je("Einsum", a, { equation: J(b) });\n },\n 1272339: (a, b, c, d, e) => {\n g.je("Pad", a, { mode: b, value: c, pads: d ? Array.from(G.subarray(d >>> 0, e >>> 0)) : [] });\n },\n 1272466: (a, b, c, d, e, f) => {\n g.je("BatchNormalization", a, {\n epsilon: b,\n momentum: c,\n spatial: !!e,\n trainingMode: !!d,\n format: f ? "NHWC" : "NCHW"\n });\n },\n 1272635: (a, b, c, d, e, f) => {\n g.je("BatchNormalization", a, { epsilon: b, momentum: c, spatial: !!e, trainingMode: !!d, format: f ? 
"NHWC" : "NCHW" });\n },\n 1272804: (a, b, c) => {\n g.je("CumSum", a, { exclusive: Number(b), reverse: Number(c) });\n },\n 1272901: (a, b, c, d, e, f, h, k, l) => {\n g.je("Attention", a, { numHeads: b, isUnidirectional: c, maskFilterValue: d, scale: e, doRotary: f, qkvHiddenSizes: h ? Array.from(G.subarray(Number(k) >>> 0, Number(k) + h >>> 0)) : [], pastPresentShareBuffer: !!l });\n },\n 1273173: (a) => {\n g.je(\n "BiasAdd",\n a,\n void 0\n );\n },\n 1273228: (a) => {\n g.je("BiasSplitGelu", a, void 0);\n },\n 1273289: (a) => {\n g.je("FastGelu", a, void 0);\n },\n 1273345: (a, b, c, d, e, f, h, k, l, m, n, q, r) => {\n g.je("Conv", a, { format: l ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: d, kernel_shape: [e], pads: f ? Array.from(G.subarray(f >>> 0, h >>> 0)) : [], strides: [k], w_is_const: () => !!z[m >>> 0], activation: J(n), activation_params: q ? Array.from(va.subarray(q >>> 0, r >>> 0)) : [] });\n },\n 1273715: (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => {\n g.je("Conv", a, { format: q ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, d], group: e, kernel_shape: [\n f,\n h\n ], pads: k ? Array.from(G.subarray(k >>> 0, l >>> 0)) : [], strides: [m, n], w_is_const: () => !!z[r >>> 0], activation: J(p), activation_params: u ? 
Array.from(va.subarray(u >>> 0, y >>> 0)) : [] });\n },\n 1274106: (a) => {\n g.je("Gelu", a, void 0);\n },\n 1274158: (a, b, c, d, e, f) => {\n g.je("MatMulNBits", a, { k: b, n: c, accuracyLevel: d, bits: e, blockSize: f });\n },\n 1274285: (a, b, c, d, e, f) => {\n g.je("MultiHeadAttention", a, { numHeads: b, isUnidirectional: c, maskFilterValue: d, scale: e, doRotary: f });\n },\n 1274444: (a, b, c, d, e) => {\n g.je("RotaryEmbedding", a, {\n interleaved: !!b,\n numHeads: c,\n rotaryEmbeddingDim: d,\n scale: e\n });\n },\n 1274583: (a, b, c) => {\n g.je("SkipLayerNormalization", a, { epsilon: b, simplified: !!c });\n },\n 1274685: (a, b, c) => {\n g.je("SkipLayerNormalization", a, { epsilon: b, simplified: !!c });\n },\n 1274787: (a, b, c, d) => {\n g.je("LayerNormalization", a, { axis: b, epsilon: c, simplified: !!d });\n },\n 1274898: (a) => {\n g.di(a);\n },\n 1274932: (a, b) => g.fi(a, b, g.Oh.gi, g.Oh.errors)\n };\n function Pa(a) {\n this.name = "ExitStatus";\n this.message = `Program terminated with exit(${a})`;\n this.status = a;\n }\n var Qa = [], Ra = 0, L = 0;\n class Sa {\n constructor(a) {\n this.Nh = a;\n this.Ih = a - 24;\n }\n }\n var Za = (a) => {\n var b = L;\n if (!b)\n return Ta(0), 0;\n var c = new Sa(b);\n I[c.Ih + 16 >>> 2 >>> 0] = b;\n var d = I[c.Ih + 4 >>> 2 >>> 0];\n if (!d)\n return Ta(0), b;\n for (var e in a) {\n var f = a[e];\n if (0 === f || f === d)\n break;\n if (Ua(f, d, c.Ih + 16))\n return Ta(f), b;\n }\n Ta(d);\n return b;\n }, $a = "undefined" != typeof TextDecoder ? new TextDecoder("utf8") : void 0, ab = (a, b, c) => {\n b >>>= 0;\n var d = b + c;\n for (c = b; a[c] && !(c >= d); )\n ++c;\n if (16 < c - b && a.buffer && $a)\n return $a.decode(a.subarray(b, c));\n for (d = ""; b < c; ) {\n var e = a[b++];\n if (e & 128) {\n var f = a[b++] & 63;\n if (192 == (e & 224))\n d += String.fromCharCode((e & 31) << 6 | f);\n else {\n var h = a[b++] & 63;\n e = 224 == (e & 240) ? 
(e & 15) << 12 | f << 6 | h : (e & 7) << 18 | f << 12 | h << 6 | a[b++] & 63;\n 65536 > e ? d += String.fromCharCode(e) : (e -= 65536, d += String.fromCharCode(55296 | e >> 10, 56320 | e & 1023));\n }\n } else\n d += String.fromCharCode(e);\n }\n return d;\n }, J = (a, b) => (a >>>= 0) ? ab(E, a, b) : "", bb = (a) => {\n for (var b = 0, c = 0; c < a.length; ++c) {\n var d = a.charCodeAt(c);\n 127 >= d ? b++ : 2047 >= d ? b += 2 : 55296 <= d && 57343 >= d ? (b += 4, ++c) : b += 3;\n }\n return b;\n }, M = (a, b, c, d) => {\n c >>>= 0;\n if (!(0 < d))\n return 0;\n var e = c;\n d = c + d - 1;\n for (var f = 0; f < a.length; ++f) {\n var h = a.charCodeAt(f);\n if (55296 <= h && 57343 >= h) {\n var k = a.charCodeAt(++f);\n h = 65536 + ((h & 1023) << 10) | k & 1023;\n }\n if (127 >= h) {\n if (c >= d)\n break;\n b[c++ >>> 0] = h;\n } else {\n if (2047 >= h) {\n if (c + 1 >= d)\n break;\n b[c++ >>> 0] = 192 | h >> 6;\n } else {\n if (65535 >= h) {\n if (c + 2 >= d)\n break;\n b[c++ >>> 0] = 224 | h >> 12;\n } else {\n if (c + 3 >= d)\n break;\n b[c++ >>> 0] = 240 | h >> 18;\n b[c++ >>> 0] = 128 | h >> 12 & 63;\n }\n b[c++ >>> 0] = 128 | h >> 6 & 63;\n }\n b[c++ >>> 0] = 128 | h & 63;\n }\n }\n b[c >>> 0] = 0;\n return c - e;\n }, cb, N = (a) => {\n for (var b = ""; E[a >>> 0]; )\n b += cb[E[a++ >>> 0]];\n return b;\n }, db = {}, eb = {}, fb = {}, O;\n function gb(a, b, c = {}) {\n var d = b.name;\n if (!a)\n throw new O(`type "${d}" must have a positive integer typeid pointer`);\n if (eb.hasOwnProperty(a)) {\n if (c.Xh)\n return;\n throw new O(`Cannot register type \'${d}\' twice`);\n }\n eb[a] = b;\n delete fb[a];\n db.hasOwnProperty(a) && (b = db[a], delete db[a], b.forEach((e) => e()));\n }\n function P(a, b, c = {}) {\n if (!("argPackAdvance" in b))\n throw new TypeError("registerType registeredInstance requires argPackAdvance");\n return gb(a, b, c);\n }\n var hb = (a, b, c) => {\n switch (b) {\n case 1:\n return c ? 
(d) => z[d >>> 0] : (d) => E[d >>> 0];\n case 2:\n return c ? (d) => ta[d >>> 1 >>> 0] : (d) => ua[d >>> 1 >>> 0];\n case 4:\n return c ? (d) => G[d >>> 2 >>> 0] : (d) => I[d >>> 2 >>> 0];\n case 8:\n return c ? (d) => wa[d >>> 3] : (d) => xa[d >>> 3];\n default:\n throw new TypeError(`invalid integer width (${b}): ${a}`);\n }\n }, ib = [], Q = [];\n function jb(a) {\n a >>>= 0;\n 9 < a && 0 === --Q[a + 1] && (Q[a] = void 0, ib.push(a));\n }\n var R = (a) => {\n if (!a)\n throw new O("Cannot use deleted val. handle = " + a);\n return Q[a];\n }, S = (a) => {\n switch (a) {\n case void 0:\n return 2;\n case null:\n return 4;\n case true:\n return 6;\n case false:\n return 8;\n default:\n const b = ib.pop() || Q.length;\n Q[b] = a;\n Q[b + 1] = 1;\n return b;\n }\n };\n function kb(a) {\n return this.fromWireType(I[a >>> 2 >>> 0]);\n }\n var lb = { name: "emscripten::val", fromWireType: (a) => {\n var b = R(a);\n jb(a);\n return b;\n }, toWireType: (a, b) => S(b), argPackAdvance: 8, readValueFromPointer: kb, Mh: null }, mb = (a, b) => {\n switch (b) {\n case 4:\n return function(c) {\n return this.fromWireType(va[c >>> 2 >>> 0]);\n };\n case 8:\n return function(c) {\n return this.fromWireType(ya[c >>> 3 >>> 0]);\n };\n default:\n throw new TypeError(`invalid float width (${b}): ${a}`);\n }\n }, nb = "undefined" != typeof TextDecoder ? new TextDecoder("utf-16le") : void 0, ob = (a, b) => {\n var c = a >> 1;\n for (var d = c + b / 2; !(c >= d) && ua[c >>> 0]; )\n ++c;\n c <<= 1;\n if (32 < c - a && nb)\n return nb.decode(E.subarray(a >>> 0, c >>> 0));\n c = "";\n for (d = 0; !(d >= b / 2); ++d) {\n var e = ta[a + 2 * d >>> 1 >>> 0];\n if (0 == e)\n break;\n c += String.fromCharCode(e);\n }\n return c;\n }, pb = (a, b, c) => {\n c ??= 2147483647;\n if (2 > c)\n return 0;\n c -= 2;\n var d = b;\n c = c < 2 * a.length ? 
c / 2 : a.length;\n for (var e = 0; e < c; ++e)\n ta[b >>> 1 >>> 0] = a.charCodeAt(e), b += 2;\n ta[b >>> 1 >>> 0] = 0;\n return b - d;\n }, qb = (a) => 2 * a.length, rb = (a, b) => {\n for (var c = 0, d = ""; !(c >= b / 4); ) {\n var e = G[a + 4 * c >>> 2 >>> 0];\n if (0 == e)\n break;\n ++c;\n 65536 <= e ? (e -= 65536, d += String.fromCharCode(55296 | e >> 10, 56320 | e & 1023)) : d += String.fromCharCode(e);\n }\n return d;\n }, sb = (a, b, c) => {\n b >>>= 0;\n c ??= 2147483647;\n if (4 > c)\n return 0;\n var d = b;\n c = d + c - 4;\n for (var e = 0; e < a.length; ++e) {\n var f = a.charCodeAt(e);\n if (55296 <= f && 57343 >= f) {\n var h = a.charCodeAt(++e);\n f = 65536 + ((f & 1023) << 10) | h & 1023;\n }\n G[b >>> 2 >>> 0] = f;\n b += 4;\n if (b + 4 > c)\n break;\n }\n G[b >>> 2 >>> 0] = 0;\n return b - d;\n }, tb = (a) => {\n for (var b = 0, c = 0; c < a.length; ++c) {\n var d = a.charCodeAt(c);\n 55296 <= d && 57343 >= d && ++c;\n b += 4;\n }\n return b;\n }, vb = (a, b) => {\n var c = eb[a];\n if (void 0 === c)\n throw a = ub(a), c = N(a), T(a), new O(`${b} has unknown type ${c}`);\n return c;\n }, wb = (a, b, c) => {\n var d = [];\n a = a.toWireType(d, c);\n d.length && (I[b >>> 2 >>> 0] = S(d));\n return a;\n }, xb = (a) => {\n try {\n a();\n } catch (b) {\n Ga(b);\n }\n }, yb = (a) => {\n if (!x)\n try {\n a();\n try {\n sa = sa = a = sa, g.onExit?.(a), x = true, ha(a, new Pa(a));\n } catch (b) {\n b instanceof Pa || "unwind" == b || ha(1, b);\n }\n } catch (b) {\n b instanceof Pa || "unwind" == b || ha(1, b);\n }\n };\n function zb() {\n var a = U, b = {};\n for (let [c, d] of Object.entries(a))\n b[c] = "function" == typeof d ? 
(...e) => {\n Ab.push(c);\n try {\n return d(...e);\n } finally {\n x || (Ab.pop(), t && 1 === V && 0 === Ab.length && (V = 0, xb(Bb), "undefined" != typeof Fibers && Fibers.mi()));\n }\n } : d;\n return b;\n }\n var V = 0, t = null, Cb = 0, Ab = [], Db = {}, Eb = {}, Fb = 0, Gb = null, Hb = [];\n function ca() {\n return new Promise((a, b) => {\n Gb = { resolve: a, reject: b };\n });\n }\n function Ib() {\n var a = Jb(65548), b = a + 12;\n I[a >>> 2 >>> 0] = b;\n I[a + 4 >>> 2 >>> 0] = b + 65536;\n b = Ab[0];\n var c = Db[b];\n void 0 === c && (c = Fb++, Db[b] = c, Eb[c] = b);\n G[a + 8 >>> 2 >>> 0] = c;\n return a;\n }\n function Kb(a) {\n if (!x) {\n if (0 === V) {\n var b = false, c = false;\n a((d = 0) => {\n if (!x && (Cb = d, b = true, c)) {\n V = 2;\n xb(() => Lb(t));\n "undefined" != typeof Browser && Browser.Sh.Wh && Browser.Sh.resume();\n d = false;\n try {\n var e = (0, U[Eb[G[t + 8 >>> 2 >>> 0]]])();\n } catch (k) {\n e = k, d = true;\n }\n var f = false;\n if (!t) {\n var h = Gb;\n h && (Gb = null, (d ? h.reject : h.resolve)(e), f = true);\n }\n if (d && !f)\n throw e;\n }\n });\n c = true;\n b || (V = 1, t = Ib(), "undefined" != typeof Browser && Browser.Sh.Wh && Browser.Sh.pause(), xb(() => Mb(t)));\n } else\n 2 === V ? (V = 0, xb(Nb), T(t), t = null, Hb.forEach(yb)) : Ga(`invalid state: ${V}`);\n return Cb;\n }\n }\n function Ob(a) {\n return Kb((b) => {\n a().then(b);\n });\n }\n var Pb = [], Qb = {}, Rb = (a) => {\n var b = Qb[a];\n return void 0 === b ? N(a) : b;\n }, Sb = () => "object" == typeof globalThis ? 
globalThis : Function("return this")(), Tb = (a) => {\n var b = Pb.length;\n Pb.push(a);\n return b;\n }, Ub = (a, b) => {\n for (var c = Array(a), d = 0; d < a; ++d)\n c[d] = vb(I[b + 4 * d >>> 2 >>> 0], "parameter " + d);\n return c;\n }, Vb = (a, b) => Object.defineProperty(b, "name", { value: a });\n function Wb(a) {\n var b = Function;\n if (!(b instanceof Function))\n throw new TypeError(`new_ called with constructor type ${typeof b} which is not a function`);\n var c = Vb(b.name || "unknownFunctionName", function() {\n });\n c.prototype = b.prototype;\n c = new c();\n a = b.apply(c, a);\n return a instanceof Object ? a : c;\n }\n var W = (a) => 0 === a % 4 && (0 !== a % 100 || 0 === a % 400), Xb = [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335], Yb = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334], Zb = [], $b = (a, b) => {\n Zb.length = 0;\n for (var c; c = E[a++ >>> 0]; ) {\n var d = 105 != c;\n d &= 112 != c;\n b += d && b % 8 ? 4 : 0;\n Zb.push(112 == c ? I[b >>> 2 >>> 0] : 106 == c ? wa[b >>> 3] : 105 == c ? G[b >>> 2 >>> 0] : ya[b >>> 3 >>> 0]);\n b += d ? 8 : 4;\n }\n return Zb;\n }, ac = {}, cc = () => {\n if (!bc) {\n var a = { USER: "web_user", LOGNAME: "web_user", PATH: "/", PWD: "/", HOME: "/home/web_user", LANG: ("object" == typeof navigator && navigator.languages && navigator.languages[0] || "C").replace("-", "_") + ".UTF-8", _: fa || "./this.program" }, b;\n for (b in ac)\n void 0 === ac[b] ? delete a[b] : a[b] = ac[b];\n var c = [];\n for (b in a)\n c.push(`${b}=${a[b]}`);\n bc = c;\n }\n return bc;\n }, bc, dc = [null, [], []], ec = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], fc = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];\n function gc(a) {\n var b = Array(bb(a) + 1);\n M(a, b, 0, b.length);\n return b;\n }\n function hc(a, b, c, d) {\n function e(p, u, y) {\n for (p = "number" == typeof p ? 
p.toString() : p || ""; p.length < u; )\n p = y[0] + p;\n return p;\n }\n function f(p, u) {\n return e(p, u, "0");\n }\n function h(p, u) {\n function y(B) {\n return 0 > B ? -1 : 0 < B ? 1 : 0;\n }\n var A;\n 0 === (A = y(p.getFullYear() - u.getFullYear())) && 0 === (A = y(p.getMonth() - u.getMonth())) && (A = y(p.getDate() - u.getDate()));\n return A;\n }\n function k(p) {\n switch (p.getDay()) {\n case 0:\n return new Date(p.getFullYear() - 1, 11, 29);\n case 1:\n return p;\n case 2:\n return new Date(p.getFullYear(), 0, 3);\n case 3:\n return new Date(\n p.getFullYear(),\n 0,\n 2\n );\n case 4:\n return new Date(p.getFullYear(), 0, 1);\n case 5:\n return new Date(p.getFullYear() - 1, 11, 31);\n case 6:\n return new Date(p.getFullYear() - 1, 11, 30);\n }\n }\n function l(p) {\n var u = p.Kh;\n for (p = new Date(new Date(p.Lh + 1900, 0, 1).getTime()); 0 < u; ) {\n var y = p.getMonth(), A = (W(p.getFullYear()) ? ec : fc)[y];\n if (u > A - p.getDate())\n u -= A - p.getDate() + 1, p.setDate(1), 11 > y ? p.setMonth(y + 1) : (p.setMonth(0), p.setFullYear(p.getFullYear() + 1));\n else {\n p.setDate(p.getDate() + u);\n break;\n }\n }\n y = new Date(p.getFullYear() + 1, 0, 4);\n u = k(new Date(\n p.getFullYear(),\n 0,\n 4\n ));\n y = k(y);\n return 0 >= h(u, p) ? 0 >= h(y, p) ? p.getFullYear() + 1 : p.getFullYear() : p.getFullYear() - 1;\n }\n a >>>= 0;\n b >>>= 0;\n c >>>= 0;\n d >>>= 0;\n var m = I[d + 40 >>> 2 >>> 0];\n d = { ji: G[d >>> 2 >>> 0], ii: G[d + 4 >>> 2 >>> 0], Qh: G[d + 8 >>> 2 >>> 0], Th: G[d + 12 >>> 2 >>> 0], Rh: G[d + 16 >>> 2 >>> 0], Lh: G[d + 20 >>> 2 >>> 0], Jh: G[d + 24 >>> 2 >>> 0], Kh: G[d + 28 >>> 2 >>> 0], li: G[d + 32 >>> 2 >>> 0], hi: G[d + 36 >>> 2 >>> 0], ki: m ? 
J(m) : "" };\n c = J(c);\n m = {\n "%c": "%a %b %d %H:%M:%S %Y",\n "%D": "%m/%d/%y",\n "%F": "%Y-%m-%d",\n "%h": "%b",\n "%r": "%I:%M:%S %p",\n "%R": "%H:%M",\n "%T": "%H:%M:%S",\n "%x": "%m/%d/%y",\n "%X": "%H:%M:%S",\n "%Ec": "%c",\n "%EC": "%C",\n "%Ex": "%m/%d/%y",\n "%EX": "%H:%M:%S",\n "%Ey": "%y",\n "%EY": "%Y",\n "%Od": "%d",\n "%Oe": "%e",\n "%OH": "%H",\n "%OI": "%I",\n "%Om": "%m",\n "%OM": "%M",\n "%OS": "%S",\n "%Ou": "%u",\n "%OU": "%U",\n "%OV": "%V",\n "%Ow": "%w",\n "%OW": "%W",\n "%Oy": "%y"\n };\n for (var n in m)\n c = c.replace(new RegExp(n, "g"), m[n]);\n var q = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "), r = "January February March April May June July August September October November December".split(" ");\n m = { "%a": (p) => q[p.Jh].substring(0, 3), "%A": (p) => q[p.Jh], "%b": (p) => r[p.Rh].substring(0, 3), "%B": (p) => r[p.Rh], "%C": (p) => f((p.Lh + 1900) / 100 | 0, 2), "%d": (p) => f(p.Th, 2), "%e": (p) => e(p.Th, 2, " "), "%g": (p) => l(p).toString().substring(2), "%G": l, "%H": (p) => f(p.Qh, 2), "%I": (p) => {\n p = p.Qh;\n 0 == p ? p = 12 : 12 < p && (p -= 12);\n return f(p, 2);\n }, "%j": (p) => {\n for (var u = 0, y = 0; y <= p.Rh - 1; u += (W(p.Lh + 1900) ? ec : fc)[y++])\n ;\n return f(p.Th + u, 3);\n }, "%m": (p) => f(p.Rh + 1, 2), "%M": (p) => f(p.ii, 2), "%n": () => "\\n", "%p": (p) => 0 <= p.Qh && 12 > p.Qh ? 
"AM" : "PM", "%S": (p) => f(p.ji, 2), "%t": () => " ", "%u": (p) => p.Jh || 7, "%U": (p) => f(Math.floor((p.Kh + 7 - p.Jh) / 7), 2), "%V": (p) => {\n var u = Math.floor((p.Kh + 7 - (p.Jh + 6) % 7) / 7);\n 2 >= (p.Jh + 371 - p.Kh - 2) % 7 && u++;\n if (u)\n 53 == u && (y = (p.Jh + 371 - p.Kh) % 7, 4 == y || 3 == y && W(p.Lh) || (u = 1));\n else {\n u = 52;\n var y = (p.Jh + 7 - p.Kh - 1) % 7;\n (4 == y || 5 == y && W(p.Lh % 400 - 1)) && u++;\n }\n return f(u, 2);\n }, "%w": (p) => p.Jh, "%W": (p) => f(Math.floor((p.Kh + 7 - (p.Jh + 6) % 7) / 7), 2), "%y": (p) => (p.Lh + 1900).toString().substring(2), "%Y": (p) => p.Lh + 1900, "%z": (p) => {\n p = p.hi;\n var u = 0 <= p;\n p = Math.abs(p) / 60;\n return (u ? "+" : "-") + String("0000" + (p / 60 * 100 + p % 60)).slice(-4);\n }, "%Z": (p) => p.ki, "%%": () => "%" };\n c = c.replace(/%%/g, "\\0\\0");\n for (n in m)\n c.includes(n) && (c = c.replace(new RegExp(n, "g"), m[n](d)));\n c = c.replace(/\\0\\0/g, "%");\n n = gc(c);\n if (n.length > b)\n return 0;\n z.set(n, a >>> 0);\n return n.length - 1;\n }\n for (var ic = Array(256), jc = 0; 256 > jc; ++jc)\n ic[jc] = String.fromCharCode(jc);\n cb = ic;\n O = g.BindingError = class extends Error {\n constructor(a) {\n super(a);\n this.name = "BindingError";\n }\n };\n g.InternalError = class extends Error {\n constructor(a) {\n super(a);\n this.name = "InternalError";\n }\n };\n Q.push(0, 1, void 0, 1, null, 1, true, 1, false, 1);\n g.count_emval_handles = () => Q.length / 2 - 5 - ib.length;\n var Cf = {\n bd: function(a, b, c) {\n return Ob(async () => {\n await g.bi(a, b, c);\n });\n },\n v: function(a) {\n a = new Sa(a >>> 0);\n 0 == z[a.Ih + 12 >>> 0] && (z[a.Ih + 12 >>> 0] = 1, Ra--);\n z[a.Ih + 13 >>> 0] = 0;\n Qa.push(a);\n kc(a.Nh);\n if (lc(I[a.Ih + 4 >>> 2 >>> 0]))\n a = I[a.Nh >>> 2 >>> 0];\n else {\n var b = I[a.Ih + 16 >>> 2 >>> 0];\n a = 0 !== b ? 
b : a.Nh;\n }\n return a;\n },\n N: () => {\n X(0, 0);\n var a = Qa.pop();\n mc(a.Nh);\n L = 0;\n },\n a: function() {\n return Za([]);\n },\n m: function(a) {\n return Za([a >>> 0]);\n },\n x: function(a, b) {\n return Za([a >>> 0, b >>> 0]);\n },\n q: function(a, b, c) {\n return Za([a >>> 0, b >>> 0, c >>> 0]);\n },\n Bb: () => {\n var a = Qa.pop();\n a || Ga("no exception to throw");\n var b = a.Nh;\n 0 == z[a.Ih + 13 >>> 0] && (Qa.push(a), z[a.Ih + 13 >>> 0] = 1, z[a.Ih + 12 >>> 0] = 0, Ra++);\n L = b;\n throw L;\n },\n s: function(a, b, c) {\n a >>>= 0;\n var d = new Sa(a);\n I[d.Ih + 16 >>> 2 >>> 0] = 0;\n I[d.Ih + 4 >>> 2 >>> 0] = b >>> 0;\n I[d.Ih + 8 >>> 2 >>> 0] = c >>> 0;\n L = a;\n Ra++;\n throw L;\n },\n fb: () => Ra,\n g: function(a) {\n L ||= a >>> 0;\n throw L;\n },\n Cb: function() {\n return 0;\n },\n $c: function() {\n },\n Mc: function() {\n },\n Oc: function() {\n },\n Gc: function() {\n return 0;\n },\n Zc: function() {\n },\n Uc: function() {\n },\n Yc: function() {\n },\n _b: function() {\n },\n Nc: function() {\n },\n Kc: function() {\n },\n _c: function() {\n },\n Lc: function() {\n },\n Wb: function(a, b, c) {\n b = N(b >>> 0);\n P(a >>> 0, { name: b, fromWireType: (d) => d, toWireType: function(d, e) {\n if ("bigint" != typeof e && "number" != typeof e)\n throw null === e ? e = "null" : (d = typeof e, e = "object" === d || "array" === d || "function" === d ? e.toString() : "" + e), new TypeError(`Cannot convert "${e}" to ${this.name}`);\n "number" == typeof e && (e = BigInt(e));\n return e;\n }, argPackAdvance: 8, readValueFromPointer: hb(b, c >>> 0, -1 == b.indexOf("u")), Mh: null });\n },\n Ec: function(a, b, c, d) {\n b = N(b >>> 0);\n P(a >>> 0, { name: b, fromWireType: function(e) {\n return !!e;\n }, toWireType: function(e, f) {\n return f ? 
c : d;\n }, argPackAdvance: 8, readValueFromPointer: function(e) {\n return this.fromWireType(E[e >>> 0]);\n }, Mh: null });\n },\n Cc: function(a) {\n return P(a >>> 0, lb);\n },\n Vb: function(a, b, c) {\n b = N(b >>> 0);\n P(a >>> 0, { name: b, fromWireType: (d) => d, toWireType: (d, e) => e, argPackAdvance: 8, readValueFromPointer: mb(b, c >>> 0), Mh: null });\n },\n Aa: function(a, b, c, d, e) {\n a >>>= 0;\n c >>>= 0;\n b = N(b >>> 0);\n -1 === e && (e = 4294967295);\n e = (k) => k;\n if (0 === d) {\n var f = 32 - 8 * c;\n e = (k) => k << f >>> f;\n }\n var h = b.includes("unsigned") ? function(k, l) {\n return l >>> 0;\n } : function(k, l) {\n return l;\n };\n P(a, {\n name: b,\n fromWireType: e,\n toWireType: h,\n argPackAdvance: 8,\n readValueFromPointer: hb(b, c, 0 !== d),\n Mh: null\n });\n },\n _: function(a, b, c) {\n function d(f) {\n return new e(z.buffer, I[f + 4 >>> 2 >>> 0], I[f >>> 2 >>> 0]);\n }\n var e = [Int8Array, Uint8Array, Int16Array, Uint16Array, Int32Array, Uint32Array, Float32Array, Float64Array, BigInt64Array, BigUint64Array][b];\n c = N(c >>> 0);\n P(a >>> 0, { name: c, fromWireType: d, argPackAdvance: 8, readValueFromPointer: d }, { Xh: true });\n },\n Xb: function(a, b) {\n b = N(b >>> 0);\n var c = "std::string" === b;\n P(a >>> 0, { name: b, fromWireType: function(d) {\n var e = I[d >>> 2 >>> 0], f = d + 4;\n if (c)\n for (var h = f, k = 0; k <= e; ++k) {\n var l = f + k;\n if (k == e || 0 == E[l >>> 0]) {\n h = J(h, l - h);\n if (void 0 === m)\n var m = h;\n else\n m += String.fromCharCode(0), m += h;\n h = l + 1;\n }\n }\n else {\n m = Array(e);\n for (k = 0; k < e; ++k)\n m[k] = String.fromCharCode(E[f + k >>> 0]);\n m = m.join("");\n }\n T(d);\n return m;\n }, toWireType: function(d, e) {\n e instanceof ArrayBuffer && (e = new Uint8Array(e));\n var f = "string" == typeof e;\n if (!(f || e instanceof Uint8Array || e instanceof Uint8ClampedArray || e instanceof Int8Array))\n throw new O("Cannot pass non-string to 
std::string");\n var h = c && f ? bb(e) : e.length;\n var k = Jb(4 + h + 1), l = k + 4;\n I[k >>> 2 >>> 0] = h;\n if (c && f)\n M(e, E, l, h + 1);\n else if (f)\n for (f = 0; f < h; ++f) {\n var m = e.charCodeAt(f);\n if (255 < m)\n throw T(l), new O("String has UTF-16 code units that do not fit in 8 bits");\n E[l + f >>> 0] = m;\n }\n else\n for (f = 0; f < h; ++f)\n E[l + f >>> 0] = e[f];\n null !== d && d.push(T, k);\n return k;\n }, argPackAdvance: 8, readValueFromPointer: kb, Mh(d) {\n T(d);\n } });\n },\n Ab: function(a, b, c) {\n b >>>= 0;\n c >>>= 0;\n c = N(c);\n if (2 === b) {\n var d = ob;\n var e = pb;\n var f = qb;\n var h = (k) => ua[k >>> 1 >>> 0];\n } else\n 4 === b && (d = rb, e = sb, f = tb, h = (k) => I[k >>> 2 >>> 0]);\n P(a >>> 0, {\n name: c,\n fromWireType: (k) => {\n for (var l = I[k >>> 2 >>> 0], m, n = k + 4, q = 0; q <= l; ++q) {\n var r = k + 4 + q * b;\n if (q == l || 0 == h(r))\n n = d(n, r - n), void 0 === m ? m = n : (m += String.fromCharCode(0), m += n), n = r + b;\n }\n T(k);\n return m;\n },\n toWireType: (k, l) => {\n if ("string" != typeof l)\n throw new O(`Cannot pass non-string to C++ string type ${c}`);\n var m = f(l), n = Jb(4 + m + b);\n I[n >>> 2 >>> 0] = m / b;\n e(l, n + 4, m + b);\n null !== k && k.push(T, n);\n return n;\n },\n argPackAdvance: 8,\n readValueFromPointer: kb,\n Mh(k) {\n T(k);\n }\n });\n },\n Fc: function(a, b) {\n b = N(b >>> 0);\n P(a >>> 0, { Yh: true, name: b, argPackAdvance: 0, fromWireType: () => {\n }, toWireType: () => {\n } });\n },\n ad: () => 1,\n kd: function(a, b, c) {\n b >>>= 0;\n c >>>= 0;\n a = R(a >>> 0);\n b = vb(b, "emval::as");\n return wb(b, c, a);\n },\n Cd: function(a) {\n a >>>= 0;\n return Ob(() => {\n a = R(a);\n return a.then(S);\n });\n },\n ud: function(a, b, c, d) {\n c >>>= 0;\n d >>>= 0;\n a = Pb[a >>> 0];\n b = R(b >>> 0);\n return a(null, b, c, d);\n },\n ia: function(a, b, c, d, e) {\n c >>>= 0;\n d >>>= 0;\n e >>>= 0;\n a = Pb[a >>> 0];\n b = R(b >>> 0);\n c = 
Rb(c);\n return a(b, b[c], d, e);\n },\n Bc: jb,\n qd: function(a, b) {\n b >>>= 0;\n a = R(a >>> 0);\n b = R(b);\n return a == b;\n },\n zd: function(a) {\n a >>>= 0;\n if (0 === a)\n return S(Sb());\n a = Rb(a);\n return S(Sb()[a]);\n },\n ha: function(a, b, c) {\n b = Ub(a, b >>> 0);\n var d = b.shift();\n a--;\n var e = "return function (obj, func, destructorsRef, args) {\\n", f = 0, h = [];\n 0 === c && h.push("obj");\n for (var k = ["retType"], l = [d], m = 0; m < a; ++m)\n h.push("arg" + m), k.push("argType" + m), l.push(b[m]), e += ` var arg${m} = argType${m}.readValueFromPointer(args${f ? "+" + f : ""});\n`, f += b[m].argPackAdvance;\n e += ` var rv = ${1 === c ? "new func" : "func.call"}(${h.join(", ")});\n`;\n d.Yh || (k.push("emval_returnValue"), l.push(wb), e += " return emval_returnValue(retType, destructorsRef, rv);\\n");\n k.push(e + "};\\n");\n a = Wb(k)(...l);\n c = `methodCaller<(${b.map((n) => n.name).join(", ")}) => ${d.name}>`;\n return Tb(Vb(c, a));\n },\n yd: function(a, b) {\n b >>>= 0;\n a = R(a >>> 0);\n b = R(b);\n return S(a[b]);\n },\n ba: function(a) {\n a >>>= 0;\n 9 < a && (Q[a + 1] += 1);\n },\n md: function() {\n return S([]);\n },\n hd: function(a) {\n a = R(a >>> 0);\n for (var b = Array(a.length), c = 0; c < a.length; c++)\n b[c] = a[c];\n return S(b);\n },\n U: function(a) {\n return S(Rb(a >>> 0));\n },\n Xa: function() {\n return S({});\n },\n vd: function(a) {\n a >>>= 0;\n for (var b = R(a); b.length; ) {\n var c = b.pop();\n b.pop()(c);\n }\n jb(a);\n },\n sd: function(a, b, c) {\n b >>>= 0;\n c >>>= 0;\n a = R(a >>> 0);\n b = R(b);\n c = R(c);\n a[b] = c;\n },\n zb: function(a, b) {\n b >>>= 0;\n a = vb(a >>> 0, "_emval_take_value");\n a = a.readValueFromPointer(b);\n return S(a);\n },\n Rc: function(a, b) {\n a = -9007199254740992 > a || 9007199254740992 < a ? 
NaN : Number(a);\n b >>>= 0;\n a = new Date(1e3 * a);\n G[b >>> 2 >>> 0] = a.getUTCSeconds();\n G[b + 4 >>> 2 >>> 0] = a.getUTCMinutes();\n G[b + 8 >>> 2 >>> 0] = a.getUTCHours();\n G[b + 12 >>> 2 >>> 0] = a.getUTCDate();\n G[b + 16 >>> 2 >>> 0] = a.getUTCMonth();\n G[b + 20 >>> 2 >>> 0] = a.getUTCFullYear() - 1900;\n G[b + 24 >>> 2 >>> 0] = a.getUTCDay();\n G[b + 28 >>> 2 >>> 0] = (a.getTime() - Date.UTC(\n a.getUTCFullYear(),\n 0,\n 1,\n 0,\n 0,\n 0,\n 0\n )) / 864e5 | 0;\n },\n Sc: function(a, b) {\n a = -9007199254740992 > a || 9007199254740992 < a ? NaN : Number(a);\n b >>>= 0;\n a = new Date(1e3 * a);\n G[b >>> 2 >>> 0] = a.getSeconds();\n G[b + 4 >>> 2 >>> 0] = a.getMinutes();\n G[b + 8 >>> 2 >>> 0] = a.getHours();\n G[b + 12 >>> 2 >>> 0] = a.getDate();\n G[b + 16 >>> 2 >>> 0] = a.getMonth();\n G[b + 20 >>> 2 >>> 0] = a.getFullYear() - 1900;\n G[b + 24 >>> 2 >>> 0] = a.getDay();\n G[b + 28 >>> 2 >>> 0] = (W(a.getFullYear()) ? Xb : Yb)[a.getMonth()] + a.getDate() - 1 | 0;\n G[b + 36 >>> 2 >>> 0] = -(60 * a.getTimezoneOffset());\n var c = new Date(a.getFullYear(), 6, 1).getTimezoneOffset(), d = new Date(a.getFullYear(), 0, 1).getTimezoneOffset();\n G[b + 32 >>> 2 >>> 0] = (c != d && a.getTimezoneOffset() == Math.min(d, c)) | 0;\n },\n Tc: function(a) {\n a >>>= 0;\n var b = new Date(G[a + 20 >>> 2 >>> 0] + 1900, G[a + 16 >>> 2 >>> 0], G[a + 12 >>> 2 >>> 0], G[a + 8 >>> 2 >>> 0], G[a + 4 >>> 2 >>> 0], G[a >>> 2 >>> 0], 0), c = G[a + 32 >>> 2 >>> 0], d = b.getTimezoneOffset(), e = new Date(b.getFullYear(), 6, 1).getTimezoneOffset(), f = new Date(b.getFullYear(), 0, 1).getTimezoneOffset(), h = Math.min(f, e);\n 0 > c ? G[a + 32 >>> 2 >>> 0] = Number(e != f && h == d) : 0 < c != (h == d) && (e = Math.max(f, e), b.setTime(b.getTime() + 6e4 * ((0 < c ? h : e) - d)));\n G[a + 24 >>> 2 >>> 0] = b.getDay();\n G[a + 28 >>> 2 >>> 0] = (W(b.getFullYear()) ? 
Xb : Yb)[b.getMonth()] + b.getDate() - 1 | 0;\n G[a >>> 2 >>> 0] = b.getSeconds();\n G[a + 4 >>> 2 >>> 0] = b.getMinutes();\n G[a + 8 >>> 2 >>> 0] = b.getHours();\n G[a + 12 >>> 2 >>> 0] = b.getDate();\n G[a + 16 >>> 2 >>> 0] = b.getMonth();\n G[a + 20 >>> 2 >>> 0] = b.getYear();\n a = b.getTime();\n return BigInt(isNaN(a) ? -1 : a / 1e3);\n },\n Pc: function() {\n return -52;\n },\n Qc: function() {\n },\n Ic: function(a, b, c, d) {\n c >>>= 0;\n d >>>= 0;\n var e = (/* @__PURE__ */ new Date()).getFullYear(), f = new Date(e, 0, 1), h = new Date(e, 6, 1);\n e = f.getTimezoneOffset();\n var k = h.getTimezoneOffset();\n I[a >>> 0 >>> 2 >>> 0] = 60 * Math.max(e, k);\n G[b >>> 0 >>> 2 >>> 0] = Number(e != k);\n a = (l) => l.toLocaleTimeString(void 0, { hour12: false, timeZoneName: "short" }).split(" ")[1];\n f = a(f);\n h = a(h);\n k < e ? (M(f, E, c, 17), M(h, E, d, 17)) : (M(f, E, d, 17), M(h, E, c, 17));\n },\n bb: () => {\n Ga("");\n },\n A: function(a, b, c) {\n a >>>= 0;\n b = $b(b >>> 0, c >>> 0);\n return Oa[a](...b);\n },\n bc: function(a, b, c) {\n a >>>= 0;\n b = $b(b >>> 0, c >>> 0);\n return Oa[a](...b);\n },\n $b: () => Date.now(),\n Jc: function() {\n return 4294901760;\n },\n ga: () => performance.now(),\n Hc: function(a) {\n a >>>= 0;\n var b = E.length;\n if (4294901760 < a)\n return false;\n for (var c = 1; 4 >= c; c *= 2) {\n var d = b * (1 + 0.2 / c);\n d = Math.min(d, a + 100663296);\n var e = Math;\n d = Math.max(a, d);\n a: {\n e = (e.min.call(e, 4294901760, d + (65536 - d % 65536) % 65536) - ra.buffer.byteLength + 65535) / 65536;\n try {\n ra.grow(e);\n za();\n var f = 1;\n break a;\n } catch (h) {\n }\n f = void 0;\n }\n if (f)\n return true;\n }\n return false;\n },\n Wc: function(a, b) {\n a >>>= 0;\n b >>>= 0;\n var c = 0;\n cc().forEach((d, e) => {\n var f = b + c;\n e = I[a + 4 * e >>> 2 >>> 0] = f;\n for (f = 0; f < d.length; ++f)\n z[e++ >>> 0] = d.charCodeAt(f);\n z[e >>> 0] = 0;\n c += d.length + 1;\n });\n return 0;\n },\n Xc: 
function(a, b) {\n a >>>= 0;\n b >>>= 0;\n var c = cc();\n I[a >>> 2 >>> 0] = c.length;\n var d = 0;\n c.forEach((e) => d += e.length + 1);\n I[b >>> 2 >>> 0] = d;\n return 0;\n },\n Db: () => 52,\n Zb: function() {\n return 52;\n },\n Vc: function() {\n return 70;\n },\n Yb: function(a, b, c, d) {\n b >>>= 0;\n c >>>= 0;\n d >>>= 0;\n for (var e = 0, f = 0; f < c; f++) {\n var h = I[b >>> 2 >>> 0], k = I[b + 4 >>> 2 >>> 0];\n b += 8;\n for (var l = 0; l < k; l++) {\n var m = E[h + l >>> 0], n = dc[a];\n 0 === m || 10 === m ? ((1 === a ? qa : w)(ab(n, 0)), n.length = 0) : n.push(m);\n }\n e += k;\n }\n I[d >>> 2 >>> 0] = e;\n return 0;\n },\n xb: nc,\n cd: oc,\n ua: pc,\n W: qc,\n $: rc,\n ra: sc,\n ta: tc,\n dd: uc,\n ob: vc,\n P: wc,\n z: xc,\n b: yc,\n Ub: zc,\n ya: Ac,\n e: Bc,\n kb: Cc,\n h: Dc,\n X: Ec,\n i: Fc,\n ed: Gc,\n j: Hc,\n t: Ic,\n r: Jc,\n o: Kc,\n Wa: Lc,\n Ca: Mc,\n ma: Nc,\n Qb: Oc,\n db: Pc,\n Ib: Qc,\n mb: Rc,\n kc: Sc,\n xc: Tc,\n hc: Uc,\n ic: Vc,\n ac: Wc,\n oa: Xc,\n yb: Yc,\n Ba: Zc,\n Eb: $c,\n ea: ad,\n jc: bd,\n Ta: cd,\n F: dd,\n G: ed,\n Gb: fd,\n jd: gd,\n qa: hd,\n O: jd,\n V: kd,\n T: ld,\n y: md,\n Fb: nd,\n gc: od,\n D: pd,\n Hb: qd,\n id: rd,\n Ua: sd,\n wa: td,\n lc: ud,\n cc: vd,\n Nb: wd,\n aa: xd,\n I: yd,\n C: zd,\n _a: Ad,\n fc: Bd,\n Q: Cd,\n d: Dd,\n ab: Ed,\n n: Fd,\n Ya: Gd,\n va: Hd,\n wb: Id,\n f: Jd,\n yc: Kd,\n da: Ld,\n gb: Md,\n Da: Nd,\n lb: Od,\n hb: Pd,\n c: Qd,\n vc: Rd,\n od: Sd,\n k: Td,\n tc: Ud,\n l: Vd,\n wc: Wd,\n sc: Xd,\n rd: Yd,\n p: Zd,\n Ra: $d,\n tb: ae,\n Qa: be,\n Kb: ce,\n B: de,\n K: ee,\n S: fe,\n $a: ge,\n pc: he,\n ub: ie,\n za: je,\n ka: ke,\n xa: le,\n Sb: me,\n La: ne,\n jb: oe,\n Ga: pe,\n nc: qe,\n Ha: re,\n Ia: se,\n fd: te,\n xd: ue,\n Z: ve,\n pa: we,\n pd: xe,\n wd: ye,\n Mb: ze,\n Ma: Ae,\n Ka: Be,\n Tb: Ce,\n rc: De,\n Ja: Ee,\n Na: Fe,\n pb: Ge,\n la: He,\n Ea: Ie,\n mc: Je,\n qc: Ke,\n Jb: Le,\n Fa: Me,\n ja: Ne,\n Ad: Oe,\n nd: Pe,\n R: Qe,\n eb: Re,\n Za: Se,\n ec: Te,\n ib: 
Ue,\n E: Ve,\n M: We,\n Va: Xe,\n ld: Ye,\n ca: Ze,\n nb: $e,\n na: af,\n dc: bf,\n Ac: cf,\n u: df,\n L: ef,\n td: ff,\n Pb: gf,\n oc: hf,\n Bd: jf,\n Ob: kf,\n Lb: lf,\n cb: mf,\n zc: nf,\n Rb: of,\n Oa: pf,\n Y: qf,\n uc: rf,\n J: sf,\n gd: tf,\n vb: uf,\n sa: vf,\n H: wf,\n rb: xf,\n Pa: yf,\n Sa: zf,\n sb: Af,\n qb: Bf,\n w: function(a) {\n return a >>> 0;\n },\n Dc: hc,\n fa: function(a, b, c, d) {\n return hc(a >>> 0, b >>> 0, c >>> 0, d >>> 0);\n }\n }, U = function() {\n function a(c) {\n U = c.exports;\n U = zb();\n U = Df();\n ra = U.Dd;\n za();\n Ba.unshift(U.Ed);\n Da--;\n 0 == Da && (null !== Ea && (clearInterval(Ea), Ea = null), Fa && (c = Fa, Fa = null, c()));\n return U;\n }\n var b = { a: Cf };\n Da++;\n if (g.instantiateWasm)\n try {\n return g.instantiateWasm(b, a);\n } catch (c) {\n w(`Module.instantiateWasm callback failed with error: ${c}`), ba(c);\n }\n Na(b, function(c) {\n a(c.instance);\n }).catch(ba);\n return {};\n }(), ub = (a) => (ub = U.Fd)(a);\n g._OrtInit = (a, b) => (g._OrtInit = U.Gd)(a, b);\n g._OrtGetLastError = (a, b) => (g._OrtGetLastError = U.Hd)(a, b);\n g._OrtCreateSessionOptions = (a, b, c, d, e, f, h, k, l, m) => (g._OrtCreateSessionOptions = U.Id)(a, b, c, d, e, f, h, k, l, m);\n g._OrtAppendExecutionProvider = (a, b) => (g._OrtAppendExecutionProvider = U.Jd)(a, b);\n g._OrtAddFreeDimensionOverride = (a, b, c) => (g._OrtAddFreeDimensionOverride = U.Kd)(a, b, c);\n g._OrtAddSessionConfigEntry = (a, b, c) => (g._OrtAddSessionConfigEntry = U.Ld)(a, b, c);\n g._OrtReleaseSessionOptions = (a) => (g._OrtReleaseSessionOptions = U.Md)(a);\n g._OrtCreateSession = (a, b, c) => (g._OrtCreateSession = U.Nd)(a, b, c);\n g._OrtReleaseSession = (a) => (g._OrtReleaseSession = U.Od)(a);\n g._OrtGetInputOutputCount = (a, b, c) => (g._OrtGetInputOutputCount = U.Pd)(a, b, c);\n g._OrtGetInputName = (a, b) => (g._OrtGetInputName = U.Qd)(a, b);\n g._OrtGetOutputName = (a, b) => (g._OrtGetOutputName = U.Rd)(a, b);\n g._OrtFree = (a) => 
(g._OrtFree = U.Sd)(a);\n g._OrtCreateTensor = (a, b, c, d, e, f) => (g._OrtCreateTensor = U.Td)(a, b, c, d, e, f);\n g._OrtGetTensorData = (a, b, c, d, e) => (g._OrtGetTensorData = U.Ud)(a, b, c, d, e);\n g._OrtReleaseTensor = (a) => (g._OrtReleaseTensor = U.Vd)(a);\n g._OrtCreateRunOptions = (a, b, c, d) => (g._OrtCreateRunOptions = U.Wd)(a, b, c, d);\n g._OrtAddRunConfigEntry = (a, b, c) => (g._OrtAddRunConfigEntry = U.Xd)(a, b, c);\n g._OrtReleaseRunOptions = (a) => (g._OrtReleaseRunOptions = U.Yd)(a);\n g._OrtCreateBinding = (a) => (g._OrtCreateBinding = U.Zd)(a);\n g._OrtBindInput = (a, b, c) => (g._OrtBindInput = U._d)(a, b, c);\n g._OrtBindOutput = (a, b, c, d) => (g._OrtBindOutput = U.$d)(a, b, c, d);\n g._OrtClearBoundOutputs = (a) => (g._OrtClearBoundOutputs = U.ae)(a);\n g._OrtReleaseBinding = (a) => (g._OrtReleaseBinding = U.be)(a);\n g._OrtRunWithBinding = (a, b, c, d, e) => (g._OrtRunWithBinding = U.ce)(a, b, c, d, e);\n g._OrtRun = (a, b, c, d, e, f, h, k) => (g._OrtRun = U.de)(a, b, c, d, e, f, h, k);\n g._OrtEndProfiling = (a) => (g._OrtEndProfiling = U.ee)(a);\n g._JsepOutput = (a, b, c) => (g._JsepOutput = U.fe)(a, b, c);\n g._JsepGetNodeName = (a) => (g._JsepGetNodeName = U.ge)(a);\n var Jb = g._malloc = (a) => (Jb = g._malloc = U.he)(a), T = g._free = (a) => (T = g._free = U.ie)(a), X = (a, b) => (X = U.ke)(a, b), Ta = (a) => (Ta = U.le)(a), Y = (a) => (Y = U.me)(a), Ef = (a) => (Ef = U.ne)(a), Z = () => (Z = U.oe)(), mc = (a) => (mc = U.pe)(a), kc = (a) => (kc = U.qe)(a), Ua = (a, b, c) => (Ua = U.re)(a, b, c), lc = (a) => (lc = U.se)(a), dynCall_vii = g.dynCall_vii = (a, b, c) => (dynCall_vii = g.dynCall_vii = U.te)(a, b, c), Ff = g.dynCall_iiii = (a, b, c, d) => (Ff = g.dynCall_iiii = U.ue)(a, b, c, d), dynCall_iii = g.dynCall_iii = (a, b, c) => (dynCall_iii = g.dynCall_iii = U.ve)(a, b, c), Gf = g.dynCall_ii = (a, b) => (Gf = g.dynCall_ii = U.we)(a, b), Hf = g.dynCall_iiiiiii = (a, b, c, d, e, f, h) => (Hf = g.dynCall_iiiiiii = U.xe)(a, b, 
c, d, e, f, h), dynCall_vi = g.dynCall_vi = (a, b) => (dynCall_vi = g.dynCall_vi = U.ye)(a, b), dynCall_v = g.dynCall_v = (a) => (dynCall_v = g.dynCall_v = U.ze)(a), If = g.dynCall_iiiiii = (a, b, c, d, e, f) => (If = g.dynCall_iiiiii = U.Ae)(a, b, c, d, e, f), Jf = g.dynCall_iiij = (a, b, c, d) => (Jf = g.dynCall_iiij = U.Be)(a, b, c, d), Kf = g.dynCall_iiiii = (a, b, c, d, e) => (Kf = g.dynCall_iiiii = U.Ce)(a, b, c, d, e), Lf = g.dynCall_viii = (a, b, c, d) => (Lf = g.dynCall_viii = U.De)(a, b, c, d), Mf = g.dynCall_viiiii = (a, b, c, d, e, f) => (Mf = g.dynCall_viiiii = U.Ee)(a, b, c, d, e, f), Nf = g.dynCall_viiii = (a, b, c, d, e) => (Nf = g.dynCall_viiii = U.Fe)(a, b, c, d, e), Of = g.dynCall_viiiiii = (a, b, c, d, e, f, h) => (Of = g.dynCall_viiiiii = U.Ge)(a, b, c, d, e, f, h), Pf = g.dynCall_viiji = (a, b, c, d, e) => (Pf = g.dynCall_viiji = U.He)(a, b, c, d, e), Qf = g.dynCall_viiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (Qf = g.dynCall_viiiiiiiiiii = U.Ie)(a, b, c, d, e, f, h, k, l, m, n, q), Rf = g.dynCall_viiijjjii = (a, b, c, d, e, f, h, k, l) => (Rf = g.dynCall_viiijjjii = U.Je)(a, b, c, d, e, f, h, k, l), Sf = g.dynCall_iid = (a, b, c) => (Sf = g.dynCall_iid = U.Ke)(a, b, c), Tf = g.dynCall_iif = (a, b, c) => (Tf = g.dynCall_iif = U.Le)(a, b, c), Uf = g.dynCall_iij = (a, b, c) => (Uf = g.dynCall_iij = U.Me)(a, b, c), Vf = g.dynCall_jii = (a, b, c) => (Vf = g.dynCall_jii = U.Ne)(a, b, c), Wf = g.dynCall_i = (a) => (Wf = g.dynCall_i = U.Oe)(a), Xf = g.dynCall_viiiiiiii = (a, b, c, d, e, f, h, k, l) => (Xf = g.dynCall_viiiiiiii = U.Pe)(a, b, c, d, e, f, h, k, l), Yf = g.dynCall_viiiiij = (a, b, c, d, e, f, h) => (Yf = g.dynCall_viiiiij = U.Qe)(a, b, c, d, e, f, h), Zf = g.dynCall_ji = (a, b) => (Zf = g.dynCall_ji = U.Re)(a, b), $f = g.dynCall_viij = (a, b, c, d) => ($f = g.dynCall_viij = U.Se)(a, b, c, d), ag = g.dynCall_iiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (ag = g.dynCall_iiiiiiiiiiii = U.Te)(a, b, c, d, e, f, h, k, l, m, 
n, q), bg = g.dynCall_viiiiiiiii = (a, b, c, d, e, f, h, k, l, m) => (bg = g.dynCall_viiiiiiiii = U.Ue)(a, b, c, d, e, f, h, k, l, m), cg = g.dynCall_ij = (a, b) => (cg = g.dynCall_ij = U.Ve)(a, b), dg = g.dynCall_iiiiij = (a, b, c, d, e, f) => (dg = g.dynCall_iiiiij = U.We)(a, b, c, d, e, f), eg = g.dynCall_j = (a) => (eg = g.dynCall_j = U.Xe)(a), fg = g.dynCall_vij = (a, b, c) => (fg = g.dynCall_vij = U.Ye)(a, b, c), gg = g.dynCall_viijjjiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (gg = g.dynCall_viijjjiiiiii = U.Ze)(a, b, c, d, e, f, h, k, l, m, n, q), hg = g.dynCall_viiijiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (hg = g.dynCall_viiijiiiiiii = U._e)(a, b, c, d, e, f, h, k, l, m, n, q), ig = g.dynCall_iiiiiiii = (a, b, c, d, e, f, h, k) => (ig = g.dynCall_iiiiiiii = U.$e)(a, b, c, d, e, f, h, k), jg = g.dynCall_viiiiiii = (a, b, c, d, e, f, h, k) => (jg = g.dynCall_viiiiiii = U.af)(a, b, c, d, e, f, h, k), kg = g.dynCall_iiiiiiiij = (a, b, c, d, e, f, h, k, l) => (kg = g.dynCall_iiiiiiiij = U.bf)(a, b, c, d, e, f, h, k, l), lg = g.dynCall_viiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (lg = g.dynCall_viiiiiiiiiiiii = U.cf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), mg = g.dynCall_iiiiiiiii = (a, b, c, d, e, f, h, k, l) => (mg = g.dynCall_iiiiiiiii = U.df)(a, b, c, d, e, f, h, k, l), ng = g.dynCall_iiiiijiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (ng = g.dynCall_iiiiijiiiii = U.ef)(a, b, c, d, e, f, h, k, l, m, n), og = g.dynCall_vijjjiiij = (a, b, c, d, e, f, h, k, l) => (og = g.dynCall_vijjjiiij = U.ff)(a, b, c, d, e, f, h, k, l), pg = g.dynCall_fi = (a, b) => (pg = g.dynCall_fi = U.gf)(a, b), qg = g.dynCall_fii = (a, b, c) => (qg = g.dynCall_fii = U.hf)(a, b, c), rg = g.dynCall_di = (a, b) => (rg = g.dynCall_di = U.jf)(a, b), sg = g.dynCall_dii = (a, b, c) => (sg = g.dynCall_dii = U.kf)(a, b, c), tg = g.dynCall_vijj = (a, b, c, d) => (tg = g.dynCall_vijj = U.lf)(a, b, c, d), ug = g.dynCall_iiiiiiiiii = (a, b, c, d, e, f, h, k, l, 
m) => (ug = g.dynCall_iiiiiiiiii = U.mf)(a, b, c, d, e, f, h, k, l, m), vg = g.dynCall_viijiii = (a, b, c, d, e, f, h) => (vg = g.dynCall_viijiii = U.nf)(a, b, c, d, e, f, h), wg = g.dynCall_viid = (a, b, c, d) => (wg = g.dynCall_viid = U.of)(a, b, c, d), xg = g.dynCall_viffiii = (a, b, c, d, e, f, h) => (xg = g.dynCall_viffiii = U.pf)(a, b, c, d, e, f, h), yg = g.dynCall_viifiii = (a, b, c, d, e, f, h) => (yg = g.dynCall_viifiii = U.qf)(a, b, c, d, e, f, h), zg = g.dynCall_viiiiidiidi = (a, b, c, d, e, f, h, k, l, m, n) => (zg = g.dynCall_viiiiidiidi = U.rf)(a, b, c, d, e, f, h, k, l, m, n), Ag = g.dynCall_viiiiiiiiidi = (a, b, c, d, e, f, h, k, l, m, n, q) => (Ag = g.dynCall_viiiiiiiiidi = U.sf)(a, b, c, d, e, f, h, k, l, m, n, q), Bg = g.dynCall_jiii = (a, b, c, d) => (Bg = g.dynCall_jiii = U.tf)(a, b, c, d), Cg = g.dynCall_vjiiiiii = (a, b, c, d, e, f, h, k) => (Cg = g.dynCall_vjiiiiii = U.uf)(a, b, c, d, e, f, h, k), Dg = g.dynCall_viiid = (a, b, c, d, e) => (Dg = g.dynCall_viiid = U.vf)(a, b, c, d, e), Eg = g.dynCall_viiiiiiiiiji = (a, b, c, d, e, f, h, k, l, m, n, q) => (Eg = g.dynCall_viiiiiiiiiji = U.wf)(a, b, c, d, e, f, h, k, l, m, n, q), Fg = g.dynCall_viji = (a, b, c, d) => (Fg = g.dynCall_viji = U.xf)(a, b, c, d), Gg = g.dynCall_vijjjjjjjjjjjjji = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => (Gg = g.dynCall_vijjjjjjjjjjjjji = U.yf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), Hg = g.dynCall_viiiji = (a, b, c, d, e, f) => (Hg = g.dynCall_viiiji = U.zf)(a, b, c, d, e, f), Ig = g.dynCall_vijjjiiji = (a, b, c, d, e, f, h, k, l) => (Ig = g.dynCall_vijjjiiji = U.Af)(a, b, c, d, e, f, h, k, l), Jg = g.dynCall_iiiji = (a, b, c, d, e) => (Jg = g.dynCall_iiiji = U.Bf)(a, b, c, d, e), Kg = g.dynCall_iiijiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Kg = g.dynCall_iiijiiiiiiiiii = U.Cf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Lg = g.dynCall_vj = (a, b) => (Lg = g.dynCall_vj = U.Df)(a, b), Mg = g.dynCall_jjj = (a, b, c) => (Mg = 
g.dynCall_jjj = U.Ef)(a, b, c), Ng = g.dynCall_iiijiiiiii = (a, b, c, d, e, f, h, k, l, m) => (Ng = g.dynCall_iiijiiiiii = U.Ff)(a, b, c, d, e, f, h, k, l, m), Og = g.dynCall_vfiii = (a, b, c, d, e) => (Og = g.dynCall_vfiii = U.Gf)(a, b, c, d, e), Pg = g.dynCall_viiiiff = (a, b, c, d, e, f, h) => (Pg = g.dynCall_viiiiff = U.Hf)(a, b, c, d, e, f, h), Qg = g.dynCall_viiiiiff = (a, b, c, d, e, f, h, k) => (Qg = g.dynCall_viiiiiff = U.If)(a, b, c, d, e, f, h, k), Rg = g.dynCall_viiff = (a, b, c, d, e) => (Rg = g.dynCall_viiff = U.Jf)(a, b, c, d, e), Sg = g.dynCall_viiiiiiiiifiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Sg = g.dynCall_viiiiiiiiifiii = U.Kf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Tg = g.dynCall_viiiiiiiijj = (a, b, c, d, e, f, h, k, l, m, n) => (Tg = g.dynCall_viiiiiiiijj = U.Lf)(a, b, c, d, e, f, h, k, l, m, n), Ug = g.dynCall_iiiiiiiiiiiiiifii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (Ug = g.dynCall_iiiiiiiiiiiiiifii = U.Mf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), Vg = g.dynCall_viiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Vg = g.dynCall_viiiiiiiiiiii = U.Nf)(a, b, c, d, e, f, h, k, l, m, n, q, r), Wg = g.dynCall_iiiiiiiiiiiiiiiiifii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (Wg = g.dynCall_iiiiiiiiiiiiiiiiifii = U.Of)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), Xg = g.dynCall_vijjiiiiii = (a, b, c, d, e, f, h, k, l, m) => (Xg = g.dynCall_vijjiiiiii = U.Pf)(a, b, c, d, e, f, h, k, l, m), Yg = g.dynCall_iiiijjj = (a, b, c, d, e, f, h) => (Yg = g.dynCall_iiiijjj = U.Qf)(a, b, c, d, e, f, h), Zg = g.dynCall_viiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (Zg = g.dynCall_viiiiiiiiii = U.Rf)(a, b, c, d, e, f, h, k, l, m, n), $g = g.dynCall_iiijjj = (a, b, c, d, e, f) => ($g = g.dynCall_iiijjj = U.Sf)(a, b, c, d, e, f), ah = g.dynCall_fffffff = (a, b, c, d, e, f, h) => (ah = g.dynCall_fffffff = U.Tf)(a, b, c, d, e, f, h), bh = g.dynCall_viiiij = (a, b, c, d, e, 
f) => (bh = g.dynCall_viiiij = U.Uf)(a, b, c, d, e, f), ch = g.dynCall_viiiiiijiifiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (ch = g.dynCall_viiiiiijiifiii = U.Vf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), dh = g.dynCall_vjjjjjjffjifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) => (dh = g.dynCall_vjjjjjjffjifiiiiii = U.Wf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B), eh = g.dynCall_viiiiiiffjifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (eh = g.dynCall_viiiiiiffjifiiiii = U.Xf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), fh = g.dynCall_viiiiiiffjfiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => (fh = g.dynCall_viiiiiiffjfiiiii = U.Yf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), gh = g.dynCall_viiiiiiffjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (gh = g.dynCall_viiiiiiffjiiiii = U.Zf)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), hh = g.dynCall_vjjjjjjjjfffiiifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (hh = g.dynCall_vjjjjjjjjfffiiifiiiii = U._f)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), ih = g.dynCall_vjjjjjjfffifijiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) => (ih = g.dynCall_vjjjjjjfffifijiiiii = U.$f)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C), jh = g.dynCall_vjjjjjjfffifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) => (jh = g.dynCall_vjjjjjjfffifiiiiii = U.ag)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B), kh = g.dynCall_vjjjjjjjjfffjifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (kh = g.dynCall_vjjjjjjjjfffjifiiiiii = U.bg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), lh = g.dynCall_vijiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (lh = g.dynCall_vijiiiiiiiiii = U.cg)(a, b, c, d, e, f, h, k, l, m, n, q, r), mh = g.dynCall_vijjfffiii = (a, b, c, d, e, f, h, k, l, m) => (mh = 
g.dynCall_vijjfffiii = U.dg)(a, b, c, d, e, f, h, k, l, m), nh = g.dynCall_viiiiiiijiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (nh = g.dynCall_viiiiiiijiiii = U.eg)(a, b, c, d, e, f, h, k, l, m, n, q, r), oh = g.dynCall_vijjjjjjifiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (oh = g.dynCall_vijjjjjjifiiiii = U.fg)(\n a,\n b,\n c,\n d,\n e,\n f,\n h,\n k,\n l,\n m,\n n,\n q,\n r,\n p,\n u\n ), ph = g.dynCall_viifi = (a, b, c, d, e) => (ph = g.dynCall_viifi = U.gg)(a, b, c, d, e), qh = g.dynCall_vjjjjjiiii = (a, b, c, d, e, f, h, k, l, m) => (qh = g.dynCall_vjjjjjiiii = U.hg)(a, b, c, d, e, f, h, k, l, m), rh = g.dynCall_vjjjjfiii = (a, b, c, d, e, f, h, k, l) => (rh = g.dynCall_vjjjjfiii = U.ig)(a, b, c, d, e, f, h, k, l), sh = g.dynCall_viiiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (sh = g.dynCall_viiiiiijiiiiii = U.jg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), th = g.dynCall_vijjii = (a, b, c, d, e, f) => (th = g.dynCall_vijjii = U.kg)(a, b, c, d, e, f), uh = g.dynCall_viiiiijjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (uh = g.dynCall_viiiiijjiiiii = U.lg)(a, b, c, d, e, f, h, k, l, m, n, q, r), vh = g.dynCall_iiiiiji = (a, b, c, d, e, f, h) => (vh = g.dynCall_iiiiiji = U.mg)(a, b, c, d, e, f, h), wh = g.dynCall_iiiiji = (a, b, c, d, e, f) => (wh = g.dynCall_iiiiji = U.ng)(a, b, c, d, e, f), xh = g.dynCall_viiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (xh = g.dynCall_viiiiijiiiiii = U.og)(a, b, c, d, e, f, h, k, l, m, n, q, r), yh = g.dynCall_viiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (yh = g.dynCall_viiijiiiiii = U.pg)(a, b, c, d, e, f, h, k, l, m, n), zh = g.dynCall_viijj = (a, b, c, d, e) => (zh = g.dynCall_viijj = U.qg)(a, b, c, d, e), Ah = g.dynCall_viiiijii = (a, b, c, d, e, f, h, k) => (Ah = g.dynCall_viiiijii = U.rg)(a, b, c, d, e, f, h, k), Bh = g.dynCall_viijjiii = (a, b, c, d, e, f, h, k) => (Bh = g.dynCall_viijjiii = U.sg)(a, b, c, d, e, f, h, k), Ch = g.dynCall_ijii = (a, b, c, d) => (Ch 
= g.dynCall_ijii = U.tg)(a, b, c, d), Dh = g.dynCall_viiiiijjji = (a, b, c, d, e, f, h, k, l, m) => (Dh = g.dynCall_viiiiijjji = U.ug)(a, b, c, d, e, f, h, k, l, m), Eh = g.dynCall_vijjjjiij = (a, b, c, d, e, f, h, k, l) => (Eh = g.dynCall_vijjjjiij = U.vg)(a, b, c, d, e, f, h, k, l), Fh = g.dynCall_viiiiijij = (a, b, c, d, e, f, h, k, l) => (Fh = g.dynCall_viiiiijij = U.wg)(a, b, c, d, e, f, h, k, l), Gh = g.dynCall_viiiiiijij = (a, b, c, d, e, f, h, k, l, m) => (Gh = g.dynCall_viiiiiijij = U.xg)(a, b, c, d, e, f, h, k, l, m), Hh = g.dynCall_vijiii = (a, b, c, d, e, f) => (Hh = g.dynCall_vijiii = U.yg)(a, b, c, d, e, f), Ih = g.dynCall_viiiiiiiiifi = (a, b, c, d, e, f, h, k, l, m, n, q) => (Ih = g.dynCall_viiiiiiiiifi = U.zg)(a, b, c, d, e, f, h, k, l, m, n, q), Jh = g.dynCall_iiijiiii = (a, b, c, d, e, f, h, k) => (Jh = g.dynCall_iiijiiii = U.Ag)(a, b, c, d, e, f, h, k), Kh = g.dynCall_viiiiiijjiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Kh = g.dynCall_viiiiiijjiiiii = U.Bg)(\n a,\n b,\n c,\n d,\n e,\n f,\n h,\n k,\n l,\n m,\n n,\n q,\n r,\n p\n ), Lh = g.dynCall_viiiiiiijiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (Lh = g.dynCall_viiiiiiijiiiiii = U.Cg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), Mh = g.dynCall_vif = (a, b, c) => (Mh = g.dynCall_vif = U.Dg)(a, b, c), Nh = g.dynCall_viif = (a, b, c, d) => (Nh = g.dynCall_viif = U.Eg)(a, b, c, d), Oh = g.dynCall_fiii = (a, b, c, d) => (Oh = g.dynCall_fiii = U.Fg)(a, b, c, d), Ph = g.dynCall_diii = (a, b, c, d) => (Ph = g.dynCall_diii = U.Gg)(a, b, c, d), Qh = g.dynCall_viiiiiifii = (a, b, c, d, e, f, h, k, l, m) => (Qh = g.dynCall_viiiiiifii = U.Hg)(a, b, c, d, e, f, h, k, l, m), Rh = g.dynCall_viiiiijiiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa) => (Rh = g.dynCall_viiiiijiiiiiiiiiiiiiiiiiii = U.Ig)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa), Sh = g.dynCall_viijji = (a, b, c, d, e, f) => (Sh = 
g.dynCall_viijji = U.Jg)(a, b, c, d, e, f), Th = g.dynCall_iiiiiiiiiiiji = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Th = g.dynCall_iiiiiiiiiiiji = U.Kg)(a, b, c, d, e, f, h, k, l, m, n, q, r), Uh = g.dynCall_viifiifijjjii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Uh = g.dynCall_viifiifijjjii = U.Lg)(a, b, c, d, e, f, h, k, l, m, n, q, r), Vh = g.dynCall_viiiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) => (Vh = g.dynCall_viiiiiiiiiiiiiiiiiiii = U.Mg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F), Wh = g.dynCall_viiiiifiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Wh = g.dynCall_viiiiifiiiiii = U.Ng)(a, b, c, d, e, f, h, k, l, m, n, q, r), Xh = g.dynCall_vijiiiiiiijjii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p) => (Xh = g.dynCall_vijiiiiiiijjii = U.Og)(a, b, c, d, e, f, h, k, l, m, n, q, r, p), Yh = g.dynCall_viiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) => (Yh = g.dynCall_viiiiiiiiiiiiiiiiii = U.Pg)(\n a,\n b,\n c,\n d,\n e,\n f,\n h,\n k,\n l,\n m,\n n,\n q,\n r,\n p,\n u,\n y,\n A,\n B,\n C\n ), Zh = g.dynCall_viiiiiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (Zh = g.dynCall_viiiiiiiiiiiiiiiiiii = U.Qg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), $h = g.dynCall_viiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) => ($h = g.dynCall_viiiiiiiiiiiiiii = U.Rg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y), ai = g.dynCall_viiiiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) => (ai = g.dynCall_viiiiiiiiiiiiiiii = U.Sg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A), bi = g.dynCall_viiiijjj = (a, b, c, d, e, f, h, k) => (bi = g.dynCall_viiiijjj = U.Tg)(a, b, c, d, e, f, h, k), ci = g.dynCall_iiiiid = (a, b, c, d, e, f) => (ci = g.dynCall_iiiiid = U.Ug)(a, b, c, d, e, f), di = g.dynCall_viiiiiiijjj = (a, b, c, d, e, f, h, k, l, m, n) => (di = g.dynCall_viiiiiiijjj = U.Vg)(a, 
b, c, d, e, f, h, k, l, m, n), ei = g.dynCall_iiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n) => (ei = g.dynCall_iiiiiiiiiii = U.Wg)(a, b, c, d, e, f, h, k, l, m, n), fi = g.dynCall_iiiiiiiiiiiiiiiiiifi = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) => (fi = g.dynCall_iiiiiiiiiiiiiiiiiifi = U.Xg)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D), gi = g.dynCall_viiif = (a, b, c, d, e) => (gi = g.dynCall_viiif = U.Yg)(a, b, c, d, e), hi = g.dynCall_viiijiiiii = (a, b, c, d, e, f, h, k, l, m) => (hi = g.dynCall_viiijiiiii = U.Zg)(a, b, c, d, e, f, h, k, l, m), ii = g.dynCall_viiij = (a, b, c, d, e) => (ii = g.dynCall_viiij = U._g)(a, b, c, d, e), ji = g.dynCall_iijjj = (a, b, c, d, e) => (ji = g.dynCall_iijjj = U.$g)(a, b, c, d, e), ki = g.dynCall_viiiiji = (a, b, c, d, e, f, h) => (ki = g.dynCall_viiiiji = U.ah)(a, b, c, d, e, f, h), li = g.dynCall_iijjji = (a, b, c, d, e, f) => (li = g.dynCall_iijjji = U.bh)(a, b, c, d, e, f), mi = g.dynCall_ijijji = (a, b, c, d, e, f) => (mi = g.dynCall_ijijji = U.ch)(\n a,\n b,\n c,\n d,\n e,\n f\n ), ni = g.dynCall_viiijjiii = (a, b, c, d, e, f, h, k, l) => (ni = g.dynCall_viiijjiii = U.dh)(a, b, c, d, e, f, h, k, l), oi = g.dynCall_iiiiijji = (a, b, c, d, e, f, h, k) => (oi = g.dynCall_iiiiijji = U.eh)(a, b, c, d, e, f, h, k), pi = g.dynCall_iiiifi = (a, b, c, d, e, f) => (pi = g.dynCall_iiiifi = U.fh)(a, b, c, d, e, f), qi = g.dynCall_iiijii = (a, b, c, d, e, f) => (qi = g.dynCall_iiijii = U.gh)(a, b, c, d, e, f), ri = g.dynCall_iiiiiiiiijii = (a, b, c, d, e, f, h, k, l, m, n, q) => (ri = g.dynCall_iiiiiiiiijii = U.hh)(a, b, c, d, e, f, h, k, l, m, n, q), si = g.dynCall_iiiijjii = (a, b, c, d, e, f, h, k) => (si = g.dynCall_iiiijjii = U.ih)(a, b, c, d, e, f, h, k), ti = g.dynCall_iiiiiijjjii = (a, b, c, d, e, f, h, k, l, m, n) => (ti = g.dynCall_iiiiiijjjii = U.jh)(a, b, c, d, e, f, h, k, l, m, n), ui = g.dynCall_iiijiii = (a, b, c, d, e, f, h) => (ui = g.dynCall_iiijiii = U.kh)(a, b, c, d, e, f, h), 
vi = g.dynCall_iiiiiiiijjjfi = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (vi = g.dynCall_iiiiiiiijjjfi = U.lh)(a, b, c, d, e, f, h, k, l, m, n, q, r), wi = g.dynCall_iijiiii = (a, b, c, d, e, f, h) => (wi = g.dynCall_iijiiii = U.mh)(a, b, c, d, e, f, h), xi = g.dynCall_iijjjii = (a, b, c, d, e, f, h) => (xi = g.dynCall_iijjjii = U.nh)(a, b, c, d, e, f, h), yi = g.dynCall_jij = (a, b, c) => (yi = g.dynCall_jij = U.oh)(a, b, c), zi = g.dynCall_iiji = (a, b, c, d) => (zi = g.dynCall_iiji = U.ph)(a, b, c, d), Ai = g.dynCall_iiif = (a, b, c, d) => (Ai = g.dynCall_iiif = U.qh)(a, b, c, d), Bi = g.dynCall_vidi = (a, b, c, d) => (Bi = g.dynCall_vidi = U.rh)(a, b, c, d), Ci = g.dynCall_vjiii = (a, b, c, d, e) => (Ci = g.dynCall_vjiii = U.sh)(a, b, c, d, e), Di = g.dynCall_diiii = (a, b, c, d, e) => (Di = g.dynCall_diiii = U.th)(a, b, c, d, e), Ei = g.dynCall_diiiii = (a, b, c, d, e, f) => (Ei = g.dynCall_diiiii = U.uh)(a, b, c, d, e, f), Fi = g.dynCall_viiijjiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q) => (Fi = g.dynCall_viiijjiiiiii = U.vh)(a, b, c, d, e, f, h, k, l, m, n, q), Gi = g.dynCall_viijjijjjjiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Gi = g.dynCall_viijjijjjjiii = U.wh)(a, b, c, d, e, f, h, k, l, m, n, q, r), Hi = g.dynCall_iiiij = (a, b, c, d, e) => (Hi = g.dynCall_iiiij = U.xh)(a, b, c, d, e), Ii = g.dynCall_viiijii = (a, b, c, d, e, f, h) => (Ii = g.dynCall_viiijii = U.yh)(a, b, c, d, e, f, h), Ji = g.dynCall_viijiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r) => (Ji = g.dynCall_viijiiiiiiiii = U.zh)(a, b, c, d, e, f, h, k, l, m, n, q, r), Ki = g.dynCall_fiiii = (a, b, c, d, e) => (Ki = g.dynCall_fiiii = U.Ah)(a, b, c, d, e), Li = g.dynCall_jfi = (a, b, c) => (Li = g.dynCall_jfi = U.Bh)(a, b, c), Mi = g.dynCall_viiiiiiiiiiiiii = (a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) => (Mi = g.dynCall_viiiiiiiiiiiiii = U.Ch)(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u), Ni = g.dynCall_jiij = (a, b, c, d) => (Ni = g.dynCall_jiij = U.Dh)(a, b, c, d), Mb = 
(a) => (Mb = U.Eh)(a), Bb = () => (Bb = U.Fh)(), Lb = (a) => (Lb = U.Gh)(a), Nb = () => (Nb = U.Hh)();\n g.___start_em_js = 1275044;\n g.___stop_em_js = 1275205;\n function Dc(a, b, c, d) {\n var e = Z();\n try {\n return Ff(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Bc(a, b, c) {\n var d = Z();\n try {\n return dynCall_iii(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function Jd(a, b, c) {\n var d = Z();\n try {\n dynCall_vii(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function yc(a, b) {\n var c = Z();\n try {\n return Gf(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function Fd(a, b) {\n var c = Z();\n try {\n dynCall_vi(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function dd(a, b, c, d) {\n var e = Z();\n try {\n return Jf(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Dd(a) {\n var b = Z();\n try {\n dynCall_v(a);\n } catch (c) {\n Y(b);\n if (c !== c + 0)\n throw c;\n X(1, 0);\n }\n }\n function Ic(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return Hf(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Hc(a, b, c, d, e, f) {\n var h = Z();\n try {\n return If(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Fc(a, b, c, d, e) {\n var f = Z();\n try {\n return Kf(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Qd(a, b, c, d) {\n var e = Z();\n try {\n Lf(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Td(a, b, c, d, e) {\n var f = Z();\n try {\n Nf(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Vd(a, b, c, d, e, f) {\n var h = Z();\n try {\n 
Mf(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Zd(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n Of(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function ke(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n Qf(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function zc(a, b, c) {\n var d = Z();\n try {\n return Sf(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function Ac(a, b, c) {\n var d = Z();\n try {\n return Tf(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function md(a, b, c) {\n var d = Z();\n try {\n return Uf(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function yd(a, b, c) {\n var d = Z();\n try {\n return Vf(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n return 0n;\n }\n }\n function ee(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n Xf(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function xc(a) {\n var b = Z();\n try {\n return Wf(a);\n } catch (c) {\n Y(b);\n if (c !== c + 0)\n throw c;\n X(1, 0);\n }\n }\n function df(a, b, c) {\n var d = Z();\n try {\n fg(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function We(a, b, c, d, e) {\n var f = Z();\n try {\n Pf(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Ce(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n Yf(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function cf(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n gg(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function Re(a, b, c, d, e, f, 
h, k, l, m, n, q) {\n var r = Z();\n try {\n hg(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function Jc(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n return ig(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function de(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n jg(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function Nc(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n return ag(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function fe(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n bg(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function me(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n lg(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function Yc(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n return ng(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function Kc(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n return mg(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function nf(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n og(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Ve(a, b, c, d) {\n var e = Z();\n try {\n $f(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function tc(a, b) {\n var c = Z();\n try {\n return pg(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function xd(a, b) {\n var c = Z();\n try {\n return Zf(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 
0)\n throw d;\n X(1, 0);\n return 0n;\n }\n }\n function nc(a, b) {\n var c = Z();\n try {\n return rg(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function jf(a, b, c, d) {\n var e = Z();\n try {\n tg(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Oe(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n Ii(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Ze(a, b, c, d, e) {\n var f = Z();\n try {\n zh(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Lc(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n return ug(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function Xe(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n vg(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Kd(a, b, c, d) {\n var e = Z();\n try {\n wg(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Tc(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n return kg(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Id(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n xg(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Ue(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n Rf(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Pd(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n yg(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Wd(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n zg(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function zd(a, b, c, 
d) {\n var e = Z();\n try {\n return Bg(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n return 0n;\n }\n }\n function uf(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n Cg(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function Rd(a, b, c, d, e) {\n var f = Z();\n try {\n Dg(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function ef(a, b, c, d) {\n var e = Z();\n try {\n Fg(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function rf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) {\n var A = Z();\n try {\n Gg(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y);\n } catch (B) {\n Y(A);\n if (B !== B + 0)\n throw B;\n X(1, 0);\n }\n }\n function Ne(a, b, c, d, e, f) {\n var h = Z();\n try {\n Hg(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function of(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n Ig(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function ed(a, b, c, d, e) {\n var f = Z();\n try {\n return Jg(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function kd(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n return Kg(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function sf(a, b) {\n var c = Z();\n try {\n Lg(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function Cd(a, b, c) {\n var d = Z();\n try {\n return Mg(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n return 0n;\n }\n }\n function jd(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n return Ng(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function ie(a, b, c, d, e, 
f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n Sg(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function Ed(a, b, c, d, e) {\n var f = Z();\n try {\n Og(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Ud(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n Pg(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Md(a, b, c, d, e) {\n var f = Z();\n try {\n Rg(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Xd(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n Qg(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function ue(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n Tg(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function Oc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) {\n var B = Z();\n try {\n return Ug(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A);\n } catch (C) {\n Y(B);\n if (C !== C + 0)\n throw C;\n X(1, 0);\n }\n }\n function le(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n Vg(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function sd(a, b) {\n var c = Z();\n try {\n return cg(a, b);\n } catch (d) {\n Y(c);\n if (d !== d + 0)\n throw d;\n X(1, 0);\n }\n }\n function wc(a, b, c, d, e) {\n var f = Z();\n try {\n return Ki(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Pc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) {\n var F = Z();\n try {\n return Wg(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D);\n } catch (H) {\n Y(F);\n if (H !== H + 0)\n throw H;\n X(1, 0);\n }\n }\n function mf(a, b, c, d, e, f, h, k, l, m) {\n var n = 
Z();\n try {\n Xg(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function cd(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return Yg(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function je(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n Zg(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function ld(a, b, c, d, e, f) {\n var h = Z();\n try {\n return $g(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Ie(a, b, c, d, e, f) {\n var h = Z();\n try {\n bh(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function ye(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n ch(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function zf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) {\n var C = Z();\n try {\n dh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B);\n } catch (D) {\n Y(C);\n if (D !== D + 0)\n throw D;\n X(1, 0);\n }\n }\n function ae(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) {\n var B = Z();\n try {\n eh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A);\n } catch (C) {\n Y(B);\n if (C !== C + 0)\n throw C;\n X(1, 0);\n }\n }\n function $d(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) {\n var A = Z();\n try {\n fh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y);\n } catch (B) {\n Y(A);\n if (B !== B + 0)\n throw B;\n X(1, 0);\n }\n }\n function be(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) {\n var y = Z();\n try {\n gh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u);\n } catch (A) {\n Y(y);\n if (A !== A + 0)\n throw A;\n X(1, 0);\n }\n }\n function Af(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) {\n var H = Z();\n try {\n hh(a, b, c, d, e, f, h, k, l, m, n, 
q, r, p, u, y, A, B, C, D, F);\n } catch (K) {\n Y(H);\n if (K !== K + 0)\n throw K;\n X(1, 0);\n }\n }\n function yf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) {\n var D = Z();\n try {\n ih(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C);\n } catch (F) {\n Y(D);\n if (F !== F + 0)\n throw F;\n X(1, 0);\n }\n }\n function xf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B) {\n var C = Z();\n try {\n jh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B);\n } catch (D) {\n Y(C);\n if (D !== D + 0)\n throw D;\n X(1, 0);\n }\n }\n function Bf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) {\n var H = Z();\n try {\n kh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F);\n } catch (K) {\n Y(H);\n if (K !== K + 0)\n throw K;\n X(1, 0);\n }\n }\n function gf(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n lh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function kf(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n mh(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function ve(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n nh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function qf(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) {\n var y = Z();\n try {\n oh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u);\n } catch (A) {\n Y(y);\n if (A !== A + 0)\n throw A;\n X(1, 0);\n }\n }\n function wf(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n qh(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function vf(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n rh(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function sc(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n 
return ah(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Nd(a, b, c, d, e) {\n var f = Z();\n try {\n ph(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function wd(a, b, c) {\n var d = Z();\n try {\n return Li(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n return 0n;\n }\n }\n function ze(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n sh(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function Ge(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n uh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function Xc(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return vh(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function ad(a, b, c, d, e, f) {\n var h = Z();\n try {\n return wh(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function De(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n xh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function lf(a, b, c, d, e, f) {\n var h = Z();\n try {\n th(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Qe(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n yh(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function Ke(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n Ah(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function af(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n Bh(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n 
}\n function td(a, b, c, d) {\n var e = Z();\n try {\n return Ch(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function He(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n Dh(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function pf(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n Eh(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Fe(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n Fh(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Ae(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n Gh(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function ff(a, b, c, d, e, f) {\n var h = Z();\n try {\n Hh(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function he(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n Ih(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function hd(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n return Jh(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function ne(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) {\n var y = Z();\n try {\n Mi(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u);\n } catch (A) {\n Y(y);\n if (A !== A + 0)\n throw A;\n X(1, 0);\n }\n }\n function Be(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n Kh(a, b, c, d, e, f, h, k, l, m, n, q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function we(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u) {\n var y = Z();\n try {\n Lh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u);\n } catch (A) {\n Y(y);\n if (A !== A + 0)\n throw A;\n X(1, 0);\n }\n }\n function 
Hd(a, b, c) {\n var d = Z();\n try {\n Mh(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function vc(a, b, c, d) {\n var e = Z();\n try {\n return Oh(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function pc(a, b, c, d) {\n var e = Z();\n try {\n return Ph(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function ce(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n Qh(a, b, c, d, e, f, h, k, l, m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function Ee(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa) {\n var Qi = Z();\n try {\n Rh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F, H, K, Va, Wa, Xa);\n } catch (Ya) {\n Y(Qi);\n if (Ya !== Ya + 0)\n throw Ya;\n X(1, 0);\n }\n }\n function $e(a, b, c, d, e, f) {\n var h = Z();\n try {\n Sh(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Rc(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n return Th(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function Od(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n Uh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function se(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F) {\n var H = Z();\n try {\n Vh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D, F);\n } catch (K) {\n Y(H);\n if (K !== K + 0)\n throw K;\n X(1, 0);\n }\n }\n function Yd(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n Wh(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function hf(a, b, c, d, e, f, h, k, l, m, n, q, r, p) {\n var u = Z();\n try {\n Xh(a, b, c, d, e, f, h, k, l, m, n, 
q, r, p);\n } catch (y) {\n Y(u);\n if (y !== y + 0)\n throw y;\n X(1, 0);\n }\n }\n function re(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) {\n var F = Z();\n try {\n Zh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D);\n } catch (H) {\n Y(F);\n if (H !== H + 0)\n throw H;\n X(1, 0);\n }\n }\n function pe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A) {\n var B = Z();\n try {\n ai(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A);\n } catch (C) {\n Y(B);\n if (C !== C + 0)\n throw C;\n X(1, 0);\n }\n }\n function qe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C) {\n var D = Z();\n try {\n Yh(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C);\n } catch (F) {\n Y(D);\n if (F !== F + 0)\n throw F;\n X(1, 0);\n }\n }\n function Le(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n bi(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function xe(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n di(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function Mc(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n return ei(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function Qc(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D) {\n var F = Z();\n try {\n return fi(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y, A, B, C, D);\n } catch (H) {\n Y(F);\n if (H !== H + 0)\n throw H;\n X(1, 0);\n }\n }\n function Sd(a, b, c, d, e) {\n var f = Z();\n try {\n gi(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function ge(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n Ag(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function Pe(a, b, c, d, e, f, h, k, l, m) {\n var n = Z();\n try {\n hi(a, b, c, d, e, f, h, k, l, 
m);\n } catch (q) {\n Y(n);\n if (q !== q + 0)\n throw q;\n X(1, 0);\n }\n }\n function Me(a, b, c, d, e) {\n var f = Z();\n try {\n ii(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Ye(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n Ji(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function pd(a, b, c, d, e) {\n var f = Z();\n try {\n return ji(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function Ad(a, b, c, d) {\n var e = Z();\n try {\n return Ni(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n return 0n;\n }\n }\n function Je(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n ki(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function qd(a, b, c, d, e, f) {\n var h = Z();\n try {\n return li(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function ud(a, b, c, d, e, f) {\n var h = Z();\n try {\n return mi(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Se(a, b, c, d, e, f, h, k, l) {\n var m = Z();\n try {\n ni(a, b, c, d, e, f, h, k, l);\n } catch (n) {\n Y(m);\n if (n !== n + 0)\n throw n;\n X(1, 0);\n }\n }\n function Ld(a, b, c, d) {\n var e = Z();\n try {\n Nh(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Zc(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n return oi(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function Ec(a, b, c, d, e, f) {\n var h = Z();\n try {\n return pi(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function fd(a, b, c, d, e, f) {\n var h = Z();\n try {\n return qi(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 
0)\n throw k;\n X(1, 0);\n }\n }\n function Sc(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n return ri(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function bd(a, b, c, d, e, f, h, k) {\n var l = Z();\n try {\n return si(a, b, c, d, e, f, h, k);\n } catch (m) {\n Y(l);\n if (m !== m + 0)\n throw m;\n X(1, 0);\n }\n }\n function Vc(a, b, c, d, e, f, h, k, l, m, n) {\n var q = Z();\n try {\n return ti(a, b, c, d, e, f, h, k, l, m, n);\n } catch (r) {\n Y(q);\n if (r !== r + 0)\n throw r;\n X(1, 0);\n }\n }\n function gd(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return ui(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Uc(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n return vi(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function od(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return wi(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function rd(a, b, c, d, e, f, h) {\n var k = Z();\n try {\n return xi(a, b, c, d, e, f, h);\n } catch (l) {\n Y(k);\n if (l !== l + 0)\n throw l;\n X(1, 0);\n }\n }\n function Bd(a, b, c) {\n var d = Z();\n try {\n return yi(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n return 0n;\n }\n }\n function nd(a, b, c, d) {\n var e = Z();\n try {\n return zi(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Gd(a, b, c, d) {\n var e = Z();\n try {\n Bi(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function Cc(a, b, c, d) {\n var e = Z();\n try {\n return Ai(a, b, c, d);\n } catch (f) {\n Y(e);\n if (f !== f + 0)\n throw f;\n X(1, 0);\n }\n }\n function tf(a, b, c, d, e) {\n var f = Z();\n try {\n Ci(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if 
(h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function qc(a, b, c, d, e) {\n var f = Z();\n try {\n return Di(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function rc(a, b, c, d, e, f) {\n var h = Z();\n try {\n return Ei(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Te(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n Fi(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function bf(a, b, c, d, e, f, h, k, l, m, n, q, r) {\n var p = Z();\n try {\n Gi(a, b, c, d, e, f, h, k, l, m, n, q, r);\n } catch (u) {\n Y(p);\n if (u !== u + 0)\n throw u;\n X(1, 0);\n }\n }\n function te(a, b, c, d, e, f, h, k, l, m, n, q) {\n var r = Z();\n try {\n Eg(a, b, c, d, e, f, h, k, l, m, n, q);\n } catch (p) {\n Y(r);\n if (p !== p + 0)\n throw p;\n X(1, 0);\n }\n }\n function $c(a, b, c, d, e) {\n var f = Z();\n try {\n return Hi(a, b, c, d, e);\n } catch (h) {\n Y(f);\n if (h !== h + 0)\n throw h;\n X(1, 0);\n }\n }\n function vd(a) {\n var b = Z();\n try {\n return eg(a);\n } catch (c) {\n Y(b);\n if (c !== c + 0)\n throw c;\n X(1, 0);\n return 0n;\n }\n }\n function Wc(a, b, c, d, e, f) {\n var h = Z();\n try {\n return dg(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function Gc(a, b, c, d, e, f) {\n var h = Z();\n try {\n return ci(a, b, c, d, e, f);\n } catch (k) {\n Y(h);\n if (k !== k + 0)\n throw k;\n X(1, 0);\n }\n }\n function oe(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y) {\n var A = Z();\n try {\n $h(a, b, c, d, e, f, h, k, l, m, n, q, r, p, u, y);\n } catch (B) {\n Y(A);\n if (B !== B + 0)\n throw B;\n X(1, 0);\n }\n }\n function uc(a, b, c) {\n var d = Z();\n try {\n return qg(a, b, c);\n } catch (e) {\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function oc(a, b, c) {\n var d = Z();\n try {\n return sg(a, b, c);\n } catch (e) 
{\n Y(d);\n if (e !== e + 0)\n throw e;\n X(1, 0);\n }\n }\n function Df() {\n var a = U;\n a = Object.assign({}, a);\n var b = (c) => (d) => c(d) >>> 0;\n a.Fd = b(a.Fd);\n a.he = b(a.he);\n a.ne = b(a.ne);\n a.oe = ((c) => () => c() >>> 0)(a.oe);\n return a;\n }\n g.stackSave = () => Z();\n g.stackRestore = (a) => Y(a);\n g.stackAlloc = (a) => Ef(a);\n g.UTF8ToString = J;\n g.stringToUTF8 = (a, b, c) => M(a, E, b, c);\n g.lengthBytesUTF8 = bb;\n var Oi;\n Fa = function Pi() {\n Oi || Ri();\n Oi || (Fa = Pi);\n };\n function Ri() {\n if (!(0 < Da)) {\n if (g.preRun)\n for ("function" == typeof g.preRun && (g.preRun = [g.preRun]); g.preRun.length; ) {\n var a = g.preRun.shift();\n Aa.unshift(a);\n }\n for (; 0 < Aa.length; )\n Aa.shift()(g);\n if (!(0 < Da || Oi || (Oi = true, g.calledRun = true, x))) {\n for (; 0 < Ba.length; )\n Ba.shift()(g);\n for (aa(g); 0 < Ca.length; )\n Ca.shift()(g);\n }\n }\n }\n Ri();\n return readyPromise;\n };\n })();\n if (typeof exports === "object" && typeof module === "object")\n module.exports = ortWasm;\n else if (typeof define === "function" && define["amd"])\n define([], () => ortWasm);\n }\n });\n\n // nodejs-ignore:worker_threads\n var require_worker_threads = __commonJS({\n "nodejs-ignore:worker_threads"() {\n }\n });\n\n // nodejs-ignore:perf_hooks\n var require_perf_hooks = __commonJS({\n "nodejs-ignore:perf_hooks"() {\n }\n });\n\n // nodejs-ignore:os\n var os_exports = {};\n __export(os_exports, {\n cpus: () => cpus\n });\n var cpus;\n var init_os = __esm({\n "nodejs-ignore:os"() {\n cpus = void 0;\n }\n });\n\n // web/lib/wasm/binding/ort-wasm-simd-threaded.jsep.js\n var require_ort_wasm_simd_threaded_jsep = __commonJS({\n "web/lib/wasm/binding/ort-wasm-simd-threaded.jsep.js"(exports, module) {\n "use strict";\n var ortWasmThreaded = (() => {\n var _scriptDir = typeof document !== "undefined" && document.currentScript ? 
document.currentScript.src : void 0;\n if (typeof __filename !== "undefined")\n _scriptDir = _scriptDir || __filename;\n return function(moduleArg = {}) {\n function d() {\n l.buffer != p.buffer && t();\n return p;\n }\n function v() {\n l.buffer != p.buffer && t();\n return aa;\n }\n function z() {\n l.buffer != p.buffer && t();\n return ba;\n }\n function A() {\n l.buffer != p.buffer && t();\n return ca;\n }\n function da() {\n l.buffer != p.buffer && t();\n return ea;\n }\n var B = moduleArg, fa, C;\n B.ready = new Promise((a, b) => {\n fa = a;\n C = b;\n });\n "use strict";\n B.jsepInit = (a, b, c, e, f, h, k, q) => {\n B.Qb = a;\n B.wb = b;\n B.yb = c;\n B.jb = e;\n B.xb = f;\n B.Ea = h;\n B.zb = k;\n B.Ab = q;\n b = (n, m, r) => (...w) => {\n const x = D, g = m?.();\n w = n(...w);\n const u = m?.();\n g !== u && (n = u, r(g), m = r = null);\n return D != x ? ha() : w;\n };\n c = (n) => async (...m) => {\n try {\n if (B.bb)\n throw Error("Session already started");\n const r = B.bb = { Fb: m[0], errors: [] }, w = await n(...m);\n if (B.bb !== r)\n throw Error("Session mismatch");\n a.flush();\n const x = r.errors;\n if (0 < x.length) {\n let g = await Promise.all(x);\n g = g.filter((u) => u);\n if (0 < g.length)\n throw Error(g.join("\\n"));\n }\n return w;\n } finally {\n B.bb = null;\n }\n };\n B._OrtRun = c(b(B._OrtRun, () => B._OrtRun, (n) => B._OrtRun = n));\n B._OrtRunWithBinding = c(b(B._OrtRunWithBinding, () => B._OrtRunWithBinding, (n) => B._OrtRunWithBinding = n));\n B._OrtBindInput = b(B._OrtBindInput, () => B._OrtBindInput, (n) => B._OrtBindInput = n);\n B.jsepRegisterBuffer = (n, m, r, w) => a.registerBuffer(n, m, r, w);\n B.jsepUnregisterBuffers = (n) => {\n a.unregisterBuffers(n);\n };\n B.jsepGetBuffer = (n) => a.getBuffer(n);\n B.jsepCreateDownloader = (n, m, r) => a.createDownloader(n, m, r);\n };\n var ia = Object.assign({}, B), ja = "./this.program", E = (a, b) => {\n throw b;\n }, ka = "object" == typeof window, F = "function" == typeof 
importScripts, G = "object" == typeof process && "object" == typeof process.versions && "string" == typeof process.versions.node, H = B.ENVIRONMENT_IS_PTHREAD || false, I = "";\n function la(a) {\n return B.locateFile ? B.locateFile(a, I) : I + a;\n }\n var ma, J, na;\n if (G) {\n var fs = (init_fs(), __toCommonJS(fs_exports)), oa = (init_path(), __toCommonJS(path_exports));\n I = F ? oa.dirname(I) + "/" : __dirname + "/";\n ma = (b, c) => {\n b = b.startsWith("file://") ? new URL(b) : oa.normalize(b);\n return fs.readFileSync(b, c ? void 0 : "utf8");\n };\n na = (b) => {\n b = ma(b, true);\n b.buffer || (b = new Uint8Array(b));\n return b;\n };\n J = (b, c, e, f = true) => {\n b = b.startsWith("file://") ? new URL(b) : oa.normalize(b);\n fs.readFile(b, f ? void 0 : "utf8", (h, k) => {\n h ? e(h) : c(f ? k.buffer : k);\n });\n };\n !B.thisProgram && 1 < process.argv.length && (ja = process.argv[1].replace(/\\\\/g, "/"));\n process.argv.slice(2);\n E = (b, c) => {\n process.exitCode = b;\n throw c;\n };\n B.inspect = () => "[Emscripten Module object]";\n let a;\n try {\n a = require_worker_threads();\n } catch (b) {\n throw console.error(\'The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?\'), b;\n }\n global.Worker = a.Worker;\n } else if (ka || F)\n F ? I = self.location.href : "undefined" != typeof document && document.currentScript && (I = document.currentScript.src), typeof _scriptDir !== "undefined" && _scriptDir && (I = _scriptDir), 0 !== I.indexOf("blob:") ? 
I = I.substr(0, I.replace(/[?#].*/, "").lastIndexOf("/") + 1) : I = "", G || (ma = (a) => {\n var b = new XMLHttpRequest();\n b.open("GET", a, false);\n b.send(null);\n return b.responseText;\n }, F && (na = (a) => {\n var b = new XMLHttpRequest();\n b.open("GET", a, false);\n b.responseType = "arraybuffer";\n b.send(null);\n return new Uint8Array(b.response);\n }), J = (a, b, c) => {\n var e = new XMLHttpRequest();\n e.open("GET", a, true);\n e.responseType = "arraybuffer";\n e.onload = () => {\n 200 == e.status || 0 == e.status && e.response ? b(e.response) : c();\n };\n e.onerror = c;\n e.send(null);\n });\n G && "undefined" == typeof performance && (global.performance = require_perf_hooks().performance);\n var pa = console.log.bind(console), qa = console.error.bind(console);\n G && (pa = (...a) => fs.writeSync(1, a.join(" ") + "\\n"), qa = (...a) => fs.writeSync(2, a.join(" ") + "\\n"));\n var ra = B.print || pa, K = B.printErr || qa;\n Object.assign(B, ia);\n ia = null;\n B.thisProgram && (ja = B.thisProgram);\n B.quit && (E = B.quit);\n var L;\n B.wasmBinary && (L = B.wasmBinary);\n var noExitRuntime = B.noExitRuntime || true;\n "object" != typeof WebAssembly && M("no native wasm support detected");\n var l, N, sa, P = false, Q, p, aa, ba, ca, ea;\n function t() {\n var a = l.buffer;\n B.HEAP8 = p = new Int8Array(a);\n B.HEAP16 = new Int16Array(a);\n B.HEAP32 = ba = new Int32Array(a);\n B.HEAPU8 = aa = new Uint8Array(a);\n B.HEAPU16 = new Uint16Array(a);\n B.HEAPU32 = ca = new Uint32Array(a);\n B.HEAPF32 = new Float32Array(a);\n B.HEAPF64 = ea = new Float64Array(a);\n }\n var ta = B.INITIAL_MEMORY || 16777216;\n 5242880 <= ta || M("INITIAL_MEMORY should be larger than STACK_SIZE, was " + ta + "! 
(STACK_SIZE=5242880)");\n if (H)\n l = B.wasmMemory;\n else if (B.wasmMemory)\n l = B.wasmMemory;\n else if (l = new WebAssembly.Memory({ initial: ta / 65536, maximum: 65536, shared: true }), !(l.buffer instanceof SharedArrayBuffer))\n throw K("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag"), G && K("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)"), Error("bad memory");\n t();\n ta = l.buffer.byteLength;\n var ua = [], va = [], wa = [], xa = 0;\n function ya() {\n return noExitRuntime || 0 < xa;\n }\n var R = 0, za = null, S = null;\n function Aa() {\n R++;\n B.monitorRunDependencies && B.monitorRunDependencies(R);\n }\n function Ba() {\n R--;\n B.monitorRunDependencies && B.monitorRunDependencies(R);\n if (0 == R && (null !== za && (clearInterval(za), za = null), S)) {\n var a = S;\n S = null;\n a();\n }\n }\n function M(a) {\n if (B.onAbort)\n B.onAbort(a);\n a = "Aborted(" + a + ")";\n K(a);\n P = true;\n Q = 1;\n a = new WebAssembly.RuntimeError(a + ". 
Build with -sASSERTIONS for more info.");\n C(a);\n throw a;\n }\n function Ca(a) {\n return a.startsWith("data:application/octet-stream;base64,");\n }\n var T;\n T = "ort-wasm-simd-threaded.wasm";\n Ca(T) || (T = la(T));\n function Da(a) {\n if (a == T && L)\n return new Uint8Array(L);\n if (na)\n return na(a);\n throw "both async and sync fetching of the wasm failed";\n }\n function Ea(a) {\n if (!L && (ka || F)) {\n if ("function" == typeof fetch && !a.startsWith("file://"))\n return fetch(a, { credentials: "same-origin" }).then((b) => {\n if (!b.ok)\n throw "failed to load wasm binary file at \'" + a + "\'";\n return b.arrayBuffer();\n }).catch(() => Da(a));\n if (J)\n return new Promise((b, c) => {\n J(a, (e) => b(new Uint8Array(e)), c);\n });\n }\n return Promise.resolve().then(() => Da(a));\n }\n function Fa(a, b, c) {\n return Ea(a).then((e) => WebAssembly.instantiate(e, b)).then((e) => e).then(c, (e) => {\n K("failed to asynchronously prepare wasm: " + e);\n M(e);\n });\n }\n function Ga(a, b) {\n var c = T;\n return L || "function" != typeof WebAssembly.instantiateStreaming || Ca(c) || c.startsWith("file://") || G || "function" != typeof fetch ? 
Fa(c, a, b) : fetch(c, { credentials: "same-origin" }).then((e) => WebAssembly.instantiateStreaming(e, a).then(b, function(f) {\n K("wasm streaming compile failed: " + f);\n K("falling back to ArrayBuffer instantiation");\n return Fa(c, a, b);\n }));\n }\n var U, Ha = {\n 906828: (a) => {\n B.Ea("Abs", a, void 0);\n },\n 906879: (a) => {\n B.Ea("Neg", a, void 0);\n },\n 906930: (a) => {\n B.Ea("Floor", a, void 0);\n },\n 906983: (a) => {\n B.Ea("Ceil", a, void 0);\n },\n 907035: (a) => {\n B.Ea("Reciprocal", a, void 0);\n },\n 907093: (a) => {\n B.Ea("Sqrt", a, void 0);\n },\n 907145: (a) => {\n B.Ea("Exp", a, void 0);\n },\n 907196: (a) => {\n B.Ea("Erf", a, void 0);\n },\n 907247: (a) => {\n B.Ea("Sigmoid", a, void 0);\n },\n 907302: (a) => {\n B.Ea("Log", a, void 0);\n },\n 907353: (a) => {\n B.Ea("Sin", a, void 0);\n },\n 907404: (a) => {\n B.Ea("Cos", a, void 0);\n },\n 907455: (a) => {\n B.Ea("Tan", a, void 0);\n },\n 907506: (a) => {\n B.Ea("Asin", a, void 0);\n },\n 907558: (a) => {\n B.Ea(\n "Acos",\n a,\n void 0\n );\n },\n 907610: (a) => {\n B.Ea("Atan", a, void 0);\n },\n 907662: (a) => {\n B.Ea("Sinh", a, void 0);\n },\n 907714: (a) => {\n B.Ea("Cosh", a, void 0);\n },\n 907766: (a) => {\n B.Ea("Asinh", a, void 0);\n },\n 907819: (a) => {\n B.Ea("Acosh", a, void 0);\n },\n 907872: (a) => {\n B.Ea("Atanh", a, void 0);\n },\n 907925: (a) => {\n B.Ea("Tanh", a, void 0);\n },\n 907977: (a) => {\n B.Ea("Not", a, void 0);\n },\n 908028: (a, b, c) => {\n B.Ea("ClipV10", a, { min: b, max: c });\n },\n 908100: (a) => {\n B.Ea("Clip", a, void 0);\n },\n 908152: (a, b) => {\n B.Ea("Elu", a, { alpha: b });\n },\n 908210: (a) => {\n B.Ea("Relu", a, void 0);\n },\n 908262: (a, b) => {\n B.Ea("LeakyRelu", a, { alpha: b });\n },\n 908326: (a, b) => {\n B.Ea("ThresholdedRelu", a, { alpha: b });\n },\n 908396: (a, b) => {\n B.Ea("Cast", a, { to: b });\n },\n 908454: (a) => {\n B.Ea("Add", a, void 0);\n },\n 908505: (a) => {\n B.Ea("Sub", a, void 0);\n },\n 908556: (a) 
=> {\n B.Ea("Mul", a, void 0);\n },\n 908607: (a) => {\n B.Ea("Div", a, void 0);\n },\n 908658: (a) => {\n B.Ea("Pow", a, void 0);\n },\n 908709: (a) => {\n B.Ea("Equal", a, void 0);\n },\n 908762: (a) => {\n B.Ea("Greater", a, void 0);\n },\n 908817: (a) => {\n B.Ea("GreaterOrEqual", a, void 0);\n },\n 908879: (a) => {\n B.Ea("Less", a, void 0);\n },\n 908931: (a) => {\n B.Ea("LessOrEqual", a, void 0);\n },\n 908990: (a, b, c, e, f) => {\n B.Ea("ReduceMean", a, {\n keepDims: !!b,\n noopWithEmptyAxes: !!c,\n axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : []\n });\n },\n 909154: (a, b, c, e, f) => {\n B.Ea("ReduceMax", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 909317: (a, b, c, e, f) => {\n B.Ea("ReduceMin", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 909480: (a, b, c, e, f) => {\n B.Ea("ReduceProd", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 909644: (a, b, c, e, f) => {\n B.Ea("ReduceSum", a, {\n keepDims: !!b,\n noopWithEmptyAxes: !!c,\n axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : []\n });\n },\n 909807: (a, b, c, e, f) => {\n B.Ea("ReduceL1", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 909969: (a, b, c, e, f) => {\n B.Ea("ReduceL2", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 910131: (a, b, c, e, f) => {\n B.Ea("ReduceLogSum", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 910297: (a, b, c, e, f) => {\n B.Ea("ReduceSumSquare", a, {\n keepDims: !!b,\n noopWithEmptyAxes: !!c,\n axes: e ? 
Array.from(z().subarray(f >>> 0, f + e >>> 0)) : []\n });\n },\n 910466: (a, b, c, e, f) => {\n B.Ea("ReduceLogSumExp", a, { keepDims: !!b, noopWithEmptyAxes: !!c, axes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 910635: (a) => {\n B.Ea("Where", a, void 0);\n },\n 910688: (a, b, c) => {\n B.Ea("Transpose", a, { perm: b ? Array.from(z().subarray(c >>> 0, c + b >>> 0)) : [] });\n },\n 910801: (a, b, c, e, f, h, k, q, n, m) => {\n B.Ea("Conv", a, { format: n ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], w_is_const: () => !!d()[m >>> 0] });\n },\n 911029: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u) => {\n B.Ea("Conv", a, { format: g ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, e], group: f, kernel_shape: [h, k], pads: [q, n, m, r], strides: [w, x], w_is_const: () => !!d()[u >>> 0] });\n },\n 911288: (a, b, c, e, f, h, k, q, n, m) => {\n B.Ea("Conv", a, { format: n ? "NHWC" : "NCHW", auto_pad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], w_is_const: () => !!d()[m >>> 0] });\n },\n 911516: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u) => {\n B.Ea("Conv", a, { format: g ? "NHWC" : "NCHW", auto_pad: b, dilations: [c, e], group: f, kernel_shape: [\n h,\n k\n ], pads: [q, n, m, r], strides: [w, x], w_is_const: () => !!d()[u >>> 0] });\n },\n 911775: (a, b, c, e, f, h, k, q, n, m, r, w, x, g) => {\n B.Ea("ConvTranspose", a, { format: n ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], wIsConst: () => !!d()[m >>> 0], outputPadding: r ? Array.from(z().subarray(w >>> 0, w + r >>> 0)) : [], outputShape: x ? Array.from(z().subarray(g >>> 0, g + x >>> 0)) : [] });\n },\n 912155: (a, b, c, e, f, h, k, q, n, m, r, w, x) => {\n B.Ea("ConvTranspose", a, { format: q ? 
"NHWC" : "NCHW", autoPad: b, dilations: Array.from(z().subarray(c >>> 0, c + 2 >>> 0)), group: e, kernelShape: Array.from(z().subarray(f >>> 0, f + 2 >>> 0)), pads: Array.from(z().subarray(h >>> 0, h + 4 >>> 0)), strides: Array.from(z().subarray(k >>> 0, k + 2 >>> 0)), wIsConst: () => !!d()[n >>> 0], outputPadding: 0 < m ? Array.from(z().subarray(r >>> 0, r + m >>> 0)) : [], outputShape: 0 < w ? Array.from(z().subarray(x >>> 0, x + w >>> 0)) : [] });\n },\n 912678: (a, b, c, e, f, h, k, q, n, m, r, w, x, g) => {\n B.Ea("ConvTranspose", a, { format: n ? "NHWC" : "NCHW", autoPad: b, dilations: [c], group: e, kernel_shape: [f], pads: [h, k], strides: [q], wIsConst: () => !!d()[m >>> 0], outputPadding: r ? Array.from(z().subarray(w >>> 0, w + r >>> 0)) : [], outputShape: x ? Array.from(z().subarray(g >>> 0, g + x >>> 0)) : [] });\n },\n 913058: (a, b, c, e, f, h, k, q, n, m, r, w, x) => {\n B.Ea("ConvTranspose", a, { format: q ? "NHWC" : "NCHW", autoPad: b, dilations: Array.from(z().subarray(c >>> 0, c + 2 >>> 0)), group: e, kernelShape: Array.from(z().subarray(f >>> 0, f + 2 >>> 0)), pads: Array.from(z().subarray(h >>> 0, h + 4 >>> 0)), strides: Array.from(z().subarray(k >>> 0, k + 2 >>> 0)), wIsConst: () => !!d()[n >>> 0], outputPadding: 0 < m ? Array.from(z().subarray(r >>> 0, r + m >>> 0)) : [], outputShape: 0 < w ? Array.from(z().subarray(x >>> 0, x + w >>> 0)) : [] });\n },\n 913581: (a, b) => {\n B.Ea("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 913672: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => {\n B.Ea("AveragePool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] });\n },\n 913956: (a, b) => {\n B.Ea("GlobalAveragePool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 914047: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => {\n B.Ea("AveragePool", a, {\n format: y ? 
"NHWC" : "NCHW",\n auto_pad: b,\n ceil_mode: c,\n count_include_pad: e,\n storage_order: f,\n dilations: [h, k],\n kernel_shape: [q, n],\n pads: [m, r, w, x],\n strides: [g, u]\n });\n },\n 914331: (a, b) => {\n B.Ea("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 914418: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => {\n B.Ea("MaxPool", a, { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] });\n },\n 914698: (a, b) => {\n B.Ea("GlobalMaxPool", a, { format: b ? "NHWC" : "NCHW" });\n },\n 914785: (a, b, c, e, f, h, k, q, n, m, r, w, x, g, u, y) => {\n B.Ea(\n "MaxPool",\n a,\n { format: y ? "NHWC" : "NCHW", auto_pad: b, ceil_mode: c, count_include_pad: e, storage_order: f, dilations: [h, k], kernel_shape: [q, n], pads: [m, r, w, x], strides: [g, u] }\n );\n },\n 915065: (a, b, c, e, f) => {\n B.Ea("Gemm", a, { alpha: b, beta: c, transA: e, transB: f });\n },\n 915169: (a) => {\n B.Ea("MatMul", a, void 0);\n },\n 915223: (a, b, c, e) => {\n B.Ea("ArgMax", a, { keepDims: !!b, selectLastIndex: !!c, axis: e });\n },\n 915331: (a, b, c, e) => {\n B.Ea("ArgMin", a, { keepDims: !!b, selectLastIndex: !!c, axis: e });\n },\n 915439: (a, b) => {\n B.Ea("Softmax", a, { axis: b });\n },\n 915502: (a, b) => {\n B.Ea("Concat", a, { axis: b });\n },\n 915562: (a, b, c, e, f) => {\n B.Ea("Split", a, { axis: b, numOutputs: c, splitSizes: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 915707: (a) => {\n B.Ea("Expand", a, void 0);\n },\n 915761: (a, b) => {\n B.Ea("Gather", a, { axis: Number(b) });\n },\n 915832: (a, b) => {\n B.Ea("GatherElements", a, { axis: Number(b) });\n },\n 915911: (a, b, c, e, f, h, k, q, n, m, r) => {\n B.Ea("Resize", a, { antialias: b, axes: c ? 
Array.from(z().subarray(e >>> 0, e + c >>> 0)) : [], coordinateTransformMode: V(f), cubicCoeffA: h, excludeOutside: k, extrapolationValue: q, keepAspectRatioPolicy: V(n), mode: V(m), nearestMode: V(r) });\n },\n 916262: (a, b, c, e, f, h, k) => {\n B.Ea("Slice", a, { starts: b ? Array.from(z().subarray(c >>> 0, c + b >>> 0)) : [], ends: e ? Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [], axes: h ? Array.from(z().subarray(k >>> 0, k + h >>> 0)) : [] });\n },\n 916493: (a) => {\n B.Ea("Tile", a, void 0);\n },\n 916545: (a, b, c) => {\n B.Ea("LayerNormalization", a, { axis: Number(b), epsilon: Number(c) });\n },\n 916652: (a, b, c) => {\n B.Ea("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" });\n },\n 916766: (a, b, c) => {\n B.Ea("InstanceNormalization", a, { epsilon: b, format: c ? "NHWC" : "NCHW" });\n },\n 916880: (a) => {\n B.Ea(\n "Range",\n a,\n void 0\n );\n },\n 916933: (a, b) => {\n B.Ea("Einsum", a, { equation: V(b) });\n },\n 917014: (a, b, c, e, f) => {\n B.Ea("Pad", a, { mode: b, value: c, pads: e ? 
Array.from(z().subarray(f >>> 0, f + e >>> 0)) : [] });\n },\n 917146: (a) => {\n B.Ea("Gelu", a, void 0);\n },\n 917198: (a) => {\n B.Ea("BiasAdd", a, void 0);\n },\n 917253: (a) => {\n B.Ea("BiasSplitGelu", a, void 0);\n },\n 917314: (a, b) => {\n B.Ea("SkipLayerNormalization", a, { epsilon: b });\n },\n 917395: (a) => {\n B.zb(a);\n },\n 917429: (a, b) => B.Ab(a, b, B.bb.Fb, B.bb.errors),\n 917541: (a) => B.wb(a),\n 917574: (a) => B.yb(a),\n 917606: (a, b, c) => {\n B.jb(a, b, c, true);\n },\n 917645: (a, b, c) => {\n B.jb(a, b, c);\n }\n };\n function Ia(a) {\n this.name = "ExitStatus";\n this.message = `Program terminated with exit(${a})`;\n this.status = a;\n }\n function Ja(a) {\n a.terminate();\n a.onmessage = () => {\n };\n }\n function Ka(a) {\n (a = W.Qa[a]) || M();\n W.Eb(a);\n }\n function La(a) {\n var b = W.tb();\n if (!b)\n return 6;\n W.Ya.push(b);\n W.Qa[a.Xa] = b;\n b.Xa = a.Xa;\n var c = { cmd: "run", start_routine: a.Gb, arg: a.rb, pthread_ptr: a.Xa };\n G && b.unref();\n b.postMessage(c, a.Mb);\n return 0;\n }\n var Ma = "undefined" != typeof TextDecoder ? new TextDecoder("utf8") : void 0, Na = (a, b, c) => {\n b >>>= 0;\n var e = b + c;\n for (c = b; a[c] && !(c >= e); )\n ++c;\n if (16 < c - b && a.buffer && Ma)\n return Ma.decode(a.buffer instanceof SharedArrayBuffer ? a.slice(b, c) : a.subarray(b, c));\n for (e = ""; b < c; ) {\n var f = a[b++];\n if (f & 128) {\n var h = a[b++] & 63;\n if (192 == (f & 224))\n e += String.fromCharCode((f & 31) << 6 | h);\n else {\n var k = a[b++] & 63;\n f = 224 == (f & 240) ? (f & 15) << 12 | h << 6 | k : (f & 7) << 18 | h << 12 | k << 6 | a[b++] & 63;\n 65536 > f ? e += String.fromCharCode(f) : (f -= 65536, e += String.fromCharCode(55296 | f >> 10, 56320 | f & 1023));\n }\n } else\n e += String.fromCharCode(f);\n }\n return e;\n }, V = (a, b) => (a >>>= 0) ? 
Na(v(), a, b) : "";\n function Oa(a) {\n if (H)\n return X(1, 1, a);\n Q = a;\n if (!ya()) {\n W.Hb();\n if (B.onExit)\n B.onExit(a);\n P = true;\n }\n E(a, new Ia(a));\n }\n var Qa = (a) => {\n Q = a;\n if (H)\n throw Pa(a), "unwind";\n Oa(a);\n }, W = {\n ab: [],\n Ya: [],\n mb: [],\n Qa: {},\n gb: function() {\n H ? W.vb() : W.ub();\n },\n ub: function() {\n ua.unshift(() => {\n Aa();\n W.Bb(() => Ba());\n });\n },\n vb: function() {\n W.receiveObjectTransfer = W.Db;\n W.threadInitTLS = W.lb;\n W.setExitStatus = W.kb;\n noExitRuntime = false;\n },\n kb: function(a) {\n Q = a;\n },\n Sb: ["$terminateWorker"],\n Hb: function() {\n for (var a of W.Ya)\n Ja(a);\n for (a of W.ab)\n Ja(a);\n W.ab = [];\n W.Ya = [];\n W.Qa = [];\n },\n Eb: function(a) {\n var b = a.Xa;\n delete W.Qa[b];\n W.ab.push(a);\n W.Ya.splice(W.Ya.indexOf(a), 1);\n a.Xa = 0;\n Ra(b);\n },\n Db: function() {\n },\n lb: function() {\n W.mb.forEach((a) => a());\n },\n Cb: (a) => new Promise((b) => {\n a.onmessage = (h) => {\n h = h.data;\n var k = h.cmd;\n if (h.targetThread && h.targetThread != Sa()) {\n var q = W.Qa[h.Rb];\n q ? q.postMessage(h, h.transferList) : K(\'Internal error! Worker sent a message "\' + k + \'" to target pthread \' + h.targetThread + ", but that thread no longer exists!");\n } else if ("checkMailbox" === k)\n Ta();\n else if ("spawnThread" === k)\n La(h);\n else if ("cleanupThread" === k)\n Ka(h.thread);\n else if ("killThread" === k)\n h = h.thread, k = W.Qa[h], delete W.Qa[h], Ja(k), Ra(h), W.Ya.splice(\n W.Ya.indexOf(k),\n 1\n ), k.Xa = 0;\n else if ("cancelThread" === k)\n W.Qa[h.thread].postMessage({ cmd: "cancel" });\n else if ("loaded" === k)\n a.loaded = true, b(a);\n else if ("alert" === k)\n alert("Thread " + h.threadId + ": " + h.text);\n else if ("setimmediate" === h.target)\n a.postMessage(h);\n else if ("callHandler" === k)\n B[h.handler](...h.args);\n else\n k && K("worker sent an unknown command " + k);\n };\n a.onerror = (h) => {\n K("worker sent an error! 
" + h.filename + ":" + h.lineno + ": " + h.message);\n throw h;\n };\n G && (a.on("message", function(h) {\n a.onmessage({ data: h });\n }), a.on("error", function(h) {\n a.onerror(h);\n }));\n var c = [], e = ["onExit", "onAbort", "print", "printErr"], f;\n for (f of e)\n B.hasOwnProperty(f) && c.push(f);\n a.postMessage({ cmd: "load", handlers: c, urlOrBlob: B.mainScriptUrlOrBlob || _scriptDir, wasmMemory: l, wasmModule: sa });\n }),\n Bb: function(a) {\n a();\n },\n qb: function() {\n var a = la("ort-wasm-simd-threaded.worker.js");\n a = new Worker(a);\n W.ab.push(a);\n },\n tb: function() {\n 0 == W.ab.length && (W.qb(), W.Cb(W.ab[0]));\n return W.ab.pop();\n }\n };\n B.PThread = W;\n var Ua = (a) => {\n for (; 0 < a.length; )\n a.shift()(B);\n };\n B.establishStackSpace = function() {\n var a = Sa(), b = z()[a + 52 >> 2 >>> 0];\n a = z()[a + 56 >> 2 >>> 0];\n Va(b, b - a);\n Wa(b);\n };\n function Pa(a) {\n if (H)\n return X(2, 0, a);\n Qa(a);\n }\n B.invokeEntryPoint = function(a, b) {\n a = Xa.apply(null, [a, b]);\n ya() ? W.kb(a) : Ya(a);\n };\n function Za(a) {\n this.fb = a - 24;\n this.pb = function(b) {\n A()[this.fb + 4 >> 2 >>> 0] = b;\n };\n this.ob = function(b) {\n A()[this.fb + 8 >> 2 >>> 0] = b;\n };\n this.gb = function(b, c) {\n this.nb();\n this.pb(b);\n this.ob(c);\n };\n this.nb = function() {\n A()[this.fb + 16 >> 2 >>> 0] = 0;\n };\n }\n var $a = 0, ab = 0;\n function bb(a, b, c, e) {\n return H ? X(3, 1, a, b, c, e) : cb(a, b, c, e);\n }\n function cb(a, b, c, e) {\n a >>>= 0;\n b >>>= 0;\n c >>>= 0;\n e >>>= 0;\n if ("undefined" == typeof SharedArrayBuffer)\n return K("Current environment does not support SharedArrayBuffer, pthreads are not available!"), 6;\n var f = [];\n if (H && 0 === f.length)\n return bb(a, b, c, e);\n a = { Gb: c, Xa: a, rb: e, Mb: f };\n return H ? (a.Ob = "spawnThread", postMessage(a, f), 0) : La(a);\n }\n function db(a, b, c) {\n return H ? 
X(4, 1, a, b, c) : 0;\n }\n function eb(a, b) {\n if (H)\n return X(5, 1, a, b);\n }\n var fb = (a) => {\n for (var b = 0, c = 0; c < a.length; ++c) {\n var e = a.charCodeAt(c);\n 127 >= e ? b++ : 2047 >= e ? b += 2 : 55296 <= e && 57343 >= e ? (b += 4, ++c) : b += 3;\n }\n return b;\n }, gb = (a, b, c, e) => {\n c >>>= 0;\n if (!(0 < e))\n return 0;\n var f = c;\n e = c + e - 1;\n for (var h = 0; h < a.length; ++h) {\n var k = a.charCodeAt(h);\n if (55296 <= k && 57343 >= k) {\n var q = a.charCodeAt(++h);\n k = 65536 + ((k & 1023) << 10) | q & 1023;\n }\n if (127 >= k) {\n if (c >= e)\n break;\n b[c++ >>> 0] = k;\n } else {\n if (2047 >= k) {\n if (c + 1 >= e)\n break;\n b[c++ >>> 0] = 192 | k >> 6;\n } else {\n if (65535 >= k) {\n if (c + 2 >= e)\n break;\n b[c++ >>> 0] = 224 | k >> 12;\n } else {\n if (c + 3 >= e)\n break;\n b[c++ >>> 0] = 240 | k >> 18;\n b[c++ >>> 0] = 128 | k >> 12 & 63;\n }\n b[c++ >>> 0] = 128 | k >> 6 & 63;\n }\n b[c++ >>> 0] = 128 | k & 63;\n }\n }\n b[c >>> 0] = 0;\n return c - f;\n }, hb = (a, b, c) => gb(a, v(), b, c);\n function ib(a, b) {\n if (H)\n return X(6, 1, a, b);\n }\n function jb(a, b, c) {\n if (H)\n return X(7, 1, a, b, c);\n }\n function kb(a, b, c) {\n return H ? X(8, 1, a, b, c) : 0;\n }\n function lb(a, b) {\n if (H)\n return X(9, 1, a, b);\n }\n function mb(a, b, c) {\n if (H)\n return X(10, 1, a, b, c);\n }\n function nb(a, b, c, e) {\n if (H)\n return X(11, 1, a, b, c, e);\n }\n function ob(a, b, c, e) {\n if (H)\n return X(12, 1, a, b, c, e);\n }\n function pb(a, b, c, e) {\n if (H)\n return X(13, 1, a, b, c, e);\n }\n function qb(a) {\n if (H)\n return X(14, 1, a);\n }\n function rb(a, b) {\n if (H)\n return X(15, 1, a, b);\n }\n function sb(a, b, c) {\n if (H)\n return X(16, 1, a, b, c);\n }\n var tb = (a) => {\n if (!P)\n try {\n if (a(), !ya())\n try {\n H ? 
Ya(Q) : Qa(Q);\n } catch (b) {\n b instanceof Ia || "unwind" == b || E(1, b);\n }\n } catch (b) {\n b instanceof Ia || "unwind" == b || E(1, b);\n }\n };\n function ub(a) {\n a >>>= 0;\n "function" === typeof Atomics.Nb && (Atomics.Nb(z(), a >> 2, a).value.then(Ta), a += 128, Atomics.store(z(), a >> 2, 1));\n }\n B.__emscripten_thread_mailbox_await = ub;\n function Ta() {\n var a = Sa();\n a && (ub(a), tb(() => vb()));\n }\n B.checkMailbox = Ta;\n var Y = (a) => 0 === a % 4 && (0 !== a % 100 || 0 === a % 400), wb = [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335], xb = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];\n function yb(a, b, c, e, f, h, k, q) {\n return H ? X(17, 1, a, b, c, e, f, h, k, q) : -52;\n }\n function zb(a, b, c, e, f, h, k) {\n if (H)\n return X(18, 1, a, b, c, e, f, h, k);\n }\n var Bb = (a) => {\n var b = fb(a) + 1, c = Ab(b);\n c && hb(a, c, b);\n return c;\n }, Cb = [], Db = (a, b) => {\n Cb.length = 0;\n var c;\n for (b >>= 2; c = v()[a++ >>> 0]; )\n b += 105 != c & b, Cb.push(105 == c ? z()[b >>> 0] : da()[b++ >>> 1]), ++b;\n return Cb;\n }, Fb = (a) => {\n var b = Eb();\n a = a();\n Wa(b);\n return a;\n };\n function X(a, b) {\n var c = arguments.length - 2, e = arguments;\n return Fb(() => {\n for (var f = Gb(8 * c), h = f >> 3, k = 0; k < c; k++) {\n var q = e[2 + k];\n da()[h + k >>> 0] = q;\n }\n return Hb(a, c, f, b);\n });\n }\n var Ib = [], Jb = {}, Lb = () => {\n if (!Kb) {\n var a = { USER: "web_user", LOGNAME: "web_user", PATH: "/", PWD: "/", HOME: "/home/web_user", LANG: ("object" == typeof navigator && navigator.languages && navigator.languages[0] || "C").replace("-", "_") + ".UTF-8", _: ja || "./this.program" }, b;\n for (b in Jb)\n void 0 === Jb[b] ? 
delete a[b] : a[b] = Jb[b];\n var c = [];\n for (b in a)\n c.push(`${b}=${a[b]}`);\n Kb = c;\n }\n return Kb;\n }, Kb;\n function Mb(a, b) {\n if (H)\n return X(19, 1, a, b);\n a >>>= 0;\n b >>>= 0;\n var c = 0;\n Lb().forEach(function(e, f) {\n var h = b + c;\n f = A()[a + 4 * f >> 2 >>> 0] = h;\n for (h = 0; h < e.length; ++h)\n d()[f++ >> 0 >>> 0] = e.charCodeAt(h);\n d()[f >> 0 >>> 0] = 0;\n c += e.length + 1;\n });\n return 0;\n }\n function Nb(a, b) {\n if (H)\n return X(20, 1, a, b);\n a >>>= 0;\n b >>>= 0;\n var c = Lb();\n A()[a >> 2 >>> 0] = c.length;\n var e = 0;\n c.forEach(function(f) {\n e += f.length + 1;\n });\n A()[b >> 2 >>> 0] = e;\n return 0;\n }\n function Ob(a) {\n return H ? X(21, 1, a) : 52;\n }\n function Pb(a, b, c, e) {\n return H ? X(22, 1, a, b, c, e) : 52;\n }\n function Qb(a, b, c, e, f) {\n return H ? X(23, 1, a, b, c, e, f) : 70;\n }\n var Rb = [null, [], []];\n function Tb(a, b, c, e) {\n if (H)\n return X(24, 1, a, b, c, e);\n b >>>= 0;\n c >>>= 0;\n e >>>= 0;\n for (var f = 0, h = 0; h < c; h++) {\n var k = A()[b >> 2 >>> 0], q = A()[b + 4 >> 2 >>> 0];\n b += 8;\n for (var n = 0; n < q; n++) {\n var m = v()[k + n >>> 0], r = Rb[a];\n 0 === m || 10 === m ? ((1 === a ? ra : K)(Na(r, 0)), r.length = 0) : r.push(m);\n }\n f += q;\n }\n A()[e >> 2 >>> 0] = f;\n return 0;\n }\n var Ub = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], Vb = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];\n function Wb(a) {\n var b = Array(fb(a) + 1);\n gb(a, b, 0, b.length);\n return b;\n }\n var Xb = (a, b) => {\n d().set(a, b >>> 0);\n };\n function Yb(a, b, c, e) {\n function f(g, u, y) {\n for (g = "number" == typeof g ? g.toString() : g || ""; g.length < u; )\n g = y[0] + g;\n return g;\n }\n function h(g, u) {\n return f(g, u, "0");\n }\n function k(g, u) {\n function y(Sb) {\n return 0 > Sb ? -1 : 0 < Sb ? 
1 : 0;\n }\n var O;\n 0 === (O = y(g.getFullYear() - u.getFullYear())) && 0 === (O = y(g.getMonth() - u.getMonth())) && (O = y(g.getDate() - u.getDate()));\n return O;\n }\n function q(g) {\n switch (g.getDay()) {\n case 0:\n return new Date(g.getFullYear() - 1, 11, 29);\n case 1:\n return g;\n case 2:\n return new Date(g.getFullYear(), 0, 3);\n case 3:\n return new Date(\n g.getFullYear(),\n 0,\n 2\n );\n case 4:\n return new Date(g.getFullYear(), 0, 1);\n case 5:\n return new Date(g.getFullYear() - 1, 11, 31);\n case 6:\n return new Date(g.getFullYear() - 1, 11, 30);\n }\n }\n function n(g) {\n var u = g.Za;\n for (g = new Date(new Date(g.$a + 1900, 0, 1).getTime()); 0 < u; ) {\n var y = g.getMonth(), O = (Y(g.getFullYear()) ? Ub : Vb)[y];\n if (u > O - g.getDate())\n u -= O - g.getDate() + 1, g.setDate(1), 11 > y ? g.setMonth(y + 1) : (g.setMonth(0), g.setFullYear(g.getFullYear() + 1));\n else {\n g.setDate(g.getDate() + u);\n break;\n }\n }\n y = new Date(g.getFullYear() + 1, 0, 4);\n u = q(new Date(\n g.getFullYear(),\n 0,\n 4\n ));\n y = q(y);\n return 0 >= k(u, g) ? 0 >= k(y, g) ? g.getFullYear() + 1 : g.getFullYear() : g.getFullYear() - 1;\n }\n a >>>= 0;\n b >>>= 0;\n c >>>= 0;\n e >>>= 0;\n var m = z()[e + 40 >> 2 >>> 0];\n e = { Kb: z()[e >> 2 >>> 0], Jb: z()[e + 4 >> 2 >>> 0], cb: z()[e + 8 >> 2 >>> 0], ib: z()[e + 12 >> 2 >>> 0], eb: z()[e + 16 >> 2 >>> 0], $a: z()[e + 20 >> 2 >>> 0], Wa: z()[e + 24 >> 2 >>> 0], Za: z()[e + 28 >> 2 >>> 0], Tb: z()[e + 32 >> 2 >>> 0], Ib: z()[e + 36 >> 2 >>> 0], Lb: m ? 
V(m) : "" };\n c = V(c);\n m = {\n "%c": "%a %b %d %H:%M:%S %Y",\n "%D": "%m/%d/%y",\n "%F": "%Y-%m-%d",\n "%h": "%b",\n "%r": "%I:%M:%S %p",\n "%R": "%H:%M",\n "%T": "%H:%M:%S",\n "%x": "%m/%d/%y",\n "%X": "%H:%M:%S",\n "%Ec": "%c",\n "%EC": "%C",\n "%Ex": "%m/%d/%y",\n "%EX": "%H:%M:%S",\n "%Ey": "%y",\n "%EY": "%Y",\n "%Od": "%d",\n "%Oe": "%e",\n "%OH": "%H",\n "%OI": "%I",\n "%Om": "%m",\n "%OM": "%M",\n "%OS": "%S",\n "%Ou": "%u",\n "%OU": "%U",\n "%OV": "%V",\n "%Ow": "%w",\n "%OW": "%W",\n "%Oy": "%y"\n };\n for (var r in m)\n c = c.replace(new RegExp(r, "g"), m[r]);\n var w = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split(" "), x = "January February March April May June July August September October November December".split(" ");\n m = { "%a": (g) => w[g.Wa].substring(0, 3), "%A": (g) => w[g.Wa], "%b": (g) => x[g.eb].substring(0, 3), "%B": (g) => x[g.eb], "%C": (g) => h((g.$a + 1900) / 100 | 0, 2), "%d": (g) => h(g.ib, 2), "%e": (g) => f(g.ib, 2, " "), "%g": (g) => n(g).toString().substring(2), "%G": (g) => n(g), "%H": (g) => h(g.cb, 2), "%I": (g) => {\n g = g.cb;\n 0 == g ? g = 12 : 12 < g && (g -= 12);\n return h(g, 2);\n }, "%j": (g) => {\n for (var u = 0, y = 0; y <= g.eb - 1; u += (Y(g.$a + 1900) ? Ub : Vb)[y++])\n ;\n return h(g.ib + u, 3);\n }, "%m": (g) => h(g.eb + 1, 2), "%M": (g) => h(g.Jb, 2), "%n": () => "\\n", "%p": (g) => 0 <= g.cb && 12 > g.cb ? 
"AM" : "PM", "%S": (g) => h(g.Kb, 2), "%t": () => " ", "%u": (g) => g.Wa || 7, "%U": (g) => h(Math.floor((g.Za + 7 - g.Wa) / 7), 2), "%V": (g) => {\n var u = Math.floor((g.Za + 7 - (g.Wa + 6) % 7) / 7);\n 2 >= (g.Wa + 371 - g.Za - 2) % 7 && u++;\n if (u)\n 53 == u && (y = (g.Wa + 371 - g.Za) % 7, 4 == y || 3 == y && Y(g.$a) || (u = 1));\n else {\n u = 52;\n var y = (g.Wa + 7 - g.Za - 1) % 7;\n (4 == y || 5 == y && Y(g.$a % 400 - 1)) && u++;\n }\n return h(u, 2);\n }, "%w": (g) => g.Wa, "%W": (g) => h(Math.floor((g.Za + 7 - (g.Wa + 6) % 7) / 7), 2), "%y": (g) => (g.$a + 1900).toString().substring(2), "%Y": (g) => g.$a + 1900, "%z": (g) => {\n g = g.Ib;\n var u = 0 <= g;\n g = Math.abs(g) / 60;\n return (u ? "+" : "-") + String("0000" + (g / 60 * 100 + g % 60)).slice(-4);\n }, "%Z": (g) => g.Lb, "%%": () => "%" };\n c = c.replace(/%%/g, "\\0\\0");\n for (r in m)\n c.includes(r) && (c = c.replace(new RegExp(r, "g"), m[r](e)));\n c = c.replace(/\\0\\0/g, "%");\n r = Wb(c);\n if (r.length > b)\n return 0;\n Xb(r, a);\n return r.length - 1;\n }\n function Zb(a) {\n try {\n a();\n } catch (b) {\n M(b);\n }\n }\n function $b(a) {\n var b = {}, c;\n for (c in a)\n (function(e) {\n var f = a[e];\n b[e] = "function" == typeof f ? 
function() {\n ac.push(e);\n try {\n return f.apply(null, arguments);\n } finally {\n P || (ac.pop() === e || M(), D && 1 === Z && 0 === ac.length && (Z = 0, xa += 1, Zb(bc), "undefined" != typeof Fibers && Fibers.Ub()));\n }\n } : f;\n })(c);\n return b;\n }\n var Z = 0, D = null, cc = 0, ac = [], dc = {}, ec = {}, fc = 0, gc = null, hc = [];\n function ha() {\n return new Promise((a, b) => {\n gc = { resolve: a, reject: b };\n });\n }\n function ic() {\n var a = Ab(65548), b = a + 12;\n A()[a >> 2 >>> 0] = b;\n A()[a + 4 >> 2 >>> 0] = b + 65536;\n b = ac[0];\n var c = dc[b];\n void 0 === c && (c = fc++, dc[b] = c, ec[c] = b);\n b = c;\n z()[a + 8 >> 2 >>> 0] = b;\n return a;\n }\n function jc() {\n var a = z()[D + 8 >> 2 >>> 0];\n a = N[ec[a]];\n --xa;\n return a();\n }\n function kc(a) {\n if (!P) {\n if (0 === Z) {\n var b = false, c = false;\n a((e = 0) => {\n if (!P && (cc = e, b = true, c)) {\n Z = 2;\n Zb(() => lc(D));\n "undefined" != typeof Browser && Browser.hb.sb && Browser.hb.resume();\n e = false;\n try {\n var f = jc();\n } catch (q) {\n f = q, e = true;\n }\n var h = false;\n if (!D) {\n var k = gc;\n k && (gc = null, (e ? k.reject : k.resolve)(f), h = true);\n }\n if (e && !h)\n throw f;\n }\n });\n c = true;\n b || (Z = 1, D = ic(), "undefined" != typeof Browser && Browser.hb.sb && Browser.hb.pause(), Zb(() => mc(D)));\n } else\n 2 === Z ? (Z = 0, Zb(nc), oc(D), D = null, hc.forEach((e) => tb(e))) : M(`invalid state: ${Z}`);\n return cc;\n }\n }\n function pc(a) {\n return kc((b) => {\n a().then(b);\n });\n }\n W.gb();\n var qc = [null, Oa, Pa, bb, db, eb, ib, jb, kb, lb, mb, nb, ob, pb, qb, rb, sb, yb, zb, Mb, Nb, Ob, Pb, Qb, Tb], tc = {\n r: function(a, b, c) {\n return pc(async () => {\n await B.xb(a, b, c);\n });\n },\n b: function(a, b, c) {\n a >>>= 0;\n new Za(a).gb(b >>> 0, c >>> 0);\n $a = a;\n ab++;\n throw $a;\n },\n O: function(a) {\n rc(a >>> 0, !F, 1, !ka, 131072, false);\n W.lb();\n },\n l: function(a) {\n a >>>= 0;\n H ? 
postMessage({ cmd: "cleanupThread", thread: a }) : Ka(a);\n },\n I: cb,\n i: db,\n U: eb,\n E: ib,\n G: jb,\n V: kb,\n S: lb,\n K: mb,\n R: nb,\n p: ob,\n F: pb,\n C: qb,\n T: rb,\n D: sb,\n q: () => true,\n A: function(a, b) {\n a >>>= 0;\n a == b >>> 0 ? setTimeout(() => Ta()) : H ? postMessage({\n targetThread: a,\n cmd: "checkMailbox"\n }) : (a = W.Qa[a]) && a.postMessage({ cmd: "checkMailbox" });\n },\n M: function() {\n return -1;\n },\n N: ub,\n X: function(a) {\n G && W.Qa[a >>> 0].ref();\n },\n u: function(a, b, c) {\n a = b + 2097152 >>> 0 < 4194305 - !!a ? (a >>> 0) + 4294967296 * b : NaN;\n c >>>= 0;\n a = new Date(1e3 * a);\n z()[c >> 2 >>> 0] = a.getUTCSeconds();\n z()[c + 4 >> 2 >>> 0] = a.getUTCMinutes();\n z()[c + 8 >> 2 >>> 0] = a.getUTCHours();\n z()[c + 12 >> 2 >>> 0] = a.getUTCDate();\n z()[c + 16 >> 2 >>> 0] = a.getUTCMonth();\n z()[c + 20 >> 2 >>> 0] = a.getUTCFullYear() - 1900;\n z()[c + 24 >> 2 >>> 0] = a.getUTCDay();\n a = (a.getTime() - Date.UTC(\n a.getUTCFullYear(),\n 0,\n 1,\n 0,\n 0,\n 0,\n 0\n )) / 864e5 | 0;\n z()[c + 28 >> 2 >>> 0] = a;\n },\n v: function(a, b, c) {\n a = b + 2097152 >>> 0 < 4194305 - !!a ? (a >>> 0) + 4294967296 * b : NaN;\n c >>>= 0;\n a = new Date(1e3 * a);\n z()[c >> 2 >>> 0] = a.getSeconds();\n z()[c + 4 >> 2 >>> 0] = a.getMinutes();\n z()[c + 8 >> 2 >>> 0] = a.getHours();\n z()[c + 12 >> 2 >>> 0] = a.getDate();\n z()[c + 16 >> 2 >>> 0] = a.getMonth();\n z()[c + 20 >> 2 >>> 0] = a.getFullYear() - 1900;\n z()[c + 24 >> 2 >>> 0] = a.getDay();\n b = (Y(a.getFullYear()) ? 
wb : xb)[a.getMonth()] + a.getDate() - 1 | 0;\n z()[c + 28 >> 2 >>> 0] = b;\n z()[c + 36 >> 2 >>> 0] = -(60 * a.getTimezoneOffset());\n b = new Date(\n a.getFullYear(),\n 6,\n 1\n ).getTimezoneOffset();\n var e = new Date(a.getFullYear(), 0, 1).getTimezoneOffset();\n a = (b != e && a.getTimezoneOffset() == Math.min(e, b)) | 0;\n z()[c + 32 >> 2 >>> 0] = a;\n },\n w: function(a) {\n a >>>= 0;\n var b = new Date(z()[a + 20 >> 2 >>> 0] + 1900, z()[a + 16 >> 2 >>> 0], z()[a + 12 >> 2 >>> 0], z()[a + 8 >> 2 >>> 0], z()[a + 4 >> 2 >>> 0], z()[a >> 2 >>> 0], 0), c = z()[a + 32 >> 2 >>> 0], e = b.getTimezoneOffset(), f = new Date(b.getFullYear(), 6, 1).getTimezoneOffset(), h = new Date(b.getFullYear(), 0, 1).getTimezoneOffset(), k = Math.min(h, f);\n 0 > c ? z()[a + 32 >> 2 >>> 0] = Number(f != h && k == e) : 0 < c != (k == e) && (f = Math.max(h, f), b.setTime(b.getTime() + 6e4 * ((0 < c ? k : f) - e)));\n z()[a + 24 >> 2 >>> 0] = b.getDay();\n c = (Y(b.getFullYear()) ? wb : xb)[b.getMonth()] + b.getDate() - 1 | 0;\n z()[a + 28 >> 2 >>> 0] = c;\n z()[a >> 2 >>> 0] = b.getSeconds();\n z()[a + 4 >> 2 >>> 0] = b.getMinutes();\n z()[a + 8 >> 2 >>> 0] = b.getHours();\n z()[a + 12 >> 2 >>> 0] = b.getDate();\n z()[a + 16 >> 2 >>> 0] = b.getMonth();\n z()[a + 20 >> 2 >>> 0] = b.getYear();\n a = b.getTime() / 1e3;\n return sc((U = a, 1 <= +Math.abs(U) ? 0 < U ? +Math.floor(U / 4294967296) >>> 0 : ~~+Math.ceil((U - +(~~U >>> 0)) / 4294967296) >>> 0 : 0)), a >>> 0;\n },\n s: yb,\n t: zb,\n z: function(a, b, c) {\n function e(m) {\n return (m = m.toTimeString().match(/\\(([A-Za-z ]+)\\)$/)) ? m[1] : "GMT";\n }\n a >>>= 0;\n b >>>= 0;\n c >>>= 0;\n var f = (/* @__PURE__ */ new Date()).getFullYear(), h = new Date(f, 0, 1), k = new Date(f, 6, 1);\n f = h.getTimezoneOffset();\n var q = k.getTimezoneOffset(), n = Math.max(f, q);\n A()[a >> 2 >>> 0] = 60 * n;\n z()[b >> 2 >>> 0] = Number(f != q);\n a = e(h);\n b = e(k);\n a = Bb(a);\n b = Bb(b);\n q < f ? 
(A()[c >> 2 >>> 0] = a, A()[c + 4 >> 2 >>> 0] = b) : (A()[c >> 2 >>> 0] = b, A()[c + 4 >> 2 >>> 0] = a);\n },\n d: () => {\n M("");\n },\n c: function(a, b, c) {\n a >>>= 0;\n b = Db(b >>> 0, c >>> 0);\n return Ha[a].apply(null, b);\n },\n k: function(a, b, c) {\n a >>>= 0;\n b = Db(b >>> 0, c >>> 0);\n return Ha[a].apply(null, b);\n },\n m: function() {\n },\n j: function() {\n return Date.now();\n },\n W: () => {\n xa += 1;\n throw "unwind";\n },\n B: function() {\n return 4294901760;\n },\n f: () => performance.timeOrigin + performance.now(),\n g: function() {\n return G ? (init_os(), __toCommonJS(os_exports)).cpus().length : navigator.hardwareConcurrency;\n },\n L: function(a, b, c, e) {\n W.Pb = b >>> 0;\n Ib.length = c;\n b = e >>> 0 >> 3;\n for (e = 0; e < c; e++)\n Ib[e] = da()[b + e >>> 0];\n return (0 > a ? Ha[-a - 1] : qc[a]).apply(null, Ib);\n },\n y: function(a) {\n a >>>= 0;\n var b = v().length;\n if (a <= b || 4294901760 < a)\n return false;\n for (var c = 1; 4 >= c; c *= 2) {\n var e = b * (1 + 0.2 / c);\n e = Math.min(e, a + 100663296);\n var f = Math;\n e = Math.max(a, e);\n a: {\n f = f.min.call(f, 4294901760, e + (65536 - e % 65536) % 65536) - l.buffer.byteLength + 65535 >>> 16;\n try {\n l.grow(f);\n t();\n var h = 1;\n break a;\n } catch (k) {\n }\n h = void 0;\n }\n if (h)\n return true;\n }\n return false;\n },\n P: Mb,\n Q: Nb,\n H: Qa,\n h: Ob,\n o: Pb,\n x: Qb,\n n: Tb,\n a: l || B.wasmMemory,\n J: Yb,\n e: function(a, b, c, e) {\n return Yb(a >>> 0, b >>> 0, c >>> 0, e >>> 0);\n }\n };\n (function() {\n function a(c, e) {\n c = c.exports;\n c = $b(c);\n N = c = uc(c);\n W.mb.push(N.Da);\n va.unshift(N.Y);\n sa = e;\n Ba();\n return c;\n }\n var b = { a: tc };\n Aa();\n if (B.instantiateWasm)\n try {\n return B.instantiateWasm(b, a);\n } catch (c) {\n K("Module.instantiateWasm callback failed with error: " + c), C(c);\n }\n Ga(b, function(c) {\n a(c.instance, c.module);\n }).catch(C);\n return {};\n })();\n B._OrtInit = (a, b) => 
(B._OrtInit = N.Z)(a, b);\n B._OrtGetLastError = (a, b) => (B._OrtGetLastError = N._)(a, b);\n B._OrtCreateSessionOptions = (a, b, c, e, f, h, k, q, n, m) => (B._OrtCreateSessionOptions = N.$)(a, b, c, e, f, h, k, q, n, m);\n B._OrtAppendExecutionProvider = (a, b) => (B._OrtAppendExecutionProvider = N.aa)(a, b);\n B._OrtAddFreeDimensionOverride = (a, b, c) => (B._OrtAddFreeDimensionOverride = N.ba)(a, b, c);\n B._OrtAddSessionConfigEntry = (a, b, c) => (B._OrtAddSessionConfigEntry = N.ca)(a, b, c);\n B._OrtReleaseSessionOptions = (a) => (B._OrtReleaseSessionOptions = N.da)(a);\n B._OrtCreateSession = (a, b, c) => (B._OrtCreateSession = N.ea)(a, b, c);\n B._OrtReleaseSession = (a) => (B._OrtReleaseSession = N.fa)(a);\n B._OrtGetInputOutputCount = (a, b, c) => (B._OrtGetInputOutputCount = N.ga)(a, b, c);\n B._OrtGetInputName = (a, b) => (B._OrtGetInputName = N.ha)(a, b);\n B._OrtGetOutputName = (a, b) => (B._OrtGetOutputName = N.ia)(a, b);\n B._OrtFree = (a) => (B._OrtFree = N.ja)(a);\n B._OrtCreateTensor = (a, b, c, e, f, h) => (B._OrtCreateTensor = N.ka)(a, b, c, e, f, h);\n B._OrtGetTensorData = (a, b, c, e, f) => (B._OrtGetTensorData = N.la)(a, b, c, e, f);\n B._OrtReleaseTensor = (a) => (B._OrtReleaseTensor = N.ma)(a);\n B._OrtCreateRunOptions = (a, b, c, e) => (B._OrtCreateRunOptions = N.na)(a, b, c, e);\n B._OrtAddRunConfigEntry = (a, b, c) => (B._OrtAddRunConfigEntry = N.oa)(a, b, c);\n B._OrtReleaseRunOptions = (a) => (B._OrtReleaseRunOptions = N.pa)(a);\n B._OrtCreateBinding = (a) => (B._OrtCreateBinding = N.qa)(a);\n B._OrtBindInput = (a, b, c) => (B._OrtBindInput = N.ra)(a, b, c);\n B._OrtBindOutput = (a, b, c, e) => (B._OrtBindOutput = N.sa)(a, b, c, e);\n B._OrtClearBoundOutputs = (a) => (B._OrtClearBoundOutputs = N.ta)(a);\n B._OrtReleaseBinding = (a) => (B._OrtReleaseBinding = N.ua)(a);\n B._OrtRunWithBinding = (a, b, c, e, f) => (B._OrtRunWithBinding = N.va)(a, b, c, e, f);\n B._OrtRun = (a, b, c, e, f, h, k, q) => (B._OrtRun = N.wa)(a, b, c, e, f, 
h, k, q);\n B._OrtEndProfiling = (a) => (B._OrtEndProfiling = N.xa)(a);\n B._JsepOutput = (a, b, c) => (B._JsepOutput = N.ya)(a, b, c);\n B._JsepGetNodeName = (a) => (B._JsepGetNodeName = N.za)(a);\n var Sa = B._pthread_self = () => (Sa = B._pthread_self = N.Aa)(), Ab = B._malloc = (a) => (Ab = B._malloc = N.Ba)(a), oc = B._free = (a) => (oc = B._free = N.Ca)(a);\n B.__emscripten_tls_init = () => (B.__emscripten_tls_init = N.Da)();\n var rc = B.__emscripten_thread_init = (a, b, c, e, f, h) => (rc = B.__emscripten_thread_init = N.Fa)(a, b, c, e, f, h);\n B.__emscripten_thread_crashed = () => (B.__emscripten_thread_crashed = N.Ga)();\n var Hb = (a, b, c, e) => (Hb = N.Ha)(a, b, c, e), Ra = (a) => (Ra = N.Ia)(a), Ya = B.__emscripten_thread_exit = (a) => (Ya = B.__emscripten_thread_exit = N.Ja)(a), vb = B.__emscripten_check_mailbox = () => (vb = B.__emscripten_check_mailbox = N.Ka)(), sc = (a) => (sc = N.La)(a), Va = (a, b) => (Va = N.Ma)(a, b), Eb = () => (Eb = N.Na)(), Wa = (a) => (Wa = N.Oa)(a), Gb = (a) => (Gb = N.Pa)(a), Xa = B.dynCall_ii = (a, b) => (Xa = B.dynCall_ii = N.Ra)(a, b), mc = (a) => (mc = N.Sa)(a), bc = () => (bc = N.Ta)(), lc = (a) => (lc = N.Ua)(a), nc = () => (nc = N.Va)();\n B.___start_em_js = 917678;\n B.___stop_em_js = 917839;\n function uc(a) {\n a = Object.assign({}, a);\n var b = (e) => () => e() >>> 0, c = (e) => (f) => e(f) >>> 0;\n a.__errno_location = b(a.__errno_location);\n a.pthread_self = b(a.pthread_self);\n a.malloc = c(a.malloc);\n a.stackSave = b(a.stackSave);\n a.stackAlloc = c(a.stackAlloc);\n return a;\n }\n B.keepRuntimeAlive = ya;\n B.wasmMemory = l;\n B.stackAlloc = Gb;\n B.stackSave = Eb;\n B.stackRestore = Wa;\n B.UTF8ToString = V;\n B.stringToUTF8 = hb;\n B.lengthBytesUTF8 = fb;\n B.ExitStatus = Ia;\n B.PThread = W;\n var vc;\n S = function wc() {\n vc || xc();\n vc || (S = wc);\n };\n function xc() {\n function a() {\n if (!vc && (vc = true, B.calledRun = true, !P)) {\n H || Ua(va);\n fa(B);\n if 
(B.onRuntimeInitialized)\n B.onRuntimeInitialized();\n if (!H) {\n if (B.postRun)\n for ("function" == typeof B.postRun && (B.postRun = [B.postRun]); B.postRun.length; ) {\n var b = B.postRun.shift();\n wa.unshift(b);\n }\n Ua(wa);\n }\n }\n }\n if (!(0 < R))\n if (H)\n fa(B), H || Ua(va), startWorker(B);\n else {\n if (B.preRun)\n for ("function" == typeof B.preRun && (B.preRun = [B.preRun]); B.preRun.length; )\n ua.unshift(B.preRun.shift());\n Ua(ua);\n 0 < R || (B.setStatus ? (B.setStatus("Running..."), setTimeout(function() {\n setTimeout(\n function() {\n B.setStatus("");\n },\n 1\n );\n a();\n }, 1)) : a());\n }\n }\n if (B.preInit)\n for ("function" == typeof B.preInit && (B.preInit = [B.preInit]); 0 < B.preInit.length; )\n B.preInit.pop()();\n xc();\n return moduleArg.ready;\n };\n })();\n if (typeof exports === "object" && typeof module === "object")\n module.exports = ortWasmThreaded;\n else if (typeof define === "function" && define["amd"])\n define([], () => ortWasmThreaded);\n }\n });\n\n // web/lib/wasm/binding/ort-wasm-threaded.worker.js\n var require_ort_wasm_threaded_worker = __commonJS({\n "web/lib/wasm/binding/ort-wasm-threaded.worker.js"(exports, module) {\n module.exports = \'"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" 
");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\\\\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason??e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}ortWasmThreaded(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,/*isMainBrowserThread=*/0,/*isMainRuntimeThread=*/0,/*canBlock=*/1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else 
if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}}self.onmessage=handleMessage;\\n\';\n }\n });\n\n // web/lib/wasm/wasm-common.ts\n var tensorDataTypeStringToEnum, tensorDataTypeEnumToString, getTensorElementSize, tensorTypeToTypedArrayConstructor, logLevelStringToEnum, isGpuBufferSupportedType, dataLocationStringToEnum;\n var init_wasm_common = __esm({\n "web/lib/wasm/wasm-common.ts"() {\n "use strict";\n tensorDataTypeStringToEnum = (type) => {\n switch (type) {\n case "int8":\n return 3 /* int8 */;\n case "uint8":\n return 2 /* uint8 */;\n case "bool":\n return 9 /* bool */;\n case "int16":\n return 5 /* int16 */;\n case "uint16":\n return 4 /* uint16 */;\n case "int32":\n return 6 /* int32 */;\n case "uint32":\n return 12 /* uint32 */;\n case "float16":\n return 10 /* float16 */;\n case "float32":\n return 1 /* float */;\n case "float64":\n return 11 /* double */;\n case "string":\n return 8 /* string */;\n case "int64":\n return 7 /* int64 */;\n case "uint64":\n return 13 /* uint64 */;\n default:\n throw new Error(`unsupported data type: ${type}`);\n }\n };\n tensorDataTypeEnumToString = (typeProto) => {\n switch (typeProto) {\n case 3 /* int8 */:\n return "int8";\n case 2 /* uint8 */:\n return "uint8";\n case 9 /* bool */:\n return "bool";\n case 5 /* int16 */:\n return "int16";\n case 4 /* uint16 */:\n return "uint16";\n case 6 /* int32 */:\n return "int32";\n case 12 /* uint32 */:\n return "uint32";\n case 10 /* float16 */:\n return "float16";\n case 1 /* float */:\n return "float32";\n case 11 /* double */:\n return "float64";\n case 8 /* string */:\n return "string";\n case 7 /* int64 */:\n return "int64";\n case 13 /* uint64 */:\n return "uint64";\n default:\n throw new Error(`unsupported data type: ${typeProto}`);\n }\n };\n getTensorElementSize = (dateType) => [void 0, 4, 1, 1, 2, 2, 4, 8, void 0, 1, 2, 8, 4, 8, void 0, 
void 0, void 0][dateType];\n tensorTypeToTypedArrayConstructor = (type) => {\n switch (type) {\n case "float16":\n return typeof Float16Array !== "undefined" && Float16Array.from ? Float16Array : Uint16Array;\n case "float32":\n return Float32Array;\n case "uint8":\n return Uint8Array;\n case "int8":\n return Int8Array;\n case "uint16":\n return Uint16Array;\n case "int16":\n return Int16Array;\n case "int32":\n return Int32Array;\n case "bool":\n return Uint8Array;\n case "float64":\n return Float64Array;\n case "uint32":\n return Uint32Array;\n case "int64":\n return BigInt64Array;\n case "uint64":\n return BigUint64Array;\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n };\n logLevelStringToEnum = (logLevel) => {\n switch (logLevel) {\n case "verbose":\n return 0;\n case "info":\n return 1;\n case "warning":\n return 2;\n case "error":\n return 3;\n case "fatal":\n return 4;\n default:\n throw new Error(`unsupported logging level: ${logLevel}`);\n }\n };\n isGpuBufferSupportedType = (type) => type === "float32" || type === "float16" || type === "int32" || type === "int64" || type === "uint32" || type === "uint8" || type === "bool";\n dataLocationStringToEnum = (location) => {\n switch (location) {\n case "none":\n return 0;\n case "cpu":\n return 1;\n case "cpu-pinned":\n return 2;\n case "texture":\n return 3;\n case "gpu-buffer":\n return 4;\n default:\n throw new Error(`unsupported data location: ${location}`);\n }\n };\n }\n });\n\n // common/dist/esm/backend-impl.js\n var init_backend_impl = __esm({\n "common/dist/esm/backend-impl.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/backend.js\n var init_backend = __esm({\n "common/dist/esm/backend.js"() {\n "use strict";\n init_backend_impl();\n }\n });\n\n // common/dist/esm/version.js\n var version;\n var init_version = __esm({\n "common/dist/esm/version.js"() {\n "use strict";\n version = "1.18.0";\n }\n });\n\n // common/dist/esm/env-impl.js\n var logLevelValue, env;\n var 
init_env_impl = __esm({\n "common/dist/esm/env-impl.js"() {\n "use strict";\n init_version();\n logLevelValue = "warning";\n env = {\n wasm: {},\n webgl: {},\n webgpu: {},\n versions: { common: version },\n set logLevel(value) {\n if (value === void 0) {\n return;\n }\n if (typeof value !== "string" || ["verbose", "info", "warning", "error", "fatal"].indexOf(value) === -1) {\n throw new Error(`Unsupported logging level: ${value}`);\n }\n logLevelValue = value;\n },\n get logLevel() {\n return logLevelValue;\n }\n };\n Object.defineProperty(env, "logLevel", { enumerable: true });\n }\n });\n\n // common/dist/esm/env.js\n var env2;\n var init_env = __esm({\n "common/dist/esm/env.js"() {\n "use strict";\n init_env_impl();\n env2 = env;\n }\n });\n\n // common/dist/esm/tensor-conversion-impl.js\n var init_tensor_conversion_impl = __esm({\n "common/dist/esm/tensor-conversion-impl.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/tensor-factory-impl.js\n var init_tensor_factory_impl = __esm({\n "common/dist/esm/tensor-factory-impl.js"() {\n "use strict";\n init_tensor_impl();\n }\n });\n\n // common/dist/esm/tensor-impl-type-mapping.js\n var init_tensor_impl_type_mapping = __esm({\n "common/dist/esm/tensor-impl-type-mapping.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/tensor-utils-impl.js\n var init_tensor_utils_impl = __esm({\n "common/dist/esm/tensor-utils-impl.js"() {\n "use strict";\n init_tensor_impl();\n }\n });\n\n // common/dist/esm/tensor-impl.js\n var init_tensor_impl = __esm({\n "common/dist/esm/tensor-impl.js"() {\n "use strict";\n init_tensor_conversion_impl();\n init_tensor_factory_impl();\n init_tensor_impl_type_mapping();\n init_tensor_utils_impl();\n }\n });\n\n // common/dist/esm/tensor.js\n var init_tensor = __esm({\n "common/dist/esm/tensor.js"() {\n "use strict";\n init_tensor_impl();\n }\n });\n\n // common/dist/esm/trace.js\n var TRACE, TRACE_FUNC, TRACE_FUNC_BEGIN, TRACE_FUNC_END;\n var init_trace = __esm({\n 
"common/dist/esm/trace.js"() {\n "use strict";\n init_env_impl();\n TRACE = (deviceType, label) => {\n if (typeof env.trace === "undefined" ? !env.wasm.trace : !env.trace) {\n return;\n }\n console.timeStamp(`${deviceType}::ORT::${label}`);\n };\n TRACE_FUNC = (msg, extraMsg) => {\n const stack = new Error().stack?.split(/\\r\\n|\\r|\\n/g) || [];\n let hasTraceFunc = false;\n for (let i = 0; i < stack.length; i++) {\n if (hasTraceFunc && !stack[i].includes("TRACE_FUNC")) {\n let label = `FUNC_${msg}::${stack[i].trim().split(" ")[1]}`;\n if (extraMsg) {\n label += `::${extraMsg}`;\n }\n TRACE("CPU", label);\n return;\n }\n if (stack[i].includes("TRACE_FUNC")) {\n hasTraceFunc = true;\n }\n }\n };\n TRACE_FUNC_BEGIN = (extraMsg) => {\n if (typeof env.trace === "undefined" ? !env.wasm.trace : !env.trace) {\n return;\n }\n TRACE_FUNC("BEGIN", extraMsg);\n };\n TRACE_FUNC_END = (extraMsg) => {\n if (typeof env.trace === "undefined" ? !env.wasm.trace : !env.trace) {\n return;\n }\n TRACE_FUNC("END", extraMsg);\n };\n }\n });\n\n // common/dist/esm/inference-session-impl.js\n var init_inference_session_impl = __esm({\n "common/dist/esm/inference-session-impl.js"() {\n "use strict";\n init_backend_impl();\n init_tensor();\n init_trace();\n }\n });\n\n // common/dist/esm/inference-session.js\n var init_inference_session = __esm({\n "common/dist/esm/inference-session.js"() {\n "use strict";\n init_inference_session_impl();\n }\n });\n\n // common/dist/esm/tensor-conversion.js\n var init_tensor_conversion = __esm({\n "common/dist/esm/tensor-conversion.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/tensor-factory.js\n var init_tensor_factory = __esm({\n "common/dist/esm/tensor-factory.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/onnx-model.js\n var init_onnx_model = __esm({\n "common/dist/esm/onnx-model.js"() {\n "use strict";\n }\n });\n\n // common/dist/esm/onnx-value.js\n var init_onnx_value = __esm({\n "common/dist/esm/onnx-value.js"() {\n "use 
strict";\n }\n });\n\n // common/dist/esm/training-session-impl.js\n var init_training_session_impl = __esm({\n "common/dist/esm/training-session-impl.js"() {\n "use strict";\n init_backend_impl();\n init_tensor();\n }\n });\n\n // common/dist/esm/training-session.js\n var init_training_session = __esm({\n "common/dist/esm/training-session.js"() {\n "use strict";\n init_training_session_impl();\n }\n });\n\n // common/dist/esm/index.js\n var init_esm = __esm({\n "common/dist/esm/index.js"() {\n "use strict";\n init_backend();\n init_env();\n init_inference_session();\n init_tensor();\n init_tensor_conversion();\n init_tensor_factory();\n init_trace();\n init_onnx_model();\n init_onnx_value();\n init_training_session();\n }\n });\n\n // web/lib/wasm/jsep/log.ts\n var logLevelPrefix, doLog, configLogLevel, debug, configureLogger, LOG, LOG_DEBUG;\n var init_log = __esm({\n "web/lib/wasm/jsep/log.ts"() {\n "use strict";\n init_wasm_common();\n logLevelPrefix = ["V", "I", "W", "E", "F"];\n doLog = (level, message) => {\n console.log(`[${logLevelPrefix[level]},${(/* @__PURE__ */ new Date()).toISOString()}]${message}`);\n };\n configureLogger = ($configLogLevel, $debug) => {\n configLogLevel = $configLogLevel;\n debug = $debug;\n };\n LOG = (logLevel, msg) => {\n const messageLevel = logLevelStringToEnum(logLevel);\n const configLevel = logLevelStringToEnum(configLogLevel);\n if (messageLevel >= configLevel) {\n doLog(messageLevel, typeof msg === "function" ? 
msg() : msg);\n }\n };\n LOG_DEBUG = (...args) => {\n if (debug) {\n LOG(...args);\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/tensor-view.ts\n var createView;\n var init_tensor_view = __esm({\n "web/lib/wasm/jsep/tensor-view.ts"() {\n "use strict";\n init_wasm_common();\n createView = (dataBuffer, type) => new (tensorTypeToTypedArrayConstructor(type))(dataBuffer);\n }\n });\n\n // web/lib/wasm/jsep/webgpu/types.ts\n var init_types = __esm({\n "web/lib/wasm/jsep/webgpu/types.ts"() {\n "use strict";\n }\n });\n\n // web/lib/wasm/jsep/webgpu/gpu-data-manager.ts\n var bucketFreelist, bucketArr, calcNormalizedBufferSize, calcBucketBufferSize, guid, createNewGpuDataId, downloadGpuData, GpuDataManagerImpl, createGpuDataManager;\n var init_gpu_data_manager = __esm({\n "web/lib/wasm/jsep/webgpu/gpu-data-manager.ts"() {\n "use strict";\n init_log();\n init_types();\n bucketFreelist = /* @__PURE__ */ new Map([\n [64, 250],\n [128, 200],\n [256, 200],\n [512, 200],\n [2048, 230],\n [4096, 200],\n [8192, 50],\n [16384, 50],\n [32768, 50],\n [65536, 50],\n [131072, 50],\n [262144, 50],\n [524288, 50],\n [1048576, 50],\n [2097152, 30],\n [4194304, 20],\n [8388608, 10],\n [12582912, 10],\n [16777216, 10],\n [26214400, 15],\n [33554432, 22],\n [44236800, 2],\n [58982400, 6],\n // we don\'t want to cache the bucket sizes below but not caching them\n // results in some major performance hits for models like sd-turbo.\n [67108864, 6],\n [134217728, 6],\n [167772160, 6]\n ]);\n bucketArr = [];\n calcNormalizedBufferSize = (size) => Math.ceil(size / 16) * 16;\n calcBucketBufferSize = (size) => {\n for (let idx = 0; idx < bucketArr.length; idx++) {\n const sizeForBucket = bucketArr[idx];\n if (size <= sizeForBucket) {\n return sizeForBucket;\n }\n }\n return Math.ceil(size / 16) * 16;\n };\n guid = 1;\n createNewGpuDataId = () => guid++;\n downloadGpuData = async (backend, gpuBuffer, originalSize, getTargetBuffer) => {\n const bufferSize = calcNormalizedBufferSize(originalSize);\n const 
gpuReadBuffer = backend.device.createBuffer(\n // eslint-disable-next-line no-bitwise\n { size: bufferSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ }\n );\n try {\n const commandEncoder = backend.getCommandEncoder();\n backend.endComputePass();\n commandEncoder.copyBufferToBuffer(\n gpuBuffer,\n 0,\n gpuReadBuffer,\n 0,\n bufferSize\n /* size */\n );\n backend.flush();\n await gpuReadBuffer.mapAsync(GPUMapMode.READ);\n const arrayBuffer = gpuReadBuffer.getMappedRange();\n if (getTargetBuffer) {\n const targetBuffer = getTargetBuffer();\n targetBuffer.set(new Uint8Array(arrayBuffer, 0, originalSize));\n return targetBuffer;\n } else {\n return new Uint8Array(arrayBuffer.slice(0, originalSize));\n }\n } finally {\n gpuReadBuffer.destroy();\n }\n };\n GpuDataManagerImpl = class {\n constructor(backend) {\n this.backend = backend;\n this.storageCache = /* @__PURE__ */ new Map();\n this.freeBuffers = /* @__PURE__ */ new Map();\n this.freeUniformBuffers = /* @__PURE__ */ new Map();\n this.buffersForUploadingPending = [];\n this.buffersPending = [];\n this.externalBuffers = /* @__PURE__ */ new Map();\n this.capturedPendingBuffers = /* @__PURE__ */ new Map();\n for (const [key] of bucketFreelist) {\n bucketArr.push(key);\n this.freeBuffers.set(key, []);\n this.freeUniformBuffers.set(key, []);\n }\n }\n upload(id, data) {\n const srcArrayBuffer = data.buffer;\n const srcOffset = data.byteOffset;\n const srcLength = data.byteLength;\n const size = calcNormalizedBufferSize(srcLength);\n const gpuDataCache = this.storageCache.get(id);\n if (!gpuDataCache) {\n throw new Error("gpu data for uploading does not exist");\n }\n if (gpuDataCache.originalSize !== srcLength) {\n throw new Error(`inconsistent data size. 
gpu data size=${gpuDataCache.originalSize}, data size=${srcLength}`);\n }\n const gpuBufferForUploading = this.backend.device.createBuffer(\n // eslint-disable-next-line no-bitwise\n { mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC }\n );\n const arrayBuffer = gpuBufferForUploading.getMappedRange();\n new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength));\n gpuBufferForUploading.unmap();\n const commandEncoder = this.backend.getCommandEncoder();\n this.backend.endComputePass();\n commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuDataCache.gpuData.buffer, 0, size);\n LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.upload(id=${id})`);\n this.buffersForUploadingPending.push(gpuBufferForUploading);\n }\n memcpy(sourceId, destinationId) {\n const sourceGpuDataCache = this.storageCache.get(sourceId);\n if (!sourceGpuDataCache) {\n throw new Error("source gpu data for memcpy does not exist");\n }\n const destinationGpuDataCache = this.storageCache.get(destinationId);\n if (!destinationGpuDataCache) {\n throw new Error("destination gpu data for memcpy does not exist");\n }\n if (sourceGpuDataCache.originalSize !== destinationGpuDataCache.originalSize) {\n throw new Error("inconsistent source and destination gpu data size");\n }\n const size = calcNormalizedBufferSize(sourceGpuDataCache.originalSize);\n const commandEncoder = this.backend.getCommandEncoder();\n this.backend.endComputePass();\n commandEncoder.copyBufferToBuffer(\n sourceGpuDataCache.gpuData.buffer,\n 0,\n destinationGpuDataCache.gpuData.buffer,\n 0,\n size\n );\n }\n registerExternalBuffer(buffer, originalSize, previousBuffer) {\n let id;\n if (previousBuffer) {\n id = this.externalBuffers.get(previousBuffer);\n if (id === void 0) {\n throw new Error("previous buffer is not registered");\n }\n if (buffer === previousBuffer) {\n LOG_DEBUG(\n "verbose",\n () => `[WebGPU] 
GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, buffer is the same, skip.`\n );\n return id;\n } else if (this.backend.capturedCommandList.has(this.backend.currentSessionId)) {\n throw new Error(`Registering a different external buffer under graph capture mode is not supported yet.\n Please use the previous external buffer!`);\n }\n this.externalBuffers.delete(previousBuffer);\n } else {\n id = createNewGpuDataId();\n }\n this.storageCache.set(id, { gpuData: { id, type: 0 /* default */, buffer }, originalSize });\n this.externalBuffers.set(buffer, id);\n LOG_DEBUG(\n "verbose",\n () => `[WebGPU] GpuDataManager.registerExternalBuffer(size=${originalSize}) => id=${id}, registered.`\n );\n return id;\n }\n unregisterExternalBuffer(buffer) {\n const id = this.externalBuffers.get(buffer);\n if (id !== void 0) {\n this.storageCache.delete(id);\n this.externalBuffers.delete(buffer);\n LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.unregisterExternalBuffer() => id=${id}`);\n }\n }\n // eslint-disable-next-line no-bitwise\n create(size, usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST) {\n const bufferSize = calcBucketBufferSize(size);\n let gpuBuffer;\n const isStorage = (usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE;\n const isUniform = (usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM;\n if (isStorage || isUniform) {\n const freeBuffers = isStorage ? 
this.freeBuffers : this.freeUniformBuffers;\n const buffers = freeBuffers.get(bufferSize);\n if (!buffers) {\n gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });\n } else {\n if (buffers.length > 0) {\n gpuBuffer = buffers.pop();\n } else {\n gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });\n }\n }\n } else {\n gpuBuffer = this.backend.device.createBuffer({ size: bufferSize, usage });\n }\n const gpuData = { id: createNewGpuDataId(), type: 0 /* default */, buffer: gpuBuffer };\n this.storageCache.set(gpuData.id, { gpuData, originalSize: size });\n LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.create(size=${size}) => id=${gpuData.id}`);\n return gpuData;\n }\n get(id) {\n return this.storageCache.get(id)?.gpuData;\n }\n release(id) {\n const cachedData = this.storageCache.get(id);\n if (!cachedData) {\n throw new Error("releasing data does not exist");\n }\n LOG_DEBUG("verbose", () => `[WebGPU] GpuDataManager.release(id=${id}), gpuDataId=${cachedData.gpuData.id}`);\n this.storageCache.delete(id);\n this.buffersPending.push(cachedData.gpuData.buffer);\n return cachedData.originalSize;\n }\n async download(id, getTargetBuffer) {\n const cachedData = this.storageCache.get(id);\n if (!cachedData) {\n throw new Error("data does not exist");\n }\n await downloadGpuData(this.backend, cachedData.gpuData.buffer, cachedData.originalSize, getTargetBuffer);\n }\n refreshPendingBuffers() {\n for (const buffer of this.buffersForUploadingPending) {\n buffer.destroy();\n }\n this.buffersForUploadingPending = [];\n if (this.buffersPending.length === 0) {\n return;\n }\n if (this.backend.sessionStatus === "default") {\n for (const buffer of this.buffersPending) {\n const maxInFreeList = bucketFreelist.get(buffer.size);\n if ((buffer.usage & GPUBufferUsage.STORAGE) === GPUBufferUsage.STORAGE) {\n const freelist = this.freeBuffers.get(buffer.size) || [];\n if (maxInFreeList === void 0 || freelist.length >= maxInFreeList) {\n 
buffer.destroy();\n } else {\n freelist.push(buffer);\n }\n } else if ((buffer.usage & GPUBufferUsage.UNIFORM) === GPUBufferUsage.UNIFORM) {\n const freelist = this.freeUniformBuffers.get(buffer.size) || [];\n if (maxInFreeList === void 0 || freelist.length >= maxInFreeList) {\n buffer.destroy();\n } else {\n freelist.push(buffer);\n }\n } else {\n buffer.destroy();\n }\n }\n this.buffersPending = [];\n } else {\n let capturedBuffers = this.capturedPendingBuffers.get(this.backend.currentSessionId);\n if (!capturedBuffers) {\n capturedBuffers = [];\n this.capturedPendingBuffers.set(this.backend.currentSessionId, capturedBuffers);\n }\n for (const buffer of this.buffersPending) {\n capturedBuffers.push(buffer);\n }\n this.buffersPending = [];\n }\n }\n dispose() {\n this.freeBuffers.forEach((buffers) => {\n buffers.forEach((buffer) => {\n buffer.destroy();\n });\n });\n this.freeUniformBuffers.forEach((buffers) => {\n buffers.forEach((buffer) => {\n buffer.destroy();\n });\n });\n this.storageCache.forEach((storage) => {\n storage.gpuData.buffer.destroy();\n });\n this.capturedPendingBuffers.forEach((buffers) => {\n buffers.forEach((buffer) => {\n buffer.destroy();\n });\n });\n this.storageCache = /* @__PURE__ */ new Map();\n this.freeBuffers = /* @__PURE__ */ new Map();\n this.freeUniformBuffers = /* @__PURE__ */ new Map();\n this.capturedPendingBuffers = /* @__PURE__ */ new Map();\n }\n onReleaseSession(sessionId) {\n const pendingBuffers = this.capturedPendingBuffers.get(sessionId);\n if (pendingBuffers) {\n pendingBuffers.forEach((buffer) => {\n buffer.destroy();\n });\n this.capturedPendingBuffers.delete(sessionId);\n }\n }\n };\n createGpuDataManager = (...args) => new GpuDataManagerImpl(...args);\n }\n });\n\n // web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts\n var AttributeWithCacheKeyImpl, createAttributeWithCacheKey;\n var init_attribute_with_cache_key = __esm({\n "web/lib/wasm/jsep/webgpu/attribute-with-cache-key.ts"() {\n "use strict";\n 
AttributeWithCacheKeyImpl = class {\n constructor(attribute) {\n Object.assign(this, attribute);\n }\n get cacheKey() {\n if (!this.key) {\n this.key = Object.getOwnPropertyNames(this).sort().map((name) => `${this[name]}`).join(";");\n }\n return this.key;\n }\n };\n createAttributeWithCacheKey = (attribute) => new AttributeWithCacheKeyImpl(attribute);\n }\n });\n\n // web/lib/wasm/jsep/util.ts\n var MatMulUtil, BroadcastUtil, ShapeUtil, PoolConvUtil, GemmUtil, MIN_CLIP, MAX_CLIP;\n var init_util = __esm({\n "web/lib/wasm/jsep/util.ts"() {\n "use strict";\n MatMulUtil = class {\n /**\n * Calculate the expected shape when matrix multiplication\n * @param a The shape of tensor A. Should be a tuple of 2 positive integers\n * @param b The shape of tensor B. Should be a tuple of 2 positive integers\n * @returns The expected shape of the result, or undefined if N/A\n */\n static calcMatMulShape(a, b) {\n return a[1] !== b[0] ? void 0 : [a[0], b[1]];\n }\n };\n BroadcastUtil = class {\n /**\n * Calculate the expected shape when broadcasting 2 tensors\n * @param a The shape of tensor A. Should be an array of positive integers\n * @param b The shape of tensor B. Should be an array of positive integers\n * @param isMatMul Whether the operation is MatMul\n * @returns The expected shape of the result, or undefined if N/A\n */\n static calcShape(adims, bdims, isMatMul = false) {\n const arank = adims.length;\n const brank = bdims.length;\n if (arank === 0) {\n return bdims;\n }\n if (brank === 0) {\n return adims;\n }\n const crank = Math.max(adims.length, bdims.length);\n const cdims = new Array(crank);\n if (isMatMul) {\n if (arank < 2 || brank < 2) {\n return void 0;\n }\n const cShapeMatMul = MatMulUtil.calcMatMulShape([adims[arank - 2], adims[arank - 1]], [bdims[brank - 2], bdims[brank - 1]]);\n if (cShapeMatMul === void 0) {\n return void 0;\n }\n [cdims[crank - 2], cdims[crank - 1]] = cShapeMatMul;\n }\n for (let i = isMatMul ? 
3 : 1; i <= crank; i++) {\n const aLen = arank - i < 0 ? 1 : adims[arank - i];\n const bLen = brank - i < 0 ? 1 : bdims[brank - i];\n if (aLen !== bLen && aLen > 1 && bLen > 1) {\n return void 0;\n }\n const max = Math.max(aLen, bLen);\n if (aLen && bLen) {\n cdims[crank - i] = Math.max(aLen, bLen);\n } else {\n if (max > 1) {\n return void 0;\n }\n cdims[crank - i] = 0;\n }\n }\n return cdims;\n }\n /**\n * Determine if a shape is unidirectional broadcastable to another shape\n * @param shape The input shape\n * @param finalShape The desired shape after broadcasting\n */\n static isValidBroadcast(shape, finalShape) {\n const inputRank = shape.length;\n const finalRank = finalShape.length;\n if (inputRank > finalRank) {\n return false;\n }\n for (let i = 1; i <= inputRank; i++) {\n if (shape[inputRank - i] !== 1 && shape[inputRank - i] !== finalShape[finalRank - i]) {\n return false;\n }\n }\n return true;\n }\n };\n ShapeUtil = class _ShapeUtil {\n /**\n * calculate the size (number of elements)\n */\n static size(dims) {\n return _ShapeUtil.getSizeFromDimensionRange(dims, 0, dims.length);\n }\n /**\n * convert dims corresponding to type change to pack. ex. 
uint8 data to uint32\n */\n static convertShape(dims, size = 4) {\n const rank = dims.length;\n if (rank === 0) {\n return [];\n }\n const newDims = new Array(rank);\n let i = rank - 1;\n while (i >= 0) {\n if (dims[i] % size === 0) {\n newDims[i] = dims[i] / size;\n break;\n }\n if (size % dims[i] !== 0) {\n throw new Error("cannot convert shape");\n }\n newDims[i] = 1;\n size /= dims[i];\n i--;\n }\n for (i--; i >= 0; i--) {\n newDims[i] = dims[i];\n }\n return newDims;\n }\n /**\n * calculate the size (number of elements) from the given axis (inclusive)\n */\n static sizeFromDimension(dims, axis) {\n if (axis < 0 || axis > dims.length) {\n throw new Error(`invalid dimension of ${axis} for sizeFromDimension as Tensor has ${dims.length} dimensions.`);\n }\n return _ShapeUtil.getSizeFromDimensionRange(dims, axis, dims.length);\n }\n /**\n * calculate the size (number of elements) to the given axis (exclusive)\n */\n static sizeToDimension(dims, axis) {\n if (axis < 0 || axis > dims.length) {\n throw new Error(`invalid dimension of ${axis} for sizeToDimension as Tensor has ${dims.length} dimensions.`);\n }\n return _ShapeUtil.getSizeFromDimensionRange(dims, 0, axis);\n }\n /**\n * calculate the size (number of elements) from and to the given axis [start, end)\n */\n static getSizeFromDimensionRange(dims, start, end) {\n let size = 1;\n for (let i = start; i < end; i++) {\n if (dims[i] < 0) {\n throw new Error(\n // eslint-disable-next-line max-len\n "cannot get valid size from specified dimension range. 
Most likely the range contains negative values in them."\n );\n }\n size *= dims[i];\n }\n return size;\n }\n static computeStrides(dims) {\n const rank = dims.length;\n if (rank === 0) {\n return [];\n } else if (rank === 1) {\n return [1];\n }\n const strides = new Array(rank);\n strides[rank - 1] = 1;\n strides[rank - 2] = dims[rank - 1];\n for (let i = rank - 3; i >= 0; --i) {\n strides[i] = strides[i + 1] * dims[i + 1];\n }\n return strides;\n }\n /**\n * normailze axis of range [-r, r) into [0, r).\n */\n static normalizeAxis(axis, tensorRank) {\n if (axis < -tensorRank && axis >= tensorRank) {\n throw new Error("unsupported axis for this operation.");\n }\n return axis < 0 ? axis + tensorRank : axis;\n }\n static normalizeAxes(axes, tensorRank) {\n return axes.map((x) => this.normalizeAxis(x, tensorRank ?? axes.length));\n }\n /**\n * Sorts a given array based on the indices in the Perm array\n * Used in Transpose\n * @param a Array to be sorted such as dims or strides\n * @param perm Perm given; if null a will be reversed\n */\n static sortBasedOnPerm(a, perm) {\n if (perm) {\n return perm.map((v) => a[v]);\n } else {\n return a.slice().reverse();\n }\n }\n /**\n * Pads a given shape according to the padding values\n * @param dims shape of the Tensor to be padded\n * @param pad pad values\n */\n static padShape(dims, pad2) {\n const rank = dims.length;\n return dims.map((v, i) => v + pad2[i] + pad2[i + rank]);\n }\n /**\n * Determines if the two shapes are identical\n * @param shape1\n * @param shape2\n */\n static areEqual(shape1, shape2) {\n if (shape1.length !== shape2.length) {\n return false;\n }\n return shape1.every((v, i) => v === shape2[i]);\n }\n };\n PoolConvUtil = class _PoolConvUtil {\n /**\n * Adjust the kernel, strides, pads to correct rank. 
Set to default value if not present\n * @param isGlobalOperator If true, perform global pooling.\n * @param inputDims The input tensor dimension.\n * @param kernelShape The size of the kernel along each axis.\n * @param strides Stride along each axis.\n * @param dilations Dilation along each axis.\n * @param pads Padding for the beginning and ending along each axis.\n */\n static adjustPoolAttributes(isGlobalOperator, inputDims, kernelShape, strides, dilations, pads) {\n if (!isGlobalOperator && kernelShape.length !== inputDims.length - 2) {\n throw new Error("length of specified kernel shapes should be 2 less than length of input dimensions");\n }\n if (isGlobalOperator) {\n for (let dim = 0; dim < inputDims.length - 2; dim++) {\n if (dim >= kernelShape.length) {\n kernelShape.push(inputDims[dim + 2]);\n } else {\n kernelShape[dim] = inputDims[dim + 2];\n }\n }\n }\n for (let dim = 0; dim < kernelShape.length; dim++) {\n if (dim < strides.length) {\n if (strides[dim] < 0) {\n throw new Error("strides should be greater than or equal to 1");\n }\n } else {\n strides.push(1);\n }\n }\n for (let dim = 0; dim < kernelShape.length; dim++) {\n if (dim < dilations.length) {\n if (dilations[dim] < 0) {\n throw new Error("dilations should be greater than or equal to 1");\n }\n } else {\n dilations.push(1);\n }\n }\n for (let dim = 0; dim < kernelShape.length * 2; dim++) {\n if (dim < pads.length) {\n if (pads[dim] < 0) {\n throw new Error("pad should be greater than or equal to 1");\n }\n } else {\n pads.push(0);\n }\n }\n for (let dim = 0; dim < kernelShape.length; dim++) {\n if (kernelShape[dim] <= 0) {\n throw new Error("kernel shapes need to be greater than 0");\n }\n if (pads[dim] >= kernelShape[dim] || pads[dim + kernelShape.length] >= kernelShape[dim]) {\n throw new Error("pads should be smaller than kernel");\n }\n }\n }\n // adjust pad values based on \'autoPad\' attribute\n static adjustPadsBasedOnAutoPad(inputDims, strides, dilations, kernelShape, pads, 
isChannelLast, autoPad) {\n if (!autoPad) {\n return;\n }\n if (pads.length !== 2 * (inputDims.length - 2)) {\n throw new Error("length of pads should be twice the length of data dimensions");\n }\n if (strides.length !== inputDims.length - 2) {\n throw new Error("length of strides should be the length of data dimensions");\n }\n if (kernelShape.length !== inputDims.length - 2) {\n throw new Error("length of kernel shapes should be the length of data dimensions");\n }\n for (let dim = 0; dim < inputDims.length - 2; dim++) {\n _PoolConvUtil.adjustPadAndReturnShape(\n inputDims[dim + (isChannelLast ? 1 : 2)],\n strides[dim],\n dilations[dim],\n kernelShape[dim],\n pads,\n dim,\n dim + inputDims.length - 2,\n autoPad\n );\n }\n }\n /**\n * Calculate the output shape for Pool ops based on input attributes. (Should be used only for Pool ops)\n * @param isGlobalOperator If true, perform global pooling.\n * @param inputDims The input tensor dimension. (inputs[0].dims)\n * @param strides Stride along each axis.\n * @param dilations Dilation along each axis.\n * @param kernelShape The size of the kernel along each axis.\n * @param pads Padding for the beginning and ending along each axis.\n * @param autoPad DEPRECATED attribute supported for legacy models. Specifies how to implicitly calculate pads in each\n * dimension. Can take values NOTSET, SAME_UPPER, SAME_LOWER, or VALID.\n */\n static computePoolOutputShape(isGlobalOperator, inputDims, strides, dilations, kernelShape, pads, autoPad) {\n if (inputDims.length <= 0) {\n throw new Error("input shape must be of size greater than 0");\n }\n const outputDims = [inputDims[0], inputDims[1]];\n _PoolConvUtil.computeShapeHelper(\n isGlobalOperator,\n inputDims,\n outputDims,\n strides,\n dilations,\n kernelShape,\n pads,\n autoPad\n );\n return outputDims;\n }\n /**\n * Calculate the output shape for Conv op based on input attributes. (Should be used only for Conv op)\n * @param inputDims The input tensor dimension. 
(inputs[0].dims)\n * @param filterDims The filter tensor dimension. (inputs[1].dims)\n * @param strides Stride along each axis.\n * @param kernelShape The size of the kernel along each axis.\n * @param pads Padding for the beginning and ending along each axis.\n * @param autoPad DEPRECATED attribute supported for legacy models. Specifies how to implicitly calculate pads in each\n * dimension. Can take values NOTSET, SAME_UPPER, SAME_LOWER, or VALID.\n */\n static computeConvOutputShape(inputDims, filterDims, strides, dilations, kernelShape, pads, autoPad) {\n if (inputDims.length <= 0 || filterDims.length <= 0) {\n throw new Error("invalid input tensor dims or invalid filter tensor dims");\n }\n const outputDims = [inputDims[0], filterDims[0]];\n _PoolConvUtil.computeShapeHelper(false, inputDims, outputDims, strides, dilations, kernelShape, pads, autoPad);\n return outputDims;\n }\n // will compute output shapes for data dimensions ONLY (i.e.) no batch size and channels\n // called by computePoolOutputShape() and computeConvOutputShape()\n // adjust pads based on \'autoPad\' attribute prior to shape computation\n static computeShapeHelper(isGlobalOperator, inputDims, outputDims, strides, dilations, kernelShape, pads, autoPad) {\n if (isGlobalOperator) {\n for (let dim = 0; dim < inputDims.length - 2; dim++) {\n outputDims.push(1);\n }\n } else {\n for (let dim = 0; dim < inputDims.length - 2; dim++) {\n outputDims.push(_PoolConvUtil.adjustPadAndReturnShape(\n inputDims[dim + 2],\n strides[dim],\n dilations[dim],\n kernelShape[dim],\n pads,\n dim,\n dim + inputDims.length - 2,\n autoPad\n ));\n }\n }\n }\n // helper for computeShapeHelper() and adjustPadsBasedOnAutoPad()\n // adjusts pad value for given \'autoPad\' string and computes output shape along a particular dimension\n static adjustPadAndReturnShape(inSize, stride, dilation, kernel, pads, padHeadIndex, padTailIndex, autoPad) {\n const dkernel = dilation * (kernel - 1) + 1;\n if (autoPad && autoPad !== 
"NOTSET") {\n switch (autoPad) {\n case "VALID":\n pads[padHeadIndex] = 0;\n pads[padTailIndex] = 0;\n return Math.floor((inSize - dkernel) / stride + 1);\n case "SAME_LOWER":\n case "SAME_UPPER":\n if (dilation !== 1) {\n throw new Error("Dilation not supported for SAME_UPPER or SAME_LOWER");\n } else {\n const legacyTargetSize = (inSize + stride - 1) / stride;\n const padNeeded = (legacyTargetSize - 1) * stride + kernel - inSize;\n pads[padHeadIndex] = autoPad === "SAME_LOWER" ? Math.floor((padNeeded + 1) / 2) : Math.floor(padNeeded / 2);\n pads[padTailIndex] = padNeeded - pads[padHeadIndex];\n return Math.floor((inSize + padNeeded - kernel) / stride + 1);\n }\n default:\n throw new Error("Unsupported AutoPad type");\n }\n } else {\n return Math.floor((inSize + pads[padHeadIndex] + pads[padTailIndex] - dkernel) / stride + 1);\n }\n }\n };\n GemmUtil = class {\n // will make sure input shapes are compatible for this op\n // and return back the shape of the output in the form of a tuple\n // will throw exception if the input shapes are not compatible\n static getShapeOfGemmResult(leftShape, transLeft, rightShape, transRight, biasShape) {\n if (leftShape.length !== 2 || rightShape.length !== 2) {\n throw new Error("shape need to be of size 2");\n }\n let M;\n let K;\n let N;\n if (transLeft) {\n M = leftShape[1];\n K = leftShape[0];\n } else {\n M = leftShape[0];\n K = leftShape[1];\n }\n let kDim = -1;\n if (transRight) {\n N = rightShape[0];\n kDim = 1;\n } else {\n N = rightShape[1];\n kDim = 0;\n }\n if (rightShape[kDim] !== K) {\n throw new Error("dimension mismatch");\n }\n if (M <= 0 || N <= 0 || K <= 0) {\n throw new Error("invalid shape specified");\n }\n if (biasShape && !BroadcastUtil.isValidBroadcast(biasShape, [M, N])) {\n throw new Error("gemm: invalid bias shape for broadcast");\n }\n return [M, N, K];\n }\n };\n MIN_CLIP = -34028234663852886e22;\n MAX_CLIP = 34028234663852886e22;\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/common.ts\n var 
WORKGROUP_SIZE, getWgslMappedType, tensorTypeToWsglStorageType, tensorTypeToWsglValueType, createTensorShapeVariables, getMaxComponents, fillVector, castToF32, sumVector, getElementAt, createIndicesHelper, inputVariable, outputVariable, internalVariable, ShaderHelperImpl, createShaderHelper, getBroadcastDims;\n var init_common = __esm({\n "web/lib/wasm/jsep/webgpu/ops/common.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n WORKGROUP_SIZE = 64;\n getWgslMappedType = (type, components) => {\n if (components === 3) {\n throw new Error("vec3 has same alignment as vec4, use vec4 instead");\n }\n switch (type) {\n case 10 /* float16 */:\n return components > 1 ? `vec${components}` : "f16";\n case 1 /* float */:\n return components > 1 ? `vec${components}` : "f32";\n case 6 /* int32 */:\n return components > 1 ? `vec${components}` : "i32";\n case 12 /* uint32 */:\n return components > 1 ? `vec${components}` : "u32";\n case 7 /* int64 */:\n if (components > 1) {\n throw new Error("currently not supported vecX of uint64 yet");\n }\n return ["vec2", "i32"];\n case 13 /* uint64 */:\n if (components > 1) {\n throw new Error("currently not supported vecX of uint64 yet");\n }\n return ["vec2", "u32"];\n case 9 /* bool */:\n if (components !== 4) {\n throw new Error("bool must be vec4");\n }\n return ["u32", "vec4"];\n default:\n throw new Error(`Unknown data type: ${type}`);\n }\n };\n tensorTypeToWsglStorageType = (type, components = 1) => {\n const mappedType = getWgslMappedType(type, components);\n return typeof mappedType === "string" ? mappedType : mappedType[0];\n };\n tensorTypeToWsglValueType = (type, components = 1) => {\n const mappedType = getWgslMappedType(type, components);\n return typeof mappedType === "string" ? 
mappedType : mappedType[1];\n };\n createTensorShapeVariables = (...dims) => {\n const programUniforms = [];\n dims.forEach((dim) => {\n if (dim.length !== 0) {\n programUniforms.push(\n { type: 12 /* uint32 */, data: dim },\n { type: 12 /* uint32 */, data: ShapeUtil.computeStrides(dim) }\n );\n }\n });\n return programUniforms;\n };\n getMaxComponents = (size) => {\n if (size % 4 === 0) {\n return 4;\n } else if (size % 2 === 0) {\n return 2;\n }\n return 1;\n };\n fillVector = (dataType = "f32", components, value = "0") => {\n if (!components || components === 1) {\n return `${dataType}(${value})`;\n }\n return `vec${components}<${dataType}>(${value})`;\n };\n castToF32 = (dataType, components, value) => {\n if (dataType === "f32") {\n return value;\n }\n if (components === 1) {\n return `f32(${value})`;\n }\n return `vec${components}(${value})`;\n };\n sumVector = (name, components) => {\n if (components === 4) {\n return `(${name}.x + ${name}.y + ${name}.z + ${name}.w)`;\n } else if (components === 2) {\n return `(${name}.x + ${name}.y)`;\n } else if (components === 3) {\n return `(${name}.x + ${name}.y + ${name}.z)`;\n }\n return name;\n };\n getElementAt = (name, index, length, type) => {\n if (name.startsWith("uniforms.") && length > 4) {\n if (typeof index === "string") {\n if (type === "f16") {\n return `${name}[(${index}) / 8][(${index}) % 8 / 4][(${index}) % 8 % 4]`;\n } else {\n return `${name}[(${index}) / 4][(${index}) % 4]`;\n }\n } else {\n if (type === "f16") {\n return `${name}[${Math.floor(index / 8)}][${Math.floor(index % 8 / 4)}][${index % 8 % 4}]`;\n } else {\n return `${name}[${Math.floor(index / 4)}][${index % 4}]`;\n }\n }\n } else {\n return length > 1 ? `${name}[${index}]` : name;\n }\n };\n createIndicesHelper = (name, tensorType, shapeOrRank, usage, components) => {\n const useUniform = typeof shapeOrRank === "number";\n const rank = useUniform ? 
shapeOrRank : shapeOrRank.length;\n const rankIdentity = [...new Array(rank).keys()];\n const indicesType = rank < 2 ? "u32" : rank <= 4 ? `vec${rank}` : `array`;\n const mappedType = getWgslMappedType(tensorType, components);\n const valueType = typeof mappedType === "string" ? mappedType : mappedType[1];\n const storageType = typeof mappedType === "string" ? mappedType : mappedType[0];\n const type = { indices: indicesType, value: valueType, storage: storageType, tensor: tensorType };\n const normalizeDim = (dim) => typeof dim === "string" ? dim : `${dim}u`;\n const implementationUsed = {\n offsetToIndices: false,\n indicesToOffset: false,\n broadcastedIndicesToOffset: false,\n set: false,\n setByIndices: false,\n get: false,\n getByIndices: false\n };\n const uniformPrefix = useUniform ? "uniforms." : "";\n const shape = `${uniformPrefix}${name}_shape`;\n const strides = `${uniformPrefix}${name}_strides`;\n let o2iSnippet = "";\n for (let i = 0; i < rank - 1; i++) {\n o2iSnippet += `\n let dim${i} = current / ${getElementAt(strides, i, rank)};\n let rest${i} = current % ${getElementAt(strides, i, rank)};\n indices[${i}] = dim${i};\n current = rest${i};\n `;\n }\n o2iSnippet += `indices[${rank - 1}] = current;`;\n const offsetToIndicesImplementation = rank < 2 ? "" : `\n fn o2i_${name}(offset: u32) -> ${type.indices} {\n var indices: ${type.indices};\n var current = offset;\n ${o2iSnippet}\n return indices;\n }`;\n const offsetToIndices = (varOffset) => {\n implementationUsed.offsetToIndices = true;\n return rank < 2 ? varOffset : `o2i_${name}(${varOffset})`;\n };\n const offsets = [];\n if (rank >= 2) {\n for (let i = rank - 1; i >= 0; i--) {\n offsets.push(`${getElementAt(strides, i, rank)} * (indices[${i}])`);\n }\n }\n const indicesToOffsetImplementation = rank < 2 ? 
"" : `\n fn i2o_${name}(indices: ${type.indices}) -> u32 {\n return ${offsets.join("+")};\n }`;\n const indicesToOffset = (varIndices) => {\n implementationUsed.indicesToOffset = true;\n return rank < 2 ? varIndices : `i2o_${name}(${varIndices})`;\n };\n const indices = (...init2) => rank === 0 ? "0u" : `${type.indices}(${init2.map(normalizeDim).join(",")})`;\n const indicesGet = (varIndices, idx) => {\n if (rank < 2) {\n return `${varIndices}`;\n } else {\n return `${getElementAt(varIndices, idx, rank)}`;\n }\n };\n const indicesSet = (varIndices, idx, value) => {\n if (rank < 2) {\n return `${varIndices}=${value};`;\n } else {\n return `${getElementAt(varIndices, idx, rank)}=${value};`;\n }\n };\n const broadcastedIndicesToOffsetImplementation = {};\n const broadcastedIndicesToOffset = (varIndices, output) => {\n implementationUsed.broadcastedIndicesToOffset = true;\n const implKey = `${output.name}broadcastedIndicesTo${name}Offset`;\n if (implKey in broadcastedIndicesToOffsetImplementation) {\n return `${implKey}(${varIndices})`;\n }\n const offsets2 = [];\n for (let i = rank - 1; i >= 0; i--) {\n const idx = output.indicesGet("outputIndices", i + output.rank - rank);\n offsets2.push(`${indicesGet(strides, i)} * (${idx} % ${indicesGet(shape, i)})`);\n }\n broadcastedIndicesToOffsetImplementation[implKey] = `fn ${implKey}(outputIndices: ${output.type.indices}) -> u32 {\n return ${offsets2.length > 0 ? 
offsets2.join("+") : "0u"};\n }`;\n return `${implKey}(${varIndices})`;\n };\n const setByOffset = (offset, value) => (() => {\n if (type.storage === type.value) {\n return `${name}[${offset}]=${value};`;\n } else if (type.storage === "vec2" && type.value === "i32") {\n return `${name}[${offset}]=vec2(u32(${value}), select(0u, 0xFFFFFFFFu, ${value} < 0));`;\n } else if (type.storage === "vec2" && type.value === "u32") {\n return `${name}[${offset}]=vec2(u32(${value}), 0u);`;\n } else if (type.storage === "u32" && type.value === "vec4") {\n return `${name}[${offset}]=dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(${value}));`;\n } else {\n throw new Error(`not supported combination of storage type ${type.storage} and value type ${type.value} yet`);\n }\n })();\n const getByOffset = (offset) => (() => {\n if (type.storage === type.value) {\n return `${name}[${offset}]`;\n } else if (type.storage === "vec2" && type.value === "i32") {\n return `i32(${name}[${offset}].x)`;\n } else if (type.storage === "vec2" && type.value === "u32") {\n return `u32(${name}[${offset}].x)`;\n } else if (type.storage === "u32" && type.value === "vec4") {\n return `vec4(bool(${name}[${offset}] & 0xFFu), bool(${name}[${offset}] & 0xFF00u), bool(${name}[${offset}] & 0xFF0000u), bool(${name}[${offset}] & 0xFF000000u))`;\n } else {\n throw new Error(`not supported combination of storage type ${type.storage} and value type ${type.value} yet`);\n }\n })();\n const getByIndicesImplementation = rank < 2 ? "" : `\n fn get_${name}ByIndices(indices: ${type.indices}) -> ${valueType} {\n return ${getByOffset(`i2o_${name}(indices)`)};\n }`;\n const getImplementation = rank < 2 ? 
"" : (() => {\n const functionParams = rankIdentity.map((i) => `d${i}: u32`).join(", ");\n const dimsParams = rankIdentity.map((i) => `d${i}`).join(", ");\n return `\n fn get_${name}(${functionParams}) -> ${valueType} {\n return get_${name}ByIndices(${indices(dimsParams)});\n }`;\n })();\n const get = (...indices2) => {\n if (indices2.length !== rank) {\n throw new Error(`indices length must be ${rank}`);\n }\n const normalizedIndices = indices2.map(normalizeDim).join(",");\n if (rank === 0) {\n return getByOffset("0u");\n } else if (rank === 1) {\n return getByOffset(normalizedIndices[0]);\n } else {\n implementationUsed.get = true;\n implementationUsed.getByIndices = true;\n implementationUsed.indicesToOffset = true;\n return `get_${name}(${normalizedIndices})`;\n }\n };\n const getByIndices = (varIndices) => {\n if (rank < 2) {\n return getByOffset(varIndices);\n } else {\n implementationUsed.getByIndices = true;\n implementationUsed.indicesToOffset = true;\n return `get_${name}ByIndices(${varIndices})`;\n }\n };\n const setByIndicesImplementation = rank < 2 ? "" : `\n fn set_${name}ByIndices(indices: ${type.indices}, value: ${valueType}) {\n ${setByOffset(`i2o_${name}(indices)`, "value")}\n }`;\n const setImplementation = rank < 2 ? 
"" : (() => {\n const functionParams = rankIdentity.map((i) => `d${i}: u32`).join(", ");\n const dimsParams = rankIdentity.map((i) => `d${i}`).join(", ");\n return `\n fn set_${name}(${functionParams}, value: ${valueType}) {\n set_${name}ByIndices(${indices(dimsParams)}, value);\n }`;\n })();\n const set = (...indicesAndValue) => {\n if (indicesAndValue.length !== rank + 1) {\n throw new Error(`indices length must be ${rank}`);\n }\n const value = indicesAndValue[rank];\n if (typeof value !== "string") {\n throw new Error("value must be string");\n }\n const normalizedIndices = indicesAndValue.slice(0, rank).map(normalizeDim).join(",");\n if (rank === 0) {\n return setByOffset("0u", value);\n } else if (rank === 1) {\n return setByOffset(normalizedIndices[0], value);\n } else {\n implementationUsed.set = true;\n implementationUsed.setByIndices = true;\n implementationUsed.indicesToOffset = true;\n return `set_${name}(${normalizedIndices}, ${value})`;\n }\n };\n const setByIndices = (varIndices, value) => {\n if (rank < 2) {\n return setByOffset(varIndices, value);\n } else {\n implementationUsed.setByIndices = true;\n implementationUsed.indicesToOffset = true;\n return `set_${name}ByIndices(${varIndices}, ${value});`;\n }\n };\n const impl = () => {\n const impls = [];\n let needShapeStrides = false;\n if (implementationUsed.offsetToIndices) {\n impls.push(offsetToIndicesImplementation);\n needShapeStrides = true;\n }\n if (implementationUsed.indicesToOffset) {\n impls.push(indicesToOffsetImplementation);\n needShapeStrides = true;\n }\n if (implementationUsed.broadcastedIndicesToOffset) {\n Object.values(broadcastedIndicesToOffsetImplementation).forEach((impl2) => impls.push(impl2));\n needShapeStrides = true;\n }\n if (implementationUsed.set) {\n impls.push(setImplementation);\n needShapeStrides = true;\n }\n if (implementationUsed.setByIndices) {\n impls.push(setByIndicesImplementation);\n needShapeStrides = true;\n }\n if (implementationUsed.get) {\n 
impls.push(getImplementation);\n needShapeStrides = true;\n }\n if (implementationUsed.getByIndices) {\n impls.push(getByIndicesImplementation);\n needShapeStrides = true;\n }\n if (!useUniform && needShapeStrides) {\n impls.unshift(\n `const ${shape} = ${type.indices}(${shapeOrRank.join(",")});`,\n `const ${strides} = ${type.indices}(${ShapeUtil.computeStrides(shapeOrRank).join(",")});`\n );\n }\n return impls.join("\\n");\n };\n return {\n impl,\n type,\n offsetToIndices,\n indicesToOffset,\n broadcastedIndicesToOffset,\n indices,\n indicesGet,\n indicesSet,\n set,\n setByOffset,\n setByIndices,\n get,\n getByOffset,\n getByIndices,\n // isVec4,\n usage,\n name,\n strides,\n shape,\n rank\n };\n };\n inputVariable = (name, type, shapeOrRank, components = 1) => createIndicesHelper(name, type, shapeOrRank, "input", components);\n outputVariable = (name, type, shapeOrRank, components = 1) => createIndicesHelper(name, type, shapeOrRank, "output", components);\n internalVariable = (name, type, shapeOrRank, components = 1) => createIndicesHelper(name, type, shapeOrRank, "internal", components);\n ShaderHelperImpl = class {\n constructor(normalizedDispatchGroup, limits) {\n this.normalizedDispatchGroup = normalizedDispatchGroup;\n this.limits = limits;\n this.internalVariables = [];\n this.variables = [];\n this.uniforms = [];\n this.variableIndex = 0;\n }\n guardAgainstOutOfBoundsWorkgroupSizes(size) {\n const sizeInCode = typeof size === "number" ? `${size}u` : size;\n return `if (global_idx >= ${sizeInCode}) { return; }`;\n }\n mainStart(workgroupSize = WORKGROUP_SIZE) {\n const workgroupSizeX = typeof workgroupSize === "number" ? workgroupSize : workgroupSize[0];\n const workgroupSizeY = typeof workgroupSize === "number" ? 1 : workgroupSize[1];\n const workgroupSizeZ = typeof workgroupSize === "number" ? 
1 : workgroupSize[2];\n if (workgroupSizeX > this.limits.maxComputeWorkgroupSizeX || workgroupSizeY > this.limits.maxComputeWorkgroupSizeY || workgroupSizeZ > this.limits.maxComputeWorkgroupSizeZ) {\n throw new Error(`workgroup size [${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ}] exceeds the maximum workgroup size [${this.limits.maxComputeWorkgroupSizeX}, ${this.limits.maxComputeWorkgroupSizeY}, ${this.limits.maxComputeWorkgroupSizeZ}].`);\n }\n if (workgroupSizeX * workgroupSizeY * workgroupSizeZ > this.limits.maxComputeInvocationsPerWorkgroup) {\n throw new Error(`workgroup size [${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ}] exceeds the maximum workgroup invocations ${this.limits.maxComputeInvocationsPerWorkgroup}.`);\n }\n const is1DimensionDispatch = this.normalizedDispatchGroup[1] === 1 && this.normalizedDispatchGroup[2] === 1;\n const paramList = is1DimensionDispatch ? `@builtin(global_invocation_id) global_id : vec3,\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3` : `@builtin(global_invocation_id) global_id : vec3,\n @builtin(local_invocation_id) local_id : vec3,\n @builtin(local_invocation_index) local_idx : u32,\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(num_workgroups) num_workgroups : vec3`;\n const globalIdxDefinition = is1DimensionDispatch ? 
"let global_idx = global_id.x; let local_idx = local_id.x;" : `let global_idx = (workgroup_id.z * num_workgroups[0] * num_workgroups[1] +\n workgroup_id.y * num_workgroups[0] + workgroup_id.x) * ${workgroupSizeX * workgroupSizeY * workgroupSizeZ}u + local_idx;`;\n return `@compute @workgroup_size(${workgroupSizeX}, ${workgroupSizeY}, ${workgroupSizeZ})\n fn main(${paramList}) {\n ${globalIdxDefinition}\n `;\n }\n appendVariableUniforms(variable) {\n if (variable.rank !== 0) {\n if (variable.shape.startsWith("uniforms.")) {\n this.uniforms.push({ name: variable.shape.replace("uniforms.", ""), type: "u32", length: variable.rank });\n }\n if (variable.strides.startsWith("uniforms.")) {\n this.uniforms.push({ name: variable.strides.replace("uniforms.", ""), type: "u32", length: variable.rank });\n }\n }\n }\n declareVariable(variable, bindingIndex) {\n if (variable.usage === "internal") {\n throw new Error("cannot use internal variable with declareVariable(). use registerInternalVariables() instead.");\n }\n this.variables.push(variable);\n this.appendVariableUniforms(variable);\n const access = variable.usage === "input" ? "read" : "read_write";\n const storageType = variable.type.storage;\n return `@group(0) @binding(${bindingIndex}) var ${variable.name}: array<${storageType}>;`;\n }\n declareVariables(...variables) {\n return variables.map((v) => this.declareVariable(v, this.variableIndex++)).join("\\n");\n }\n registerInternalVariable(variable) {\n if (variable.usage !== "internal") {\n throw new Error(\n "cannot use input or output variable with registerInternalVariable(). 
use declareVariables() instead."\n );\n }\n this.internalVariables.push(variable);\n this.appendVariableUniforms(variable);\n }\n registerInternalVariables(...variables) {\n variables.forEach((v) => this.registerInternalVariable(v));\n return this;\n }\n registerUniform(name, type, length = 1) {\n this.uniforms.push({ name, type, length });\n return this;\n }\n registerUniforms(additionalUniforms) {\n this.uniforms = this.uniforms.concat(additionalUniforms);\n return this;\n }\n uniformDeclaration() {\n if (this.uniforms.length === 0) {\n return "";\n }\n const uniformSnippets = [];\n for (const { name, type, length } of this.uniforms) {\n if (length && length > 4) {\n if (type === "f16") {\n uniformSnippets.push(`@align(16) ${name}:array, ${Math.ceil(length / 8)}>`);\n } else {\n uniformSnippets.push(`${name}:array, ${Math.ceil(length / 4)}>`);\n }\n } else {\n const typeTemp = length == null || length === 1 ? type : `vec${length}<${type}>`;\n uniformSnippets.push(`${name}:${typeTemp}`);\n }\n }\n return `\n struct Uniforms { ${uniformSnippets.join(", ")} };\n @group(0) @binding(${this.variableIndex}) var uniforms: Uniforms;`;\n }\n /**\n * Get additional implementation that needs to be added to the shader source.\n */\n get additionalImplementations() {\n return this.uniformDeclaration() + this.variables.map((i) => i.impl()).join("\\n") + this.internalVariables.map((i) => i.impl()).join("\\n");\n }\n /**\n * Get the variable info of the shader program.\n */\n get variablesInfo() {\n if (this.uniforms.length === 0) {\n return void 0;\n }\n const uniformWgslTypeToDataType = (type) => [\n 12 /* uint32 */,\n 10 /* float16 */,\n 1 /* float */,\n 6 /* int32 */\n ][["u32", "f16", "f32", "i32"].indexOf(type)];\n return this.uniforms.map((u) => [uniformWgslTypeToDataType(u.type), u.length ?? 
1]);\n }\n };\n createShaderHelper = (dispatchGroup, limits) => new ShaderHelperImpl(dispatchGroup, limits);\n getBroadcastDims = (inShape, outShape) => {\n const inRank = inShape.length;\n const dims = [];\n for (let i = 0; i < inRank; i++) {\n const dim = inRank - 1 - i;\n const a = inShape[dim] || 1;\n const b = outShape[outShape.length - 1 - i] || 1;\n if (b > 1 && a === 1) {\n dims.unshift(dim);\n }\n }\n return dims;\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/transpose.ts\n var validateInputs, getAdjustedPerm, getOutputShape, permFunctionBody, createTransposeProgramInfo, transpose, parseTransposeAttributes;\n var init_transpose = __esm({\n "web/lib/wasm/jsep/webgpu/ops/transpose.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs = (inputs) => {\n if (!inputs || inputs.length !== 1) {\n throw new Error("Transpose requires 1 input.");\n }\n };\n getAdjustedPerm = (inputRank, perm) => perm && perm.length !== inputRank ? 
[...new Array(inputRank).keys()].reverse() : perm;\n getOutputShape = (inputShape, perm) => ShapeUtil.sortBasedOnPerm(inputShape, getAdjustedPerm(inputShape.length, perm));\n permFunctionBody = (perm, rank, input, output) => {\n const reverseFunc = [];\n reverseFunc.push(`fn perm(i: ${output.type.indices}) -> ${input.type.indices} {\n var a: ${input.type.indices};`);\n for (let i = 0; i < rank; ++i) {\n reverseFunc.push(input.indicesSet("a", perm[i], `i[${i}]`));\n }\n reverseFunc.push("return a;}");\n return reverseFunc.join("\\n");\n };\n createTransposeProgramInfo = (inputTensor, permAttr) => {\n const inputDataType = inputTensor.dataType;\n const inputRank = inputTensor.dims.length;\n const perm = getAdjustedPerm(inputRank, permAttr);\n const outputShape = getOutputShape(inputTensor.dims, perm);\n const output = outputVariable("output", inputDataType, outputShape.length);\n const input = inputVariable("a", inputDataType, inputRank);\n const getShaderSource = (shaderHelper) => `\n ${shaderHelper.registerUniform("output_size", "u32").declareVariables(input, output)}\n\n ${permFunctionBody(perm, inputRank, input, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n\n let indices = ${output.offsetToIndices("global_idx")};\n let aIndices = perm(indices);\n\n ${output.setByOffset("global_idx", input.getByIndices("aIndices"))}\n }`;\n return {\n name: "Transpose",\n shaderCache: { hint: `${permAttr}`, inputDependencies: ["rank"] },\n getRunData: (inputs) => {\n const outputSize = ShapeUtil.size(outputShape);\n return {\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputs[0].dims, outputShape)]\n };\n },\n getShaderSource\n };\n };\n transpose = (context, attributes) => {\n validateInputs(context.inputs);\n 
context.compute(createTransposeProgramInfo(context.inputs[0], attributes.perm));\n };\n parseTransposeAttributes = (attributes) => createAttributeWithCacheKey({ perm: attributes.perm });\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/reduce-shared.ts\n var reduceOps, reduceSharedOps, reduceInitValues, reduceOutputValues, getInnerMostAxes, computeOutAndReduceShapes, expandShapeToKeepDim, areAxesInnerMostDims, getAxesPermutation, createReduceSharedProgramInfo, reduceCommon, reduceMeanShared, reduceL1Shared, reduceL2Shared, reduceLogSumExpShared, reduceMaxShared, reduceMinShared, reduceProdShared, reduceSumShared, reduceSumSquareShared, reduceLogSumShared;\n var init_reduce_shared = __esm({\n "web/lib/wasm/jsep/webgpu/ops/reduce-shared.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n init_reduce();\n init_transpose();\n reduceOps = {\n max: "select(bestValue, candidate, candidate > bestValue)",\n min: "select(bestValue, candidate, candidate < bestValue)",\n mean: "bestValue + candidate",\n sum: "bestValue + candidate",\n prod: "bestValue * candidate",\n sumSquare: "bestValue + candidate * candidate",\n logSumExp: "bestValue + exp(candidate)",\n l1: "bestValue + abs(candidate)",\n l2: "bestValue + candidate * candidate",\n logSum: "bestValue + candidate"\n };\n reduceSharedOps = {\n max: "select(bestValue, candidate, candidate > bestValue)",\n min: "select(bestValue, candidate, candidate < bestValue)",\n mean: "bestValue + candidate",\n sum: "bestValue + candidate",\n prod: "bestValue * candidate",\n sumSquare: "bestValue + candidate",\n logSumExp: "bestValue + candidate",\n l1: "bestValue + candidate",\n l2: "bestValue + candidate",\n logSum: "bestValue + candidate"\n };\n reduceInitValues = {\n max: "_A[offset]",\n min: "_A[offset]",\n mean: "0",\n sum: "0",\n prod: "1",\n sumSquare: "0",\n logSumExp: "0",\n l1: "0",\n l2: "0",\n logSum: "0"\n };\n reduceOutputValues = {\n max: "bestValue",\n min: "bestValue",\n sum: "bestValue",\n prod: 
"bestValue",\n sumSquare: "bestValue",\n logSumExp: "log(bestValue)",\n l1: "bestValue",\n l2: "sqrt(bestValue)",\n logSum: "log(bestValue)"\n };\n getInnerMostAxes = (numInnerAxes, rank) => {\n const res = [];\n for (let i = rank - numInnerAxes; i < rank; ++i) {\n res.push(i);\n }\n return res;\n };\n computeOutAndReduceShapes = (shape, axes) => {\n const outputShape = [];\n const rank = shape.length;\n for (let dim = 0; dim < rank; dim++) {\n if (axes.indexOf(dim) === -1) {\n outputShape.push(shape[dim]);\n }\n }\n const reduceShape = axes.map((dim) => shape[dim]);\n return [outputShape, reduceShape];\n };\n expandShapeToKeepDim = (shape, axes) => {\n const rank = shape.length + axes.length;\n const expandShape = [];\n let shapeIdx = 0;\n for (let dim = 0; dim < rank; dim++) {\n if (axes.indexOf(dim) === -1) {\n expandShape.push(shape[shapeIdx++]);\n } else {\n expandShape.push(1);\n }\n }\n return expandShape;\n };\n areAxesInnerMostDims = (axes, rank) => {\n for (let i = 0; i < axes.length; ++i) {\n if (axes[axes.length - i - 1] !== rank - 1 - i) {\n return false;\n }\n }\n return true;\n };\n getAxesPermutation = (axes, rank) => {\n const res = [];\n if (!areAxesInnerMostDims(axes, rank)) {\n for (let i = 0; i < rank; ++i) {\n if (axes.indexOf(i) === -1) {\n res.push(i);\n }\n }\n axes.forEach((axis) => res.push(axis));\n }\n return res;\n };\n createReduceSharedProgramInfo = (name, shaderCache, inputs, reduceType, outputDataType, outputShape, reduceShape) => {\n const inputShape = inputs[0].dims;\n const outputSize = ShapeUtil.size(outputShape);\n const reduceSize = ShapeUtil.size(reduceShape);\n const input = inputVariable("_A", inputs[0].dataType, inputShape);\n const output = outputVariable("output", outputDataType, outputShape);\n const workgroupSize = 32;\n const sharedMemorySnippet = `\n var aBestValues : array;\n `;\n const getShaderSource = (shaderHelper) => `\n ${shaderHelper.registerUniform("reduceSize", "u32").declareVariables(input, output)}\n 
${sharedMemorySnippet}\n fn DIV_CEIL(a : u32, b : u32) -> u32 {\n return ((a - 1u) / b + 1u);\n }\n ${shaderHelper.mainStart(workgroupSize)}\n\n let outputIndex = global_idx / ${workgroupSize};\n let offset = outputIndex * uniforms.reduceSize;\n\n var bestValue = f32(${reduceInitValues[reduceType]});\n let Length = uniforms.reduceSize;\n for (var k = local_idx; k < Length; k = k + ${workgroupSize}) {\n let candidate = f32(${input.getByOffset("offset + k")});\n bestValue = ${reduceOps[reduceType]};\n }\n aBestValues[local_idx] = bestValue;\n workgroupBarrier();\n\n var reduceSize = min(Length, ${workgroupSize}u);\n for (var currentSize = reduceSize / 2u; reduceSize > 1u;\n currentSize = reduceSize / 2u) {\n let interval = DIV_CEIL(reduceSize, 2u);\n if (local_idx < currentSize) {\n let candidate = aBestValues[local_idx + interval];\n bestValue = ${reduceSharedOps[reduceType]};\n aBestValues[local_idx] = bestValue;\n }\n reduceSize = interval;\n workgroupBarrier();\n }\n\n if (local_idx == 0u) {\n ${output.setByOffset(\n "outputIndex",\n `${reduceType === "mean" ? `${output.type.storage}(bestValue / f32(uniforms.reduceSize))` : `${output.type.storage}(${reduceOutputValues[reduceType]})`}`\n )};\n }\n }`;\n return {\n name,\n shaderCache,\n getShaderSource,\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: outputDataType }],\n dispatchGroup: { x: outputSize },\n programUniforms: [{ type: 12 /* uint32 */, data: reduceSize }]\n })\n };\n };\n reduceCommon = (context, name, attributes, reduceType) => {\n const updatedAttributes = context.inputs.length === 1 ? 
attributes : createReduceAttributesFromInputs(context.inputs, attributes);\n let updatedAxes = updatedAttributes.axes;\n if (updatedAxes.length === 0 && !updatedAttributes.noopWithEmptyAxes) {\n updatedAxes = context.inputs[0].dims.map((_dim, i) => i);\n }\n const normalizeAxes = ShapeUtil.normalizeAxes(updatedAxes, context.inputs[0].dims.length);\n let axes = normalizeAxes;\n let input = context.inputs[0];\n const permutedAxes = getAxesPermutation(axes, context.inputs[0].dims.length);\n if (permutedAxes.length > 0) {\n input = context.compute(\n createTransposeProgramInfo(context.inputs[0], permutedAxes),\n { inputs: [0], outputs: [-1] }\n )[0];\n axes = getInnerMostAxes(axes.length, input.dims.length);\n }\n const [outputShape, reduceShape] = computeOutAndReduceShapes(input.dims, axes);\n let finalOutputShape = outputShape;\n if (updatedAttributes.keepDims) {\n finalOutputShape = expandShapeToKeepDim(outputShape, normalizeAxes);\n }\n context.compute(\n createReduceSharedProgramInfo(\n name,\n { hint: updatedAttributes.cacheKey, inputDependencies: ["type"] },\n [input],\n reduceType,\n context.inputs[0].dataType,\n finalOutputShape,\n reduceShape\n ),\n { inputs: [input] }\n );\n };\n reduceMeanShared = (context, attributes) => {\n reduceCommon(context, "ReduceMeanShared", attributes, "mean");\n };\n reduceL1Shared = (context, attributes) => {\n reduceCommon(context, "ReduceL1Shared", attributes, "l1");\n };\n reduceL2Shared = (context, attributes) => {\n reduceCommon(context, "ReduceL2Shared", attributes, "l2");\n };\n reduceLogSumExpShared = (context, attributes) => {\n reduceCommon(context, "ReduceLogSumExpShared", attributes, "logSumExp");\n };\n reduceMaxShared = (context, attributes) => {\n reduceCommon(context, "ReduceMaxShared", attributes, "max");\n };\n reduceMinShared = (context, attributes) => {\n reduceCommon(context, "ReduceMinShared", attributes, "min");\n };\n reduceProdShared = (context, attributes) => {\n reduceCommon(context, 
"ReduceProdShared", attributes, "prod");\n };\n reduceSumShared = (context, attributes) => {\n reduceCommon(context, "ReduceSumShared", attributes, "sum");\n };\n reduceSumSquareShared = (context, attributes) => {\n reduceCommon(context, "ReduceSumSquareShared", attributes, "sumSquare");\n };\n reduceLogSumShared = (context, attributes) => {\n reduceCommon(context, "ReduceLogSumShared", attributes, "logSum");\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/reduce.ts\n var validateInputs2, noOp, createReduceProgramInfo, createReduceAttributesFromInputs, runReduceProgram, reduceLogSumNaive, reduceL1Naive, reduceL2Naive, reduceLogSumExpNaive, reduceMaxNaive, reduceMeanNaive, reduceMinNaive, reduceProdNaive, reduceSumNaive, reduceSumSquareNaive, useNaiveReduceMethod, reduceMean, reduceL1, reduceL2, reduceLogSumExp, reduceMax, reduceMin, reduceProd, reduceSum, reduceSumSquare, reduceLogSum;\n var init_reduce = __esm({\n "web/lib/wasm/jsep/webgpu/ops/reduce.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n init_reduce_shared();\n validateInputs2 = (inputs) => {\n if (!inputs || inputs.length === 0 || inputs.length > 2) {\n throw new Error("Reduce op requires 1 or 2 inputs.");\n }\n if (inputs.length === 2 && inputs[1].dims.length !== 1) {\n throw new Error("Invalid axes input dims.");\n }\n };\n noOp = (input) => ["", "", `var value = ${input.getByIndices("input_indices")};`, ""];\n createReduceProgramInfo = (name, shaderCache, inputs, reduceOp, axesInput, outputDataType, keepDims = false, noopWithEmptyAxes = false) => {\n const outputShape = [];\n const inputShape = inputs[0].dims;\n const inputRank = inputShape.length;\n const axes = ShapeUtil.normalizeAxes(axesInput, inputRank);\n const reduceOnAllAxes = !noopWithEmptyAxes && axes.length === 0;\n inputShape.forEach((d, i) => {\n if (reduceOnAllAxes || axes.indexOf(i) >= 0) {\n if (keepDims) {\n outputShape.push(1);\n }\n } else {\n 
outputShape.push(d);\n }\n });\n const outputRank = outputShape.length;\n const outputSize = ShapeUtil.size(outputShape);\n const getShaderSource = (shaderHelper) => {\n const idxCopy = [];\n const input = inputVariable("_A", inputs[0].dataType, inputRank);\n const output = outputVariable("output", outputDataType, outputRank);\n const ops = reduceOp(input, output, axes);\n let reduceOps2 = ops[2];\n for (let k = 0, l = 0; k < inputRank; k++) {\n if (reduceOnAllAxes || axes.indexOf(k) >= 0) {\n if (keepDims) {\n l++;\n }\n reduceOps2 = `for(var j${k}: u32 = 0; j${k} < ${inputShape[k]}; j${k}++) {\n ${ops[2].includes("last_index") ? `let last_index = j${k};` : ""}\n ${input.indicesSet("input_indices", k, `j${k}`)}\n ${reduceOps2}\n }`;\n } else {\n idxCopy.push(`${input.indicesSet("input_indices", k, output.indicesGet("output_indices", l))};`);\n l++;\n }\n }\n return `\n\n ${shaderHelper.registerUniform("output_size", "u32").declareVariables(input, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n var input_indices: ${input.type.indices};\n let output_indices = ${output.offsetToIndices("global_idx")};\n\n ${idxCopy.join("\\n")}\n ${ops[0]} // init ops for reduce max/min\n ${ops[1]}\n ${reduceOps2}\n ${ops[3]}\n ${ops.length === 4 ? 
output.setByOffset("global_idx", "value") : ops.slice(4).join("\\n")}\n }`;\n };\n return {\n name,\n shaderCache,\n getShaderSource,\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: outputDataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputShape, outputShape)]\n })\n };\n };\n createReduceAttributesFromInputs = (inputs, attributes) => {\n const axes = [];\n if (inputs[1].dims[0] > 0) {\n inputs[1].getBigInt64Array().forEach((v) => axes.push(Number(v)));\n }\n return createAttributeWithCacheKey(\n { axes, keepDims: attributes.keepDims, noopWithEmptyAxes: attributes.noopWithEmptyAxes }\n );\n };\n runReduceProgram = (context, name, attributes, reduceOp) => {\n const inputs = context.inputs;\n const updatedAttributes = inputs.length === 1 ? attributes : createReduceAttributesFromInputs(inputs, attributes);\n context.compute(\n createReduceProgramInfo(\n name,\n { hint: updatedAttributes.cacheKey, inputDependencies: ["rank"] },\n [inputs[0]],\n updatedAttributes.noopWithEmptyAxes && updatedAttributes.axes.length === 0 ? 
noOp : reduceOp,\n updatedAttributes.axes,\n inputs[0].dataType,\n updatedAttributes.keepDims,\n updatedAttributes.noopWithEmptyAxes\n ),\n { inputs: [0] }\n );\n };\n reduceLogSumNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var value = ${output.type.storage}(0);`,\n "",\n `value += ${input.getByIndices("input_indices")};`,\n "value = log(value);"\n ];\n runReduceProgram(context, "ReduceLogSum", attributes, reduceOp);\n };\n reduceL1Naive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var value = ${output.type.storage}(0);`,\n "",\n `value += abs(${input.getByIndices("input_indices")});`,\n ""\n ];\n runReduceProgram(context, "ReduceL1", attributes, reduceOp);\n };\n reduceL2Naive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var t = ${output.type.value}(0); var value = ${output.type.value}(0);`,\n "",\n `t = ${input.getByIndices("input_indices")}; value += (t * t);`,\n "value = sqrt(value);"\n ];\n runReduceProgram(context, "ReduceL2", attributes, reduceOp);\n };\n reduceLogSumExpNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var value = ${output.type.storage}(0);`,\n "",\n `value += exp(${input.getByIndices("input_indices")});`,\n "value = log(value);"\n ];\n runReduceProgram(context, "ReduceLogSumExp", attributes, reduceOp);\n };\n reduceMaxNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, _output, axes) => {\n const idxZero = [];\n for (let k = 0; k < input.rank; k++) {\n if (axes.indexOf(k) >= 0 || axes.length === 0) {\n idxZero.push(input.indicesSet("input_indices", k, 0));\n }\n }\n return [\n `${idxZero.join("\\n")}`,\n `var value = ${input.getByIndices("input_indices")};`,\n `value = max(value, ${input.getByIndices("input_indices")});`,\n ""\n ];\n };\n 
runReduceProgram(context, "ReduceMax", attributes, reduceOp);\n };\n reduceMeanNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output, axes) => {\n let size = 1;\n for (let k = 0; k < input.rank; k++) {\n if (axes.indexOf(k) >= 0 || axes.length === 0) {\n size *= context.inputs[0].dims[k];\n }\n }\n return [\n "var sum = f32(0);",\n "",\n `sum += f32(${input.getByIndices("input_indices")});`,\n `let value = ${output.type.value}(sum / ${size});`\n ];\n };\n runReduceProgram(context, "ReduceMean", attributes, reduceOp);\n };\n reduceMinNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, _output, axes) => {\n const idxZero = [];\n for (let k = 0; k < input.rank; k++) {\n if (axes.indexOf(k) >= 0 || axes.length === 0) {\n idxZero.push(`input_indices[${k}] = 0;`);\n }\n }\n return [\n `${idxZero.join("\\n")}`,\n `var value = ${input.getByIndices("input_indices")};`,\n `value = min(value, ${input.getByIndices("input_indices")});`,\n ""\n ];\n };\n runReduceProgram(context, "ReduceMin", attributes, reduceOp);\n };\n reduceProdNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var value = ${output.type.storage}(1);`,\n "",\n `value *= ${input.getByIndices("input_indices")};`,\n ""\n ];\n runReduceProgram(context, "ReduceProd", attributes, reduceOp);\n };\n reduceSumNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var value = ${output.type.storage}(0);`,\n "",\n `value += ${input.getByIndices("input_indices")};`,\n ""\n ];\n runReduceProgram(context, "ReduceSum", attributes, reduceOp);\n };\n reduceSumSquareNaive = (context, attributes) => {\n validateInputs2(context.inputs);\n const reduceOp = (input, output) => [\n `var t = ${output.type.value}(0); var value = ${output.type.value}(0);`,\n "",\n `t = ${input.getByIndices("input_indices")}; value += t * 
t;`,\n ""\n ];\n runReduceProgram(context, "ReduceSumSquare", attributes, reduceOp);\n };\n useNaiveReduceMethod = (shape, axes, noopWithEmptyAxes) => {\n if (axes.length === 0) {\n return noopWithEmptyAxes;\n }\n let outputSize = 1;\n let reduceSize = 1;\n for (let dim = 0; dim < axes.length; dim++) {\n if (axes.indexOf(dim) === -1) {\n outputSize *= shape[dim];\n } else {\n reduceSize *= shape[dim];\n }\n }\n return reduceSize < 32 && outputSize > 1024;\n };\n reduceMean = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceMeanNaive(context, attributes);\n } else {\n reduceMeanShared(context, attributes);\n }\n };\n reduceL1 = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceL1Naive(context, attributes);\n } else {\n reduceL1Shared(context, attributes);\n }\n };\n reduceL2 = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceL2Naive(context, attributes);\n } else {\n reduceL2Shared(context, attributes);\n }\n };\n reduceLogSumExp = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceLogSumExpNaive(context, attributes);\n } else {\n reduceLogSumExpShared(context, attributes);\n }\n };\n reduceMax = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceMaxNaive(context, attributes);\n } else {\n reduceMaxShared(context, attributes);\n }\n };\n reduceMin = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceMinNaive(context, attributes);\n } else {\n reduceMinShared(context, attributes);\n }\n };\n reduceProd = (context, attributes) => {\n if 
(useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceProdNaive(context, attributes);\n } else {\n reduceProdShared(context, attributes);\n }\n };\n reduceSum = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceSumNaive(context, attributes);\n } else {\n reduceSumShared(context, attributes);\n }\n };\n reduceSumSquare = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceSumSquareNaive(context, attributes);\n } else {\n reduceSumSquareShared(context, attributes);\n }\n };\n reduceLogSum = (context, attributes) => {\n if (useNaiveReduceMethod(context.inputs[0].dims, attributes.axes, attributes.noopWithEmptyAxes)) {\n reduceLogSumNaive(context, attributes);\n } else {\n reduceLogSumShared(context, attributes);\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/argminmax.ts\n var validateInputs3, argMin, argMax, parseArgMinMaxAttributes;\n var init_argminmax = __esm({\n "web/lib/wasm/jsep/webgpu/ops/argminmax.ts"() {\n "use strict";\n init_wasm_common();\n init_attribute_with_cache_key();\n init_reduce();\n validateInputs3 = (inputs) => {\n if (!inputs || inputs.length === 0 || inputs.length > 2) {\n throw new Error("ArgMinMaxOp op requires 1 or 2 inputs.");\n }\n if (inputs[0].dataType !== 1 /* float */) {\n throw new Error("Invalid input type.");\n }\n };\n argMin = (context, attributes) => {\n validateInputs3(context.inputs);\n const argMinMaxOp = (input, output, axes) => {\n const idxZero = [];\n for (let k = 0; k < input.rank; k++) {\n if (axes.indexOf(k) >= 0 || axes.length === 0) {\n idxZero.push(`input_indices[${k}] = 0;`);\n }\n }\n return [\n `${idxZero.join("\\n")}`,\n `var value = ${input.getByIndices("input_indices")};\nvar best_index : i32 = 0;`,\n `if (${input.getByIndices("input_indices")} ${attributes.selectLastIndex > 0 ? 
"<=" : "<"} value) {\n value = ${input.getByIndices("input_indices")};\n best_index = i32(last_index);\n }`,\n "",\n output.setByOffset("global_idx", "best_index")\n ];\n };\n context.compute(\n createReduceProgramInfo(\n "ArgMin",\n { hint: attributes.cacheKey, inputDependencies: ["rank"] },\n [context.inputs[0]],\n argMinMaxOp,\n [attributes.axis],\n 7 /* int64 */,\n attributes.keepDims\n ),\n { inputs: [0] }\n );\n };\n argMax = (context, attributes) => {\n validateInputs3(context.inputs);\n const argMinMaxOp = (input, output, axes) => {\n const idxZero = [];\n for (let k = 0; k < input.rank; k++) {\n if (axes.indexOf(k) >= 0 || axes.length === 0) {\n idxZero.push(`input_indices[${k}] = 0;`);\n }\n }\n return [\n `${idxZero.join("\\n")}`,\n `var value = ${input.getByIndices("input_indices")};\nvar best_index : i32 = 0;`,\n `if (${input.getByIndices("input_indices")} ${attributes.selectLastIndex > 0 ? ">=" : ">"} value) {\n value = ${input.getByIndices("input_indices")};\n best_index = i32(last_index);\n }`,\n "",\n output.setByOffset("global_idx", "best_index")\n ];\n };\n context.compute(\n createReduceProgramInfo(\n "argMax",\n { hint: attributes.cacheKey, inputDependencies: ["rank"] },\n [context.inputs[0]],\n argMinMaxOp,\n [attributes.axis],\n 7 /* int64 */,\n attributes.keepDims\n ),\n { inputs: [0] }\n );\n };\n parseArgMinMaxAttributes = (attributes) => createAttributeWithCacheKey(attributes);\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/concat.ts\n var validateInputs4, calculateInputIndexImpl, assignOutputData, createConcatProgramInfo, concat, parseConcatAttributes;\n var init_concat = __esm({\n "web/lib/wasm/jsep/webgpu/ops/concat.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs4 = (inputs, axis) => {\n if (!inputs || inputs.length < 1) {\n throw new Error("too few inputs");\n }\n const referenceIndex = 0;\n const referenceInput = inputs[referenceIndex];\n const inputType 
= referenceInput.dataType;\n const inputRank = referenceInput.dims.length;\n inputs.forEach((input, i) => {\n if (i === referenceIndex) {\n return;\n }\n if (input.dataType !== inputType) {\n throw new Error("input tensors should be one type");\n }\n if (input.dims.length !== inputRank) {\n throw new Error("input tensors should have the same shape");\n }\n input.dims.forEach((dim, i2) => {\n if (i2 !== axis && dim !== referenceInput.dims[i2]) {\n throw new Error("non concat dimensions must match");\n }\n });\n });\n };\n calculateInputIndexImpl = (numberOfTensors, sizeInConcatAxisStr) => `\n fn calculateInputIndex(index: u32) -> u32 {\n let sizeInConcatAxis = array(${sizeInConcatAxisStr});\n for (var i: u32 = 0u; i < ${numberOfTensors}; i += 1u ) {\n if (index < sizeInConcatAxis[i]) {\n return i;\n }\n }\n return ${numberOfTensors}u;\n }`;\n assignOutputData = (inputs, output) => {\n const numberOfTensors = inputs.length;\n const codeLines = [];\n for (let i = 0; i < numberOfTensors; ++i) {\n const returnSnippet = output.setByOffset("global_idx", inputs[i].getByIndices("indices"));\n if (numberOfTensors === 1) {\n codeLines.push(returnSnippet);\n } else if (i === 0) {\n codeLines.push(`if (inputIndex == ${i}u) { ${returnSnippet} }`);\n } else if (i === numberOfTensors - 1) {\n codeLines.push(`else { ${returnSnippet} }`);\n } else {\n codeLines.push(`else if (inputIndex == ${i}) { ${returnSnippet} }`);\n }\n }\n return codeLines.join("\\n");\n };\n createConcatProgramInfo = (inputs, adjustedAxis, outputShape, dataType) => {\n const outputSize = ShapeUtil.size(outputShape);\n const sizeInConcatAxis = new Array(inputs.length);\n const inputVars = new Array(inputs.length);\n let previousSum = 0;\n const inputDependencies = [];\n const inputRanks = [];\n const programUniforms = [{ type: 12 /* uint32 */, data: outputSize }];\n for (let i = 0; i < inputs.length; ++i) {\n previousSum += inputs[i].dims[adjustedAxis];\n sizeInConcatAxis[i] = previousSum;\n 
inputRanks.push(inputs[i].dims.length);\n inputVars[i] = inputVariable(`input${i}`, dataType, inputRanks[i]);\n inputDependencies.push("rank");\n programUniforms.push({ type: 12 /* uint32 */, data: sizeInConcatAxis[i] });\n }\n for (let i = 0; i < inputs.length; ++i) {\n programUniforms.push(...createTensorShapeVariables(inputs[i].dims));\n }\n programUniforms.push(...createTensorShapeVariables(outputShape));\n const output = outputVariable("output", dataType, outputShape.length);\n const indicesAxis = output.indicesGet("indices", adjustedAxis);\n const sizeInConcatAxisStr = Array.from(Array(sizeInConcatAxis.length).keys()).map((i) => `uniforms.sizeInConcatAxis${i}`).join(",");\n const getShaderSource = (shaderHelper) => `\n\n ${(() => {\n shaderHelper.registerUniform("outputSize", "u32");\n for (let i = 0; i < inputs.length; i++) {\n shaderHelper.registerUniform(`sizeInConcatAxis${i}`, "u32");\n }\n return shaderHelper.declareVariables(...inputVars, output);\n })()}\n\n ${calculateInputIndexImpl(sizeInConcatAxis.length, sizeInConcatAxisStr)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n\n var indices = ${output.offsetToIndices("global_idx")};\n\n let inputIndex = calculateInputIndex(${indicesAxis});\n if (inputIndex != 0u) {\n let sizeInConcatAxis = array(${sizeInConcatAxisStr});\n ${indicesAxis} -= sizeInConcatAxis[inputIndex - 1u];\n }\n\n ${assignOutputData(inputVars, output)}\n }`;\n return {\n name: "Concat",\n shaderCache: { hint: `${adjustedAxis}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n };\n };\n concat = (context, attributes) => {\n const inputs = context.inputs;\n const inputShape = inputs[0].dims;\n const adjustedAxis = ShapeUtil.normalizeAxis(attributes.axis, inputShape.length);\n validateInputs4(inputs, 
adjustedAxis);\n const outputShape = inputShape.slice();\n outputShape[adjustedAxis] = inputs.reduce((sum, input) => sum + (input.dims.length > adjustedAxis ? input.dims[adjustedAxis] : 0), 0);\n const nonEmptyInputs = inputs.filter((input) => ShapeUtil.size(input.dims) > 0);\n context.compute(\n createConcatProgramInfo(nonEmptyInputs, adjustedAxis, outputShape, inputs[0].dataType),\n { inputs: nonEmptyInputs }\n );\n };\n parseConcatAttributes = (attributes) => createAttributeWithCacheKey({ axis: attributes.axis });\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/attention.ts\n var validateAttentionInputs, createInPlaceSoftmaxProgramInfo, createAttentionProbsProgramInfo, createVxAttentionScoreProgramInfo, applyAttention, prepare, attention;\n var init_attention = __esm({\n "web/lib/wasm/jsep/webgpu/ops/attention.ts"() {\n "use strict";\n init_wasm_common();\n init_types();\n init_common();\n init_concat();\n validateAttentionInputs = (inputs, attributes) => {\n const input = inputs[0];\n const weights = inputs[1];\n const bias = inputs[2];\n const maskIndex = inputs[3];\n const past = inputs[4];\n const relativePositionBias = inputs[5];\n if (past && relativePositionBias) {\n throw new Error("Attention cannot have both past and relative_position_bias");\n }\n if (input.dims.length !== 3) {\n throw new Error(\'Input "input" must have 3 dimensions\');\n }\n const batchSize = input.dims[0];\n const sequenceLength = input.dims[1];\n const inputHiddenSize = input.dims[2];\n if (bias.dims.length !== 1) {\n throw new Error(\'Input "bias" is expected to have 1 dimensions\');\n }\n if (weights.dims.length !== 2) {\n throw new Error(\'Input "weights" is expected to have 2 dimensions\');\n }\n if (weights.dims[0] !== inputHiddenSize) {\n throw new Error("Input 1 dimension 0 should have same length as dimension 2 of input 0");\n }\n if (bias.dims[0] !== weights.dims[1]) {\n throw new Error(\'Input "bias" dimension 0 should have same length as dimension 1 of input 
"weights"\');\n }\n let qHiddenSize = bias.dims[0] / 3;\n let kHiddenSize = qHiddenSize;\n let vHiddenSize = kHiddenSize;\n if (attributes.qkvHiddenSizes.length > 0) {\n if (attributes.qkvHiddenSizes.length !== 3) {\n throw new Error("qkv_hidden_sizes attribute should have 3 elements");\n }\n for (const sz of attributes.qkvHiddenSizes) {\n if (sz % attributes.numHeads !== 0) {\n throw new Error("qkv_hidden_sizes should be divisible by num_heads");\n }\n }\n qHiddenSize = attributes.qkvHiddenSizes[0];\n kHiddenSize = attributes.qkvHiddenSizes[1];\n vHiddenSize = attributes.qkvHiddenSizes[2];\n }\n const kvSequenceLength = sequenceLength;\n if (qHiddenSize !== kHiddenSize) {\n throw new Error("qkv_hidden_sizes first element should be same as the second");\n }\n if (bias.dims[0] !== qHiddenSize + kHiddenSize + vHiddenSize) {\n throw new Error(\'Input "bias" dimension 0 should have same length as sum of Q/K/V hidden sizes\');\n }\n let pastSequenceLength = 0;\n if (past) {\n if (kHiddenSize !== vHiddenSize) {\n throw new Error(\'Input "past" expect k_hidden_size == v_hidden_size\');\n }\n if (past.dims.length !== 5) {\n throw new Error(\'Input "past" must have 5 dimensions\');\n }\n if (past.dims[0] !== 2) {\n throw new Error(\'Input "past" first dimension must be 2\');\n }\n if (past.dims[1] !== batchSize) {\n throw new Error(\'Input "past" second dimension must be batch_size\');\n }\n if (past.dims[2] !== attributes.numHeads) {\n throw new Error(\'Input "past" third dimension must be num_heads\');\n }\n if (past.dims[4] !== kHiddenSize / attributes.numHeads) {\n throw new Error(\'Input "past" fifth dimension must be k_hidden_size / num_heads\');\n }\n if (!attributes.pastPresentShareBuffer) {\n pastSequenceLength = past.dims[3];\n }\n }\n const totalSequenceLength = kvSequenceLength + pastSequenceLength;\n const maxSequenceLength = -1;\n const maskType = 0 /* none */;\n if (maskIndex) {\n throw new Error("Mask not supported");\n }\n if (past) {\n throw new 
Error("past is not supported");\n }\n return {\n batchSize,\n sequenceLength,\n pastSequenceLength,\n kvSequenceLength,\n totalSequenceLength,\n maxSequenceLength,\n inputHiddenSize,\n hiddenSize: qHiddenSize,\n vHiddenSize,\n headSize: Math.floor(qHiddenSize / attributes.numHeads),\n vHeadSize: Math.floor(vHiddenSize / attributes.numHeads),\n numHeads: attributes.numHeads,\n isUnidirectional: false,\n pastPresentShareBuffer: false,\n maskFilterValue: attributes.maskFilterValue,\n maskType,\n scale: attributes.scale,\n broadcastResPosBias: false,\n passPastInKv: false,\n qkvFormat: 1 /* qkvBNSH */\n };\n };\n createInPlaceSoftmaxProgramInfo = (_context, input, n, d) => {\n const components = getMaxComponents(d);\n let WG = 64;\n const dComp = d / components;\n if (dComp < WG) {\n WG = 1;\n } else if (dComp / 8 < 64) {\n WG = Math.ceil(dComp / 8);\n }\n const elementsPerThread = Math.ceil(d / components / WG);\n const programUniforms = [\n { type: input.dataType, data: 1 / d },\n { type: 12 /* uint32 */, data: dComp },\n { type: 12 /* uint32 */, data: elementsPerThread }\n ];\n const dataType = tensorTypeToWsglStorageType(input.dataType, components);\n const f32Type = tensorTypeToWsglValueType(1 /* float */, components);\n const getShaderSource = (shaderHelper) => {\n const inputHelper = outputVariable("x", input.dataType, input.dims, components);\n const elemValueType = tensorTypeToWsglValueType(input.dataType);\n const uniforms = [\n { name: "d_inv", type: elemValueType },\n { name: "d_comp", type: "u32" },\n { name: "elements_per_thread", type: "u32" }\n ];\n return `\n var thread_max: array;\n var thread_sum: array;\n ${shaderHelper.registerUniforms(uniforms).declareVariables(inputHelper)}\n ${shaderHelper.mainStart([\n WG,\n 1,\n 1\n ])}\n let local_offset = local_idx * uniforms.elements_per_thread;\n let offset = workgroup_id.x * uniforms.d_comp + local_offset;\n\n var thread_max_vector = ${f32Type}(-3.402823e+38f);\n for (var i: u32 = 0; i < 
uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {\n thread_max_vector = max(${f32Type}(x[offset + i]), thread_max_vector);\n }\n thread_max[local_idx] = ${(() => {\n switch (components) {\n case 1:\n return "thread_max_vector";\n case 2:\n return "max(thread_max_vector.x, thread_max_vector.y)";\n case 4:\n return "max(max(thread_max_vector.x, thread_max_vector.y), max(thread_max_vector.z, thread_max_vector.w))";\n default:\n throw new Error(`Unsupported components: ${components}`);\n }\n })()};\n workgroupBarrier();\n\n var max_value = f32(-3.402823e+38f);\n for (var i = 0u; i < ${WG}; i++) {\n max_value = max(thread_max[i], max_value);\n }\n\n var sum_vector = ${f32Type}(0);\n for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {\n sum_vector += exp(${f32Type}(x[offset + i]) - max_value);\n }\n thread_sum[local_idx] = ${(() => {\n switch (components) {\n case 1:\n return "sum_vector";\n case 2:\n return "sum_vector.x + sum_vector.y";\n case 4:\n return "sum_vector.x + sum_vector.y + sum_vector.z + sum_vector.w";\n default:\n throw new Error(`Unsupported components: ${components}`);\n }\n })()};\n workgroupBarrier();\n\n var sum: f32 = 0;\n for (var i = 0u; i < ${WG}; i++) {\n sum += thread_sum[i];\n }\n\n if (sum == 0) {\n for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {\n x[offset + i] = ${inputHelper.type.value}(uniforms.d_inv);\n }\n } else {\n for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < uniforms.d_comp; i++) {\n var f32input = ${f32Type}(x[offset + i]);\n x[offset + i] = ${inputHelper.type.value}(exp(f32input - max_value) / sum);\n }\n }\n }`;\n };\n return {\n name: "AttentionProbsSoftmax",\n shaderCache: { hint: `${WG};${dataType};${components}` },\n getShaderSource,\n getRunData: () => ({ outputs: [], dispatchGroup: { x: n }, programUniforms })\n };\n };\n createAttentionProbsProgramInfo = (_context, q, 
key, relativePositionBias, parameters, attributes, pastSequenceLength) => {\n const totalSequenceLength = pastSequenceLength + parameters.kvSequenceLength;\n const probsShape = [parameters.batchSize, parameters.numHeads, parameters.sequenceLength, totalSequenceLength];\n const alpha = attributes.scale === 0 ? 1 / Math.sqrt(parameters.headSize) : attributes.scale;\n const components = getMaxComponents(parameters.headSize);\n const vectorizedHeadSize = parameters.headSize / components;\n const TILE_SIZE = 12;\n const dispatch = {\n x: Math.ceil(totalSequenceLength / TILE_SIZE),\n y: Math.ceil(parameters.sequenceLength / TILE_SIZE),\n z: parameters.batchSize * parameters.numHeads\n };\n const programUniforms = [\n { type: 12 /* uint32 */, data: parameters.sequenceLength },\n { type: 12 /* uint32 */, data: vectorizedHeadSize },\n { type: 12 /* uint32 */, data: totalSequenceLength },\n { type: 12 /* uint32 */, data: parameters.numHeads },\n { type: 1 /* float */, data: alpha }\n ];\n const inputDependencies = ["type", "type"];\n if (relativePositionBias) {\n inputDependencies.push("rank");\n programUniforms.push(...createTensorShapeVariables(relativePositionBias.dims));\n }\n const getShaderSource = (shaderHelper) => {\n const qInput = inputVariable("q", q.dataType, q.dims, components);\n const kInput = inputVariable("key", key.dataType, key.dims, components);\n const inputVars = [qInput, kInput];\n const relativePositionBiasInput = relativePositionBias ? 
inputVariable("relative_position_bias", relativePositionBias.dataType, relativePositionBias.dims.length) : void 0;\n if (relativePositionBiasInput) {\n inputVars.push(relativePositionBiasInput);\n }\n const output = outputVariable("output", q.dataType, probsShape);\n const f32Type = tensorTypeToWsglValueType(1 /* float */, components);\n const uniforms = [\n { name: "M", type: "u32" },\n { name: "K", type: "u32" },\n { name: "N", type: "u32" },\n { name: "num_heads", type: "u32" },\n { name: "alpha", type: "f32" }\n ];\n return `\n const TILE_SIZE = ${TILE_SIZE}u;\n\n var tileQ: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>;\n var tileK: array<${qInput.type.storage}, ${TILE_SIZE * TILE_SIZE}>;\n ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}\n ${shaderHelper.mainStart([\n TILE_SIZE,\n TILE_SIZE,\n 1\n ])}\n // x holds the N and y holds the M\n let headIdx = workgroup_id.z;\n let m = workgroup_id.y * TILE_SIZE;\n let n = workgroup_id.x * TILE_SIZE;\n let qOffset = uniforms.M * uniforms.K * headIdx + m * uniforms.K;\n let kOffset = uniforms.N * uniforms.K * headIdx + n * uniforms.K;\n\n var value = ${f32Type}(0);\n for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {\n if (global_id.y < uniforms.M && w + local_id.x < uniforms.K) {\n tileQ[TILE_SIZE * local_id.y + local_id.x] = q[qOffset + local_id.y * uniforms.K + w + local_id.x];\n }\n if (n + local_id.y < uniforms.N && w + local_id.x < uniforms.K) {\n tileK[TILE_SIZE * local_id.y + local_id.x] = key[kOffset + local_id.y * uniforms.K + w + local_id.x];\n }\n workgroupBarrier();\n\n for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) {\n value += ${f32Type}(tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * local_id.x + k]);\n }\n\n workgroupBarrier();\n }\n\n let headOffset = headIdx * uniforms.M * uniforms.N;\n if (global_id.y < uniforms.M && global_id.x < uniforms.N) {\n let outputIdx = headOffset + global_id.y * uniforms.N + global_id.x;\n var sum: f32 
= ${(() => {\n switch (components) {\n case 1:\n return "value";\n case 2:\n return "value.x + value.y";\n case 4:\n return "value.x + value.y + value.z + value.w";\n default:\n throw new Error(`Unsupported components: ${components}`);\n }\n })()};\n\n ${(() => {\n if (relativePositionBiasInput) {\n return `\n let batch = workgroup_id.z / uniforms.num_heads;\n let head = workgroup_id.z % uniforms.num_heads;\n var indices = ${relativePositionBiasInput.type.indices}(batch, head, global_id.y, global_id.x);\n output[outputIdx] = ${output.type.value}(sum * uniforms.alpha) + ${relativePositionBiasInput.getByIndices("indices")};`;\n }\n return `output[outputIdx] = ${output.type.value} (sum * uniforms.alpha);`;\n })()}\n }\n }`;\n };\n return {\n name: "AttentionProbs",\n shaderCache: { hint: `${components}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: probsShape, dataType: q.dataType, gpuDataType: 0 /* default */ }],\n dispatchGroup: dispatch,\n programUniforms\n }),\n getShaderSource\n };\n };\n createVxAttentionScoreProgramInfo = (_context, probs, v, params, pastSequenceLength) => {\n const totalSequenceLength = pastSequenceLength + params.kvSequenceLength;\n const outputShape = [params.batchSize, params.sequenceLength, params.vHiddenSize];\n const TILE_SIZE = 12;\n const dispatch = {\n x: Math.ceil(params.vHeadSize / TILE_SIZE),\n y: Math.ceil(params.sequenceLength / TILE_SIZE),\n z: params.batchSize * params.numHeads\n };\n const programUniforms = [\n { type: 12 /* uint32 */, data: params.sequenceLength },\n { type: 12 /* uint32 */, data: totalSequenceLength },\n { type: 12 /* uint32 */, data: params.vHeadSize },\n { type: 12 /* uint32 */, data: params.numHeads },\n { type: 12 /* uint32 */, data: params.vHiddenSize }\n ];\n const inputDependencies = ["type", "type"];\n const getShaderSource = (shaderHelper) => {\n const probsHelper = inputVariable("probs", probs.dataType, probs.dims);\n const vHelper = inputVariable("v", v.dataType, v.dims);\n 
const output = outputVariable("output", probs.dataType, outputShape);\n const uniforms = [\n { name: "M", type: "u32" },\n { name: "K", type: "u32" },\n { name: "N", type: "u32" },\n { name: "num_heads", type: "u32" },\n { name: "v_hidden_size", type: "u32" }\n ];\n return `\n const TILE_SIZE = ${TILE_SIZE}u;\n var tileQ: array<${probsHelper.type.value}, ${TILE_SIZE * TILE_SIZE}>;\n var tileK: array<${probsHelper.type.value}, ${TILE_SIZE * TILE_SIZE}>;\n ${shaderHelper.registerUniforms(uniforms).declareVariables(probsHelper, vHelper, output)}\n ${shaderHelper.mainStart([\n TILE_SIZE,\n TILE_SIZE,\n 1\n ])}\n let headIdx = workgroup_id.z;\n let m = global_id.y;\n let n = global_id.x;\n\n let offsetA = headIdx * (uniforms.M * uniforms.K) + m * uniforms.K;\n let offsetB = headIdx * (uniforms.N * uniforms.K) + n;\n\n var value = ${probsHelper.type.storage}(0);\n for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {\n if (m < uniforms.M && w + local_id.x < uniforms.K) {\n tileQ[TILE_SIZE * local_id.y + local_id.x] = probs[offsetA + w + local_id.x];\n }\n if (n < uniforms.N && w + local_id.y < uniforms.K) {\n tileK[TILE_SIZE * local_id.y + local_id.x] = v[offsetB + (w + local_id.y) * uniforms.N];\n }\n workgroupBarrier();\n for (var k: u32 = 0u; k < TILE_SIZE && w+k < uniforms.K; k++) {\n value += tileQ[TILE_SIZE * local_id.y + k] * tileK[TILE_SIZE * k + local_id.x];\n }\n workgroupBarrier();\n }\n\n // we need to transpose output from BNSH_v to BSND_v\n let batchIdx = workgroup_id.z / uniforms.num_heads;\n let currentBatchHeadNumber = workgroup_id.z % uniforms.num_heads;\n if (m < uniforms.M && n < uniforms.N) {\n let outputIdx = batchIdx * uniforms.M * uniforms.v_hidden_size + m * uniforms.v_hidden_size\n + currentBatchHeadNumber * uniforms.N + n;\n output[outputIdx] = value;\n }\n }`;\n };\n return {\n name: "AttentionScore",\n shaderCache: { inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: probs.dataType, gpuDataType: 0 /* 
default */ }],\n dispatchGroup: dispatch,\n programUniforms\n }),\n getShaderSource\n };\n };\n applyAttention = (context, q, k, v, _maskIndex, _past, pastKey, pastValue, relativePositionBias, parameters, attributes) => {\n const outputPresentKey = context.outputCount > 1;\n const outputPresentValue = context.outputCount > 2;\n const pastSequenceLength = outputPresentKey && outputPresentValue ? parameters.pastSequenceLength : 0;\n const totalSequenceLength = pastSequenceLength + parameters.kvSequenceLength;\n const presentKeyShape = [parameters.batchSize, parameters.numHeads, totalSequenceLength, parameters.headSize];\n const concatKeyInputs = pastKey ? [pastKey, k] : [k];\n const key = outputPresentKey ? context.compute(\n createConcatProgramInfo(concatKeyInputs, 2, presentKeyShape, k.dataType),\n { inputs: concatKeyInputs, outputs: [1] }\n )[0] : k;\n const presentValueShape = [parameters.batchSize, parameters.numHeads, totalSequenceLength, parameters.headSize];\n const concatValueInputs = pastValue ? [pastValue, v] : [v];\n const value = outputPresentValue ? 
context.compute(\n createConcatProgramInfo(concatValueInputs, 2, presentValueShape, v.dataType),\n { inputs: concatValueInputs, outputs: [2] }\n )[0] : v;\n const inputsK = [q, key];\n if (relativePositionBias) {\n inputsK.push(relativePositionBias);\n }\n const probs = context.compute(\n createAttentionProbsProgramInfo(\n context,\n q,\n key,\n relativePositionBias,\n parameters,\n attributes,\n pastSequenceLength\n ),\n { inputs: inputsK, outputs: [-1] }\n )[0];\n context.compute(\n createInPlaceSoftmaxProgramInfo(\n context,\n probs,\n parameters.batchSize * parameters.numHeads * parameters.sequenceLength,\n totalSequenceLength\n ),\n { inputs: [probs], outputs: [] }\n );\n const inputsV = [probs, value];\n context.compute(\n createVxAttentionScoreProgramInfo(context, probs, value, parameters, pastSequenceLength),\n { inputs: inputsV, outputs: [0] }\n );\n };\n prepare = (context, parameters) => {\n const outputShape = [\n parameters.batchSize,\n parameters.numHeads,\n parameters.sequenceLength,\n parameters.headSize\n ];\n const M = parameters.sequenceLength;\n const K = parameters.inputHiddenSize;\n const N = parameters.headSize;\n const TILE_SIZE = 12;\n const dispatch = {\n x: Math.ceil(parameters.headSize / TILE_SIZE),\n y: Math.ceil(parameters.sequenceLength / TILE_SIZE),\n z: parameters.batchSize * parameters.numHeads\n };\n const inputs = [context.inputs[0], context.inputs[1], context.inputs[2]];\n const programUniforms = [\n { type: 12 /* uint32 */, data: M },\n { type: 12 /* uint32 */, data: K },\n { type: 12 /* uint32 */, data: N },\n { type: 12 /* uint32 */, data: parameters.numHeads },\n { type: 12 /* uint32 */, data: parameters.headSize },\n { type: 12 /* uint32 */, data: parameters.hiddenSize },\n { type: 12 /* uint32 */, data: parameters.hiddenSize + parameters.hiddenSize + parameters.vHiddenSize }\n ];\n const getShaderSource = (shaderHelper) => {\n const outputQ = outputVariable("output_q", inputs[0].dataType, outputShape);\n const outputK = 
outputVariable("output_k", inputs[0].dataType, outputShape);\n const outputV = outputVariable("output_v", inputs[0].dataType, outputShape);\n const input = inputVariable("input", inputs[0].dataType, inputs[0].dims);\n const weight = inputVariable("weight", inputs[1].dataType, inputs[1].dims);\n const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims);\n const dataType = input.type.storage;\n const uniforms = [\n { name: "M", type: "u32" },\n { name: "K", type: "u32" },\n { name: "N", type: "u32" },\n { name: "num_heads", type: "u32" },\n { name: "head_size", type: "u32" },\n { name: "hidden_size", type: "u32" },\n { name: "ldb", type: "u32" }\n ];\n return `\n const TILE_SIZE = ${TILE_SIZE}u;\n var tileInput: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;\n var tileWeightQ: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;\n var tileWeightK: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;\n var tileWeightV: array<${dataType}, ${TILE_SIZE * TILE_SIZE}>;\n ${shaderHelper.registerUniforms(uniforms).declareVariables(input, weight, bias, outputQ, outputK, outputV)}\n ${shaderHelper.mainStart([\n TILE_SIZE,\n TILE_SIZE,\n 1\n ])}\n let batchIndex = workgroup_id.z / uniforms.num_heads;\n let headNumber = workgroup_id.z % uniforms.num_heads;\n let m = global_id.y;\n let n = global_id.x;\n\n let inputOffset = batchIndex * (uniforms.M * uniforms.K) + m * uniforms.K;\n let biasOffsetQ = headNumber * uniforms.head_size;\n let biasOffsetK = uniforms.hidden_size + biasOffsetQ;\n let biasOffsetV = uniforms.hidden_size + biasOffsetK;\n\n var valueQ = ${dataType}(0);\n var valueK = ${dataType}(0);\n var valueV = ${dataType}(0);\n for (var w: u32 = 0u; w < uniforms.K; w += TILE_SIZE) {\n if (m < uniforms.M && w + local_id.x < uniforms.K) {\n tileInput[TILE_SIZE * local_id.y + local_id.x] = input[inputOffset + w + local_id.x];\n }\n if (n < uniforms.N && w + local_id.y < uniforms.K) {\n let offset = n + (w + local_id.y) * uniforms.ldb;\n tileWeightQ[TILE_SIZE * local_id.y + 
local_id.x] = weight[biasOffsetQ + offset];\n tileWeightK[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetK + offset];\n tileWeightV[TILE_SIZE * local_id.y + local_id.x] = weight[biasOffsetV + offset];\n }\n workgroupBarrier();\n for (var k: u32 = 0u; k ({\n outputs: [\n { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ },\n { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ },\n { dims: outputShape, dataType: context.inputs[0].dataType, gpuDataType: 0 /* default */ }\n ],\n dispatchGroup: dispatch,\n programUniforms\n }),\n getShaderSource\n },\n { inputs, outputs: [-1, -1, -1] }\n );\n };\n attention = (context, attributes) => {\n const params = validateAttentionInputs(context.inputs, attributes);\n const [q, k, v] = prepare(context, params);\n return applyAttention(\n context,\n q,\n k,\n v,\n context.inputs[4],\n void 0,\n void 0,\n void 0,\n context.inputs[5],\n params,\n attributes\n );\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/batch-norm.ts\n var validateInputs5, createBatchNormInferenceProgramInfo, parseBatchNormAttributes, batchNorm;\n var init_batch_norm = __esm({\n "web/lib/wasm/jsep/webgpu/ops/batch-norm.ts"() {\n "use strict";\n init_esm();\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs5 = (inputs, attributes) => {\n if (!inputs || inputs.length !== 5) {\n throw new Error("BatchNormalization requires 5 inputs");\n }\n const checkShapeEqual = (actual, expected, message) => {\n const r = expected.length;\n if (r !== actual.length) {\n throw new Error(`${message}: num dimensions != ${r}`);\n }\n expected.forEach((v, i) => {\n if (v !== actual[i]) {\n throw new Error(`${message}: dim[${i}] do not match`);\n }\n });\n };\n if (inputs[0].dims.length > 1) {\n const shape = attributes.format === "NHWC" ? attributes.spatial ? 
inputs[0].dims.slice(-1) : inputs[0].dims.slice(-1).concat(inputs[0].dims.slice(1, inputs[0].dims.length - 1)) : inputs[0].dims.slice(1, attributes.spatial ? 2 : void 0);\n checkShapeEqual(inputs[1].dims, shape, "Invalid input scale");\n checkShapeEqual(inputs[2].dims, shape, "Invalid input B");\n checkShapeEqual(inputs[3].dims, shape, "Invalid input mean");\n checkShapeEqual(inputs[4].dims, shape, "Invalid input var");\n } else {\n checkShapeEqual(inputs[1].dims, [1], "Invalid input scale");\n checkShapeEqual(inputs[2].dims, [1], "Invalid input B");\n checkShapeEqual(inputs[3].dims, [1], "Invalid input mean");\n checkShapeEqual(inputs[4].dims, [1], "Invalid input var");\n }\n };\n createBatchNormInferenceProgramInfo = (inputs, attributes) => {\n const { epsilon, spatial, format } = attributes;\n const yShape = inputs[0].dims;\n const components = spatial ? getMaxComponents(yShape[yShape.length - 1]) : 1;\n const cComponents = format === "NHWC" && yShape.length > 1 ? components : 1;\n const outputSize = ShapeUtil.size(yShape) / components;\n const useShapesUniforms = spatial;\n const shapeOrRank = useShapesUniforms ? yShape.length : yShape;\n const x = inputVariable("x", inputs[0].dataType, inputs[0].dims, components);\n const scale = inputVariable("scale", inputs[1].dataType, inputs[1].dims, cComponents);\n const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims, cComponents);\n const inputMean = inputVariable("inputMean", inputs[3].dataType, inputs[3].dims, cComponents);\n const inputVar = inputVariable("inputVar", inputs[4].dataType, inputs[4].dims, cComponents);\n const y = outputVariable("y", inputs[0].dataType, shapeOrRank, components);\n const calcCOffset = () => {\n let cOffset = "";\n if (spatial) {\n cOffset = `let cOffset = ${yShape.length === 1 ? "0u" : format === "NHWC" ? 
`outputIndices[${yShape.length - 1}] / ${components}` : "outputIndices[1]"};`;\n } else {\n if (format === "NCHW") {\n cOffset = `\n ${y.indicesSet("outputIndices", "0", "0")}\n let cOffset = ${y.indicesToOffset("outputIndices")};`;\n } else {\n cOffset = `var cIndices = ${scale.type.indices}(0);\n cIndices[0] = outputIndices[${yShape.length - 1}];`;\n for (let i = 1; i < scale.rank; i++) {\n cOffset += `cIndices[${i}] = outputIndices[${i}];`;\n }\n cOffset += `let cOffset = ${scale.indicesToOffset("cIndices")};`;\n }\n }\n return cOffset;\n };\n const getInferenceModeShaderSource = (helper) => `\n const epsilon = ${epsilon};\n ${helper.registerUniform("outputSize", "u32").declareVariables(x, scale, bias, inputMean, inputVar, y)}\n ${helper.mainStart()}\n ${helper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n var outputIndices = ${y.offsetToIndices(`global_idx * ${components}`)};\n ${calcCOffset()}\n let scale = ${scale.getByOffset("cOffset")};\n let bias = ${bias.getByOffset("cOffset")};\n let inputMean = ${inputMean.getByOffset("cOffset")};\n let inputVar = ${inputVar.getByOffset("cOffset")};\n let x = ${x.getByOffset("global_idx")};\n let value = (x - inputMean) * inverseSqrt(inputVar + epsilon) * scale + bias;\n ${y.setByOffset("global_idx", "value")}\n }`;\n return {\n name: "BatchNormalization",\n shaderCache: {\n hint: `${attributes.epsilon}_${attributes.format}_${spatial}_${components}`,\n inputDependencies: useShapesUniforms ? ["rank", "type", "type", "type", "type"] : void 0\n },\n getShaderSource: getInferenceModeShaderSource,\n getRunData: () => ({\n outputs: [{ dims: inputs[0].dims, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms: useShapesUniforms ? 
[\n { type: 12 /* uint32 */, data: outputSize },\n ...createTensorShapeVariables(yShape)\n ] : [\n { type: 12 /* uint32 */, data: outputSize }\n ]\n })\n };\n };\n parseBatchNormAttributes = (attributes) => createAttributeWithCacheKey(attributes);\n batchNorm = (context, attributes) => {\n const { inputs, outputCount } = context;\n const updatedAttributes = parseBatchNormAttributes({ ...attributes, outputCount });\n if (env2.webgpu.validateInputContent) {\n validateInputs5(inputs, updatedAttributes);\n }\n if (attributes.trainingMode) {\n throw new Error("BatchNormalization trainingMode is not supported yet.");\n } else {\n context.compute(createBatchNormInferenceProgramInfo(inputs, updatedAttributes));\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/bias-add.ts\n var validateInputs6, createBiasAddProgramInfo, biasAdd;\n var init_bias_add = __esm({\n "web/lib/wasm/jsep/webgpu/ops/bias-add.ts"() {\n "use strict";\n init_util();\n init_common();\n validateInputs6 = (inputs) => {\n if (inputs[0].dims.length !== 3) {\n throw new Error("input should have 3 dimensions");\n }\n if (![320, 640, 1280].includes(inputs[0].dims[2])) {\n throw new Error("number of channels should be 320, 640 or 1280");\n }\n if (inputs[1].dims.length !== 1) {\n throw new Error("bias is expected to have 1 dimensions");\n }\n if (inputs[0].dims[2] !== inputs[1].dims[0]) {\n throw new Error("last dimension of input and bias are not the same");\n }\n };\n createBiasAddProgramInfo = (inputs) => {\n const outputShape = inputs[0].dims;\n const channels = inputs[0].dims[2];\n const outputSize = ShapeUtil.size(outputShape) / 4;\n const dataType = inputs[0].dataType;\n const input = inputVariable("input", dataType, outputShape, 4);\n const bias = inputVariable("bias", dataType, [channels], 4);\n const residual = inputVariable("residual", dataType, outputShape, 4);\n const output = outputVariable("output", dataType, outputShape, 4);\n const getShaderSource = (shaderHelper) => `\n const channels = 
${channels}u / 4;\n ${shaderHelper.declareVariables(input, bias, residual, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}\n let value = ${input.getByOffset("global_idx")}\n + ${bias.getByOffset("global_idx % channels")} + ${residual.getByOffset("global_idx")};\n ${output.setByOffset("global_idx", "value")}\n }`;\n return {\n name: "BiasAdd",\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) }\n }),\n getShaderSource\n };\n };\n biasAdd = (context) => {\n validateInputs6(context.inputs);\n context.compute(createBiasAddProgramInfo(context.inputs));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/unary-op.ts\n var createElementwiseProgramShader, createElementwiseProgramInfo, abs, acos, acosh, asin, asinh, atan, atanh, parseCastAttributes, cast, generateClipAttributesFromInputs, clip, ceil, cos, cosh, parseAlphaAttributes, elu, erfImpl, erf, exp, floor, gelu, leakyRelu, not, neg, reciprocal, relu, sigmoid, parseHardSigmoidAttributes, hardSigmoid, sin, sinh, sqrt, tan, tanhExpression, tanh, fastGeluImpl, fastGeluExpression, fastGelu, thresholdedRelu, log;\n var init_unary_op = __esm({\n "web/lib/wasm/jsep/webgpu/ops/unary-op.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n createElementwiseProgramShader = (shaderHelper, datasize, inputDataType, outputDataType, funcCall, additionalImplementation) => {\n const vecSize = Math.ceil(datasize / 4);\n let expression = "";\n if (typeof funcCall === "string") {\n expression = `${funcCall}(a)`;\n } else {\n expression = funcCall("a");\n }\n const input = inputVariable("inputData", inputDataType, [vecSize], 4);\n const output = outputVariable("outputData", outputDataType, [vecSize], 4);\n return `\n ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(input, output)}\n\n 
${additionalImplementation ?? ""}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}\n\n let a = ${input.getByOffset("global_idx")};\n ${output.setByOffset("global_idx", expression)}\n }`;\n };\n createElementwiseProgramInfo = (input, name, funcCall, additionalImplementation, cacheKey, outputDataType = input.dataType) => ({\n name,\n shaderCache: { hint: cacheKey, inputDependencies: ["type"] },\n getShaderSource: (shaderHelper) => createElementwiseProgramShader(\n shaderHelper,\n ShapeUtil.size(input.dims),\n input.dataType,\n outputDataType,\n funcCall,\n additionalImplementation\n ),\n getRunData: (inputTensors) => ({\n outputs: [{ dims: input.dims, dataType: outputDataType }],\n dispatchGroup: { x: Math.ceil(\n ShapeUtil.size(inputTensors[0].dims) / 64 / 4\n /* vec size */\n ) },\n programUniforms: [\n { type: 12 /* uint32 */, data: Math.ceil(ShapeUtil.size(input.dims) / 4) }\n ]\n })\n });\n abs = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Abs", "abs"));\n };\n acos = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Acos", "acos"));\n };\n acosh = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Acosh", "acosh"));\n };\n asin = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Asin", "asin"));\n };\n asinh = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Asinh", "asinh"));\n };\n atan = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Atan", "atan"));\n };\n atanh = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Atanh", "atanh"));\n };\n parseCastAttributes = (attributes) => createAttributeWithCacheKey(attributes);\n cast = (context, attributes) => {\n let func;\n switch (attributes.to) {\n case 10 /* float16 */:\n func = "vec4";\n break;\n case 1 /* float */:\n func = 
"vec4";\n break;\n case 12 /* uint32 */:\n func = "vec4";\n break;\n case 6 /* int32 */:\n func = "vec4";\n break;\n case 9 /* bool */:\n func = "vec4";\n break;\n default:\n throw new RangeError(`not supported type (specified in attribute \'to\' from \'Cast\' operator): ${attributes.to}`);\n }\n context.compute(\n createElementwiseProgramInfo(context.inputs[0], "Cast", func, void 0, attributes.cacheKey, attributes.to)\n );\n };\n generateClipAttributesFromInputs = (inputs) => {\n const min = inputs.length >= 2 && inputs[1].data !== 0 ? inputs[1].getFloat32Array()[0] : MIN_CLIP;\n const max = inputs.length >= 3 && inputs[2].data !== 0 ? inputs[2].getFloat32Array()[0] : MAX_CLIP;\n return createAttributeWithCacheKey({ min, max });\n };\n clip = (context, clipAttributes) => {\n const attributes = context.inputs.length === 1 ? clipAttributes : generateClipAttributesFromInputs(context.inputs);\n const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType);\n context.compute(\n createElementwiseProgramInfo(\n context.inputs[0],\n "Clip",\n (a) => `clamp(${a}, clip_min_, clip_max_)`,\n `\n const clip_min_: vec4<${dataType}> = vec4(${dataType}(${attributes.min}));\n const clip_max_: vec4<${dataType}> = vec4(${dataType}(${attributes.max}));\n`,\n attributes.cacheKey\n ),\n { inputs: [0] }\n );\n };\n ceil = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Ceil", "ceil"));\n };\n cos = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Cos", "cos"));\n };\n cosh = (context) => {\n context.compute(createElementwiseProgramInfo(context.inputs[0], "Cosh", "cosh"));\n };\n parseAlphaAttributes = (attributes) => createAttributeWithCacheKey(attributes);\n elu = (context, attributes) => {\n const dataType = tensorTypeToWsglValueType(context.inputs[0].dataType);\n context.compute(createElementwiseProgramInfo(\n context.inputs[0],\n "Elu",\n (a) => `elu_vf32(${a})`,\n `\n const elu_alpha_ = 
/**
 * Emits WGSL source declaring the constants `r0`..`r5` and a vectorised
 * `erf_vf32` function operating on `vec4<varType>`.
 *
 * @param varType WGSL scalar type used for the constants and the vector; defaults to "f32".
 * @returns The WGSL snippet as a string (it starts with a newline).
 */
erfImpl = (varType = "f32") => {
  const coefficients = [
    "0.3275911",
    "0.254829592",
    "-0.284496736",
    "1.421413741",
    "-1.453152027",
    "1.061405429"
  ];
  const declarations = coefficients.map((value, index) => `const r${index}: ${varType} = ${value};`);
  const vectorType = `vec4<${varType}>`;
  return [
    "",
    ...declarations,
    "",
    `fn erf_vf32(v: ${vectorType}) -> ${vectorType} {`,
    "  let absv = abs(v);",
    "  let x = 1.0 / (1.0 + r0 * absv);",
    "  return sign(v) * (1.0 - ((((r5 * x + r4) * x + r3) * x + r2) * x + r1) * x * exp(-absv * absv));",
    "}"
  ].join("\n");
};
/**
 * Builds a WGSL expression for tanh of `a`, written in terms of `sign`, `abs` and `exp`.
 *
 * @param a WGSL expression text for the operand; it is substituted verbatim three times.
 * @returns The WGSL expression as a string.
 */
tanhExpression = (a) => {
  // The same exponential term appears in both numerator and denominator.
  const decay = `exp(-2 * abs(${a}))`;
  return `sign(${a}) * (1 - ${decay}) / (1 + ${decay})`;
};
var validateInputs7, createBiasSplitGeluProgramInfo, biasSplitGelu;
var init_bias_split_gelu = __esm({
  "web/lib/wasm/jsep/webgpu/ops/bias-split-gelu.ts"() {
    "use strict";
    init_util();
    init_common();
    init_unary_op();

    /**
     * Checks the shapes of the BiasSplitGelu inputs.
     * Throws an Error describing the first violated constraint.
     */
    validateInputs7 = (inputs) => {
      const [hidden, biasTensor] = inputs;
      const supportedHiddenSizes = [2560, 5120, 10240];
      if (hidden.dims.length !== 3) {
        throw new Error("input should have 3 dimensions");
      }
      if (!supportedHiddenSizes.includes(hidden.dims[2])) {
        throw new Error("hidden state should be 2560, 5120 or 10240");
      }
      if (biasTensor.dims.length !== 1) {
        throw new Error("bias is expected to have 1 dimensions");
      }
      if (hidden.dims[2] !== biasTensor.dims[0]) {
        throw new Error("last dimension of input and bias are not the same");
      }
    };

    /**
     * Builds the program description for BiasSplitGelu.
     * The output has the input's shape with the last of its three dimensions halved;
     * all shader variables use 4 components per element.
     */
    createBiasSplitGeluProgramInfo = (inputs) => {
      const [hidden] = inputs;
      const tensorType = hidden.dataType;

      // Copy before editing so the caller's dims array is left untouched.
      const outputShape = hidden.dims.slice();
      outputShape[2] = outputShape[2] / 2;

      const input = inputVariable("input", tensorType, hidden.dims, 4);
      const bias = inputVariable("bias", tensorType, [hidden.dims[2]], 4);
      const output = outputVariable("output", tensorType, outputShape, 4);
      const outputSize = ShapeUtil.size(outputShape) / 4;
      const dataType = tensorTypeToWsglStorageType(tensorType);

      const getShaderSource = (shaderHelper) => {
        const halfChannels = inputs[0].dims[2] / 4 / 2;
        return `
  const M_SQRT2 = sqrt(2.0);
  const halfChannels = ${halfChannels}u;

  ${shaderHelper.declareVariables(input, bias, output)}

  ${erfImpl(dataType)}

  ${shaderHelper.mainStart()}
    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)}
    let biasIdx = global_idx % halfChannels;
    let batchIndex = global_idx / halfChannels;
    let inputOffset = biasIdx + batchIndex * halfChannels * 2;
    let valueLeft = input[inputOffset] + bias[biasIdx];
    let valueRight = input[inputOffset + halfChannels] + bias[biasIdx + halfChannels];
    let geluRight = valueRight * 0.5 * (erf_vf32(valueRight / M_SQRT2) + 1);

    ${output.setByOffset("global_idx", "valueLeft * geluRight")}
  }`;
      };

      const getRunData = () => {
        const workgroupCount = Math.ceil(outputSize / 64 /* workgroup size */);
        return {
          outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
          dispatchGroup: { x: workgroupCount }
        };
      };

      return { name: "BiasSplitGelu", getRunData, getShaderSource };
    };

    /** Operator entry point: validates the inputs, then submits the program. */
    biasSplitGelu = (context) => {
      const { inputs } = context;
      validateInputs7(inputs);
      const programInfo = createBiasSplitGeluProgramInfo(context.inputs);
      context.compute(programInfo);
    };
  }
});
additionalImplementation) => {\n let expressionScalar;\n let expressionVector;\n if (typeof funcCall === "string") {\n expressionScalar = expressionVector = (a2, b2) => `${funcCall}((${a2}),(${b2}))`;\n } else if (typeof funcCall === "function") {\n expressionScalar = expressionVector = funcCall;\n } else {\n expressionScalar = funcCall.scalar;\n expressionVector = funcCall.vector;\n }\n const output = outputVariable("outputData", typeOutput, dimsOutput.length, 4);\n const a = inputVariable("aData", typeA, dimsA.length, 4);\n const b = inputVariable("bData", typeB, dimsB.length, 4);\n let assignment;\n if (vectorize) {\n if (doBroadcast) {\n const isAOneElement = ShapeUtil.size(dimsA) === 1;\n const isBOneElement = ShapeUtil.size(dimsB) === 1;\n const aLastDimDivisibleBy4 = dimsA.length > 0 && dimsA[dimsA.length - 1] % 4 === 0;\n const bLastDimDivisibleBy4 = dimsB.length > 0 && dimsB[dimsB.length - 1] % 4 === 0;\n if (isAOneElement || isBOneElement) {\n assignment = output.setByOffset(\n "global_idx",\n expressionVector(\n isAOneElement ? `${a.type.value}(${a.getByOffset("0")}.x)` : a.getByOffset("global_idx"),\n isBOneElement ? `${b.type.value}(${b.getByOffset("0")}.x)` : b.getByOffset("global_idx")\n )\n );\n } else {\n assignment = `\n let outputIndices = ${output.offsetToIndices("global_idx * 4u")};\n let offsetA = ${a.broadcastedIndicesToOffset("outputIndices", output)};\n let offsetB = ${b.broadcastedIndicesToOffset("outputIndices", output)};\n ${output.setByOffset(\n "global_idx",\n expressionVector(\n sharedDimensionDivisibleBy4 || aLastDimDivisibleBy4 ? a.getByOffset("offsetA / 4u") : `${a.type.value}(${a.getByOffset("offsetA / 4u")}[offsetA % 4u])`,\n sharedDimensionDivisibleBy4 || bLastDimDivisibleBy4 ? 
b.getByOffset("offsetB / 4u") : `${b.type.value}(${b.getByOffset("offsetB / 4u")}[offsetB % 4u])`\n )\n )}\n `;\n }\n } else {\n assignment = output.setByOffset(\n "global_idx",\n expressionVector(a.getByOffset("global_idx"), b.getByOffset("global_idx"))\n );\n }\n } else {\n if (!doBroadcast) {\n throw new Error("no necessary to use scalar implementation for element-wise binary op implementation.");\n }\n const singleAssignment = (resStr, x, typeCast = "") => {\n const expressionA = `aData[indexA${x}][componentA${x}]`;\n const expressionB = `bData[indexB${x}][componentB${x}]`;\n return `\n let outputIndices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)};\n let offsetA${x} = ${a.broadcastedIndicesToOffset(`outputIndices${x}`, output)};\n let offsetB${x} = ${b.broadcastedIndicesToOffset(`outputIndices${x}`, output)};\n let indexA${x} = offsetA${x} / 4u;\n let indexB${x} = offsetB${x} / 4u;\n let componentA${x} = offsetA${x} % 4u;\n let componentB${x} = offsetB${x} % 4u;\n ${resStr}[${x}] = ${typeCast}(${expressionScalar(expressionA, expressionB)});\n `;\n };\n if (typeOutput === 9 /* bool */) {\n assignment = `\n var data = vec4(0);\n ${singleAssignment("data", 0, "u32")}\n ${singleAssignment("data", 1, "u32")}\n ${singleAssignment("data", 2, "u32")}\n ${singleAssignment("data", 3, "u32")}\n outputData[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`;\n } else {\n assignment = `\n ${singleAssignment("outputData[global_idx]", 0)}\n ${singleAssignment("outputData[global_idx]", 1)}\n ${singleAssignment("outputData[global_idx]", 2)}\n ${singleAssignment("outputData[global_idx]", 3)}\n `;\n }\n }\n return `\n ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(a, b, output)}\n\n ${additionalImplementation ?? 
/**
 * Builds the program description for an element-wise binary operator.
 *
 * When the two shapes differ, the output shape comes from `BroadcastUtil.calcShape`
 * and the function decides whether the vectorised shader path can be used.
 *
 * @param name Program name.
 * @param cacheKey Prefix of the shader-cache hint.
 * @param a First operand tensor (`dims`, `dataType`).
 * @param b Second operand tensor (`dims`, `dataType`).
 * @param funcCall Forwarded unchanged to `createBinaryOpProgramShader`.
 * @param additionalImplementation Optional extra shader code, forwarded unchanged.
 * @param outputDataType Element type of the output; defaults to `a.dataType`.
 * @returns An object with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 * @throws Error when `BroadcastUtil.calcShape` yields no shape for the operands.
 */
createBinaryOpProgramInfo = (name, cacheKey, a, b, funcCall, additionalImplementation, outputDataType = a.dataType) => {
  const hasSingleElement = (dims) => ShapeUtil.size(dims) === 1;
  const lastDimIsMultipleOf4 = (dims) => dims.length > 0 && dims[dims.length - 1] % 4 === 0;

  const isBroadcast = !ShapeUtil.areEqual(a.dims, b.dims);
  let outputShape = a.dims;
  let outputSize = ShapeUtil.size(a.dims);
  let sharedDimensionDivisibleBy4 = false;
  // Identical shapes always take the vectorised path.
  let vectorize = true;
  const cacheKeyAux = [isBroadcast];

  if (isBroadcast) {
    const broadcastShape = BroadcastUtil.calcShape(a.dims, b.dims, false);
    if (!broadcastShape) {
      throw new Error("Can't perform binary op on the given tensors");
    }
    outputShape = broadcastShape;
    outputSize = ShapeUtil.size(outputShape);

    const shapeFlags = [
      hasSingleElement(a.dims),
      hasSingleElement(b.dims),
      lastDimIsMultipleOf4(a.dims),
      lastDimIsMultipleOf4(b.dims)
    ];
    cacheKeyAux.push(...shapeFlags);

    // Multiply up the trailing dimensions that both operands have in common,
    // stopping at the first mismatch (a missing dimension counts as 1).
    let sharedDimension = 1;
    let fromEnd = 1;
    while (fromEnd < outputShape.length) {
      const dimA = a.dims[a.dims.length - fromEnd] ?? 1;
      const dimB = b.dims[b.dims.length - fromEnd] ?? 1;
      if (dimA !== dimB) {
        break;
      }
      sharedDimension *= dimA;
      fromEnd += 1;
    }

    sharedDimensionDivisibleBy4 = sharedDimension % 4 === 0;
    vectorize = sharedDimensionDivisibleBy4 || shapeFlags.some((flag) => flag);
  }
  cacheKeyAux.push(vectorize);

  const getShaderSource = (shaderHelper) => createBinaryOpProgramShader(
    shaderHelper,
    a.dims,
    b.dims,
    outputShape,
    vectorize,
    isBroadcast,
    sharedDimensionDivisibleBy4,
    funcCall,
    a.dataType,
    b.dataType,
    outputDataType,
    additionalImplementation
  );

  const getRunData = () => {
    const workgroupCount = Math.ceil(outputSize / 64 / 4 /* component size */);
    const vectorCount = Math.ceil(ShapeUtil.size(outputShape) / 4);
    return {
      outputs: [{ dims: outputShape, dataType: outputDataType }],
      dispatchGroup: { x: workgroupCount },
      programUniforms: [
        { type: 12 /* uint32 */, data: vectorCount },
        ...createTensorShapeVariables(a.dims, b.dims, outputShape)
      ]
    };
  };

  return {
    name,
    shaderCache: {
      hint: cacheKey + cacheKeyAux.map((flag) => String(flag)).join("_"),
      inputDependencies: ["rank", "rank"]
    },
    getShaderSource,
    getRunData
  };
};
var getActivationSnippet, appendActivationUniformsData, appendActivationUniforms, parseInternalActivationAttributes;
var init_fuse_utils = __esm({
  "web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts"() {
    "use strict";
    init_wasm_common();
    init_util();

    /**
     * Returns the WGSL statement that applies the fused activation to `value`.
     * An empty activation yields an empty string; an unknown one throws.
     */
    getActivationSnippet = (attributes, valueType, baseType = "f32") => {
      const { activation } = attributes;
      const constant = (literal) => `${valueType}(${literal})`;
      const uniform = (field) => `${baseType}(uniforms.${field})`;

      if (activation === "Relu") {
        return `value = max(value, ${constant("0.0")});`;
      }
      if (activation === "Sigmoid") {
        return `value = (${constant("1.0")} / (${constant("1.0")} + exp(-value)));`;
      }
      if (activation === "Clip") {
        return `value = clamp(value, ${valueType}(${uniform("clip_min")}), ${valueType}(${uniform("clip_max")}));`;
      }
      if (activation === "HardSigmoid") {
        return `value = max(${constant("0.0")}, min(${constant("1.0")}, ${uniform("alpha")} * value + ${uniform("beta")}));`;
      }
      if (activation === "LeakyRelu") {
        return `value = select(${uniform("alpha")} * value, value, value >= ${constant("0.0")});`;
      }
      if (activation === "") {
        return "";
      }
      throw new Error(`Unsupported activation ${attributes.activation}`);
    };

    /**
     * Appends the float uniform values required by the activation to `programUniform`.
     * The order matches the declarations added by `appendActivationUniforms`.
     */
    appendActivationUniformsData = (attributes, programUniform) => {
      const floatValue = (data) => ({ type: 1 /* float */, data });
      switch (attributes.activation) {
        case "Clip":
          programUniform.push(floatValue(attributes.clipMax), floatValue(attributes.clipMin));
          break;
        case "HardSigmoid":
          programUniform.push(floatValue(attributes.alpha), floatValue(attributes.beta));
          break;
        case "LeakyRelu":
          programUniform.push(floatValue(attributes.alpha));
          break;
        default:
          break;
      }
    };

    /** Appends the f32 uniform declarations required by the activation to `uniforms`. */
    appendActivationUniforms = (attributes, uniforms) => {
      const f32Uniform = (name) => ({ name, type: "f32" });
      switch (attributes.activation) {
        case "Clip":
          uniforms.push(f32Uniform("clip_max"), f32Uniform("clip_min"));
          break;
        case "HardSigmoid":
          uniforms.push(f32Uniform("alpha"), f32Uniform("beta"));
          break;
        case "LeakyRelu":
          uniforms.push(f32Uniform("alpha"));
          break;
        default:
          break;
      }
    };

    /**
     * Normalises raw activation attributes into `{ activation, ...parameters }`,
     * substituting default parameters when `activation_params` is missing.
     */
    parseInternalActivationAttributes = (attributes) => {
      const activation = attributes?.activation || "";
      const paramsOr = (defaults) => attributes?.activation_params || defaults;
      switch (activation) {
        case "HardSigmoid": {
          const [alpha, beta] = paramsOr([0.2, 0.5]);
          return { activation, alpha, beta };
        }
        case "Clip": {
          const [clipMin, clipMax] = paramsOr([MIN_CLIP, MAX_CLIP]);
          return { activation, clipMax, clipMin };
        }
        case "LeakyRelu": {
          const [alpha] = paramsOr([0.01]);
          return { activation, alpha };
        }
        default:
          return { activation };
      }
    };
  }
});
/**
 * Emits the WGSL accumulation step used inside the vec4 matmul inner loop.
 *
 * @param transposeA Selects the variant that reads cached A values by `localRow` column
 *   (`true`) or by `tileRow + i` row (`false`).
 * @param innerElementSize When equal to 3, the statements for the fourth lane are omitted.
 * @returns The WGSL snippet as a string. Note that `innerElementSize` also appears
 *   literally in the emitted text, where it names a shader-side constant.
 */
calculateResultSnippet = (transposeA, innerElementSize) => {
  // Statements touching the fourth lane are dropped for 3-wide tiles.
  const unlessThreeWide = (wgsl) => (innerElementSize === 3 ? "" : wgsl);

  if (!transposeA) {
    return `
        for (var i = 0; i < rowPerThread; i = i + 1) {
          let ACached = mm_Asub[tileRow + i][k];
          acc[i] = BCached0 * ACached.x + acc[i];
          acc[i] = BCached1 * ACached.y + acc[i];
          acc[i] = BCached2 * ACached.z + acc[i];
          ${unlessThreeWide("acc[i] = BCached3 * ACached.w + acc[i];")}
        }`;
  }

  return `
        let ACached0 = mm_Asub[k * innerElementSize][localRow];
        let ACached1 = mm_Asub[k * innerElementSize + 1][localRow];
        let ACached2 = mm_Asub[k * innerElementSize + 2][localRow];
        ${unlessThreeWide("let ACached3 = mm_Asub[k * innerElementSize + 3][localRow];")}
        for (var i = 0; i < rowPerThread; i = i + 1) {
          acc[i] = BCached0 * ACached0[i] + acc[i];
          acc[i] = BCached1 * ACached1[i] + acc[i];
          acc[i] = BCached2 * ACached2[i] + acc[i];
          ${unlessThreeWide("acc[i] = BCached3 * ACached3[i] + acc[i];")}
        }`;
};
tileInner : tileAOuter;\n const innerElementSize = tileAWidth / workgroupSize[0];\n const rowPerThreadB = tileInner / workgroupSize[1];\n if (!((transposeA && innerElementSize === 4 && workPerThread[1] === 4 || !transposeA && (innerElementSize === 3 || innerElementSize === 4)) && tileAWidth % workgroupSize[0] === 0 && tileInner % workgroupSize[1] === 0 && workPerThread[0] === 4)) {\n throw new Error(`If transposeA ${transposeA} is true, innerElementSize ${innerElementSize} and workPerThread[1] ${workPerThread[1]} must be 4.\n Otherwise, innerElementSize ${innerElementSize} must be 3 or 4.\n tileAWidth ${tileAWidth} must be divisible by workgroupSize[0]${workgroupSize[0]}. tileInner ${tileInner} must be divisible by workgroupSize[1] ${workgroupSize[1]}. colPerThread ${workPerThread[0]} must be 4.`);\n }\n return `\nvar mm_Asub: array, ${tileAWidth / innerElementSize}>, ${tileAHight}>;\nvar mm_Bsub: array, ${tileBOuter / workPerThread[0]}>, ${tileInner}>;\n\nconst rowPerThread = ${workPerThread[1]};\nconst colPerThread = ${workPerThread[0]};\nconst innerElementSize = ${innerElementSize};\nconst tileInner = ${tileInner};\n\n@compute @workgroup_size(${workgroupSize[0]}, ${workgroupSize[1]}, ${workgroupSize[2]})\nfn main(@builtin(local_invocation_id) localId : vec3,\n @builtin(global_invocation_id) globalId : vec3,\n @builtin(workgroup_id) workgroupId : vec3) {\n let localRow = i32(localId.y);\n let tileRow = localRow * rowPerThread;\n let tileCol = i32(localId.x);\n\n let globalRow =i32(globalId.y) * rowPerThread;\n let globalCol = i32(globalId.x);\n let batch = ${splitK ? "0" : "i32(globalId.z)"};\n ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices("u32(batch)")};` : ""}\n let globalRowStart = i32(workgroupId.y) * ${tileAOuter};\n\n let num_tiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : "(uniforms.dim_inner - 1) / tileInner + 1"};\n var kStart = ${splitK ? 
/**
 * Emits the WGSL statement that loads one element of A into `mm_Asub` via `mm_readA`.
 *
 * @param transpose2 When truthy, the row argument is offset by `kStart` and the column
 *   argument by `globalRowStart`; otherwise the two offsets are swapped.
 * @param batchDims When truthy, `batchIndices` is passed as an extra argument.
 * @returns The WGSL snippet as a string.
 */
writeDataToSubASnippet = (transpose2, batchDims) => {
  const [rowArgument, columnArgument] = transpose2
    ? ["kStart + inputRow", "globalRowStart + inputCol"]
    : ["globalRowStart + inputRow", "kStart + inputCol"];
  const batchArgument = batchDims ? ", batchIndices" : "";
  return `
        mm_Asub[inputRow][inputCol] = mm_readA(batch,
          ${rowArgument},
          ${columnArgument}${batchArgument});
        `;
};
"let ACached = mm_Asub[k][tileRow + innerRow];" : "let ACached = mm_Asub[tileRow + innerRow][k];";\n makeMatMulPackedSource = (workPerThread, workgroupSize, type = "f32", batchDims, transposeA = false, tileInner = 32, splitK = false, splitedDimInner = 32, sequentialAccessByThreads = false) => {\n const tileAOuter = workPerThread[1] * workgroupSize[1];\n const tileBOuter = workPerThread[0] * workgroupSize[0];\n const tileAWidth = transposeA ? tileAOuter : tileInner;\n const tileAHight = transposeA ? tileInner : tileAOuter;\n if (!(tileAHight % workgroupSize[1] === 0 && tileAWidth % workgroupSize[0] === 0 && tileInner % workgroupSize[1] === 0)) {\n throw new Error(`tileAHight ${tileAHight} must be divisible by workgroupSize[1]${workgroupSize[1]}, tileAWidth ${tileAWidth} must be divisible by workgroupSize[0]${workgroupSize[0]}, tileInner ${tileInner} must be divisible by workgroupSize[1]${workgroupSize[1]}`);\n }\n const rowPerThreadA = tileAHight / workgroupSize[1];\n const colPerThreadA = tileAWidth / workgroupSize[0];\n const rowPerThreadB = tileInner / workgroupSize[1];\n const matmulSnippet = sequentialAccessByThreads ? 
`\n let localRow = i32(localId.y);\n let localCol = i32(localId.x);\n let globalRowStart = i32(workgroupId.y) * ${tileAOuter};\n let globalColStart = i32(workgroupId.x) * ${tileBOuter};\n\n // Loop over shared dimension.\n for (var t = 0; t < num_tiles; t = t + 1) {\n // Load one tile of A into local memory.\n for (var inputRow = localRow; inputRow < ${tileAHight}; inputRow = inputRow + ${workgroupSize[1]}) {\n for (var inputCol = localCol; inputCol < ${tileAWidth}; inputCol = inputCol + ${workgroupSize[0]}) {\n ${writeDataToSubASnippet(transposeA, batchDims)}\n }\n }\n // Load one tile of B into local memory.\n for (var inputRow = localRow; inputRow < ${tileInner}; inputRow = inputRow + ${workgroupSize[1]}) {\n for (var inputCol = localCol; inputCol < ${tileBOuter}; inputCol = inputCol + ${workgroupSize[0]}) {\n mm_Bsub[inputRow][inputCol] = mm_readB(batch,\n kStart + inputRow,\n globalColStart + inputCol${batchDims ? ", batchIndices" : ""});\n }\n }\n kStart = kStart + tileInner;\n workgroupBarrier();\n\n // Compute acc values for a single thread.\n var BCached : array<${type}, colPerThread>;\n for (var k = 0; k < tileInner; k = k + 1) {\n for (var inner = 0; inner < colPerThread; inner = inner + 1) {\n BCached[inner] = mm_Bsub[k][localCol + inner * ${workgroupSize[0]}];\n }\n for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {\n let ACached = ${transposeA ? 
`mm_Asub[k][localRow + innerRow * ${workgroupSize[1]}];` : `mm_Asub[localRow + innerRow * ${workgroupSize[1]}][k];`}\n for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {\n acc[innerRow][innerCol] = acc[innerRow][innerCol] +\n ACached * BCached[innerCol];\n }\n }\n }\n workgroupBarrier();\n }\n for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {\n let gRow = globalRowStart + localRow + innerRow * ${workgroupSize[1]};\n for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {\n let gCol = globalColStart + localCol + innerCol * ${workgroupSize[0]};\n mm_write(batch, gRow, gCol, acc[innerRow][innerCol]);\n }\n }\n ` : `\nlet tileRow = i32(localId.y) * rowPerThread;\nlet tileCol = i32(localId.x) * colPerThread;\n\nlet globalRow = i32(globalId.y) * rowPerThread;\nlet globalCol = i32(globalId.x) * colPerThread;\nlet globalRowStart = i32(workgroupId.y) * ${tileAOuter};\n\nlet tileRowA = i32(localId.y) * ${rowPerThreadA};\nlet tileColA = i32(localId.x) * ${colPerThreadA};\nlet tileRowB = i32(localId.y) * ${rowPerThreadB};\n// Loop over shared dimension.\nfor (var t = 0; t < num_tiles; t = t + 1) {\n // Load one tile of A into local memory.\n for (var innerRow = 0; innerRow < ${rowPerThreadA}; innerRow = innerRow + 1) {\n for (var innerCol = 0; innerCol < ${colPerThreadA}; innerCol = innerCol + 1) {\n let inputRow = tileRowA + innerRow;\n let inputCol = tileColA + innerCol;\n ${writeDataToSubASnippet(transposeA, batchDims)}\n }\n }\n\n // Load one tile of B into local memory.\n for (var innerRow = 0; innerRow < ${rowPerThreadB}; innerRow = innerRow + 1) {\n for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {\n let inputRow = tileRowB + innerRow;\n let inputCol = tileCol + innerCol;\n mm_Bsub[inputRow][inputCol] = mm_readB(batch,\n kStart + inputRow,\n globalCol + innerCol${batchDims ? 
", batchIndices" : ""});\n }\n }\n kStart = kStart + tileInner;\n workgroupBarrier();\n\n // Compute acc values for a single thread.\n var BCached : array<${type}, colPerThread>;\n for (var k = 0; k < tileInner; k = k + 1) {\n for (var inner = 0; inner < colPerThread; inner = inner + 1) {\n BCached[inner] = mm_Bsub[k][tileCol + inner];\n }\n\n for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {\n ${readDataFromSubASnippet(transposeA)}\n for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {\n acc[innerRow][innerCol] = acc[innerRow][innerCol] + ACached * BCached[innerCol];\n }\n }\n }\n\n workgroupBarrier();\n}\n\nfor (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {\n for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {\n mm_write(batch, globalRow + innerRow, globalCol + innerCol,\n acc[innerRow][innerCol]);\n }\n}\n`;\n return `\n var mm_Asub : array, ${tileAHight}>;\n var mm_Bsub : array, ${tileInner}>;\n const rowPerThread = ${workPerThread[1]};\n const colPerThread = ${workPerThread[0]};\n const tileInner = ${tileInner};\n\n@compute @workgroup_size(${workgroupSize[0]}, ${workgroupSize[1]}, ${workgroupSize[2]})\nfn main(@builtin(local_invocation_id) localId : vec3,\n @builtin(global_invocation_id) globalId : vec3,\n @builtin(workgroup_id) workgroupId : vec3) {\n let batch = ${splitK ? "0" : "i32(globalId.z)"};\n ${batchDims ? `let batchIndices = ${batchDims.offsetToIndices("u32(batch)")};` : ""}\n let num_tiles = ${splitK ? `${Math.ceil(splitedDimInner / tileInner)}` : "(uniforms.dim_inner - 1) / tileInner + 1"};\n var kStart = ${splitK ? 
/**
 * Builds the WGSL source of the `mm_readA`, `mm_readB` and `mm_write` helper
 * functions that the packed MatMul kernels call.
 *
 * @param component       Multiplier applied to `colIn` to obtain the scalar column, and the
 *                        component count handed to `typeSnippet`.
 * @param hasBias         When true, `mm_write` adds a bias term before the activation.
 * @param applyActivation WGSL snippet inserted in `mm_write` after the bias addition.
 * @param variables       `[batchVariable, aVariable, bVariable, outputVariable]` index helpers.
 * @param batchShapes     `[batchAShape, batchBShape, batchShape]`, used to find broadcast dims.
 * @param isChannelsLast  Selects `bias[colIn]` (true) or `bias[row]` (false).
 * @returns The WGSL source text.
 */
matMulReadWriteFnSource = (component, hasBias, applyActivation, variables, batchShapes, isChannelsLast = false) => {
  const [batchAShape, batchBShape, batchShape] = batchShapes;
  const [batchVariable, aVariable, bVariable, outputVariable2] = variables;
  const broadCastADims = getBroadcastDims(batchAShape, batchShape);
  const broadCastBDims = getBroadcastDims(batchBShape, batchShape);
  const dataType = tensorTypeToWsglStorageType(variables[0].type.tensor);
  const valueType = typeSnippet(component, dataType);

  // Emits the WGSL that fills `<name>` (aIndices / bIndices): leading dims come from the
  // batch indices, broadcast dims are forced to 0, the last two dims are (row, colIn).
  const getIndices = (name, variable, broadCastDims) => {
    const rank = variable.rank;
    const batchRank = batchVariable.rank;
    let resStr = `var ${name}: ${variable.type.indices};`;
    for (let i = rank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) {
      resStr += `\n${name}[${i}] = ${batchRank > 1 ? `batchIndices[${j}]` : "batchIndices"};`;
    }
    broadCastDims.forEach((i) => {
      resStr += `\n${name}[${i}] = 0;`;
    });
    resStr += `\n${name}[${rank - 2}] = u32(row);\n ${name}[${rank - 1}] = u32(colIn);`;
    return resStr;
  };

  let biasSnippet = "";
  if (hasBias) {
    biasSnippet = `value = value + ${isChannelsLast ? "bias[colIn]" : `${valueType}(bias[row])`};`;
  }

  // `coords` is built from i32 arguments; the output helper is given an explicit
  // `vec3<u32>` conversion (a bare `vec3(coords)` would keep the i32 element type).
  const source = `
 fn mm_readA(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${valueType} {
 var value = ${valueType}(0.0);
 let col = colIn * ${component};
 if(row < uniforms.dim_a_outer && col < uniforms.dim_inner)
 {
 ${getIndices("aIndices", aVariable, broadCastADims)}
 value = ${aVariable.getByIndices("aIndices")};
 }
 return value;
 }

 fn mm_readB(batch: i32, row: i32, colIn: i32, batchIndices: ${batchVariable.type.indices}) -> ${valueType} {
 var value = ${valueType}(0.0);
 let col = colIn * ${component};
 if(row < uniforms.dim_inner && col < uniforms.dim_b_outer)
 {
 ${getIndices("bIndices", bVariable, broadCastBDims)}
 value = ${bVariable.getByIndices("bIndices")};
 }
 return value;
 }

 fn mm_write(batch: i32, row: i32, colIn: i32, valueIn: ${valueType}) {
 let col = colIn * ${component};
 if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer) {
 var value = valueIn;
 let coords = vec3<i32>(batch, row, colIn);
 ${biasSnippet}
 ${applyActivation}
 ${outputVariable2.setByIndices("vec3<u32>(coords)", "value")}
 }
 }
 `;
  return source;
};
/**
 * Assembles the program description (name, cache key, uniforms, dispatch size and
 * shader factory) for the tiled MatMul kernel.
 *
 * @param inputs               [A, B] and optionally a bias tensor as third entry.
 * @param activationAttributes Fused activation description; `activation` is part of the cache hint.
 * @param outputShape          Shape reported for the produced output tensor.
 * @param reshapedOutputShape  Optional shape whose leading dims are used as batch dims instead of
 *                             those of `outputShape`.
 * @param isChannelsLast       Forwarded to the read/write snippet and bias declaration.
 */
createMatmulProgramInfo = (inputs, activationAttributes, outputShape, reshapedOutputShape, isChannelsLast = false) => {
  const aDims = inputs[0].dims;
  const bDims = inputs[1].dims;
  const batchDimsA = aDims.slice(0, -2);
  const batchDimsB = bDims.slice(0, -2);
  const batchDimsOut = (reshapedOutputShape || outputShape).slice(0, -2);
  const batchCount = ShapeUtil.size(batchDimsOut);
  const rowsA = aDims[aDims.length - 2];
  const sharedDim = aDims[aDims.length - 1];
  const colsB = bDims[bDims.length - 1];

  // The vec4 kernel is used only when both packed dimensions are multiples of 4.
  const useVec4 = sharedDim % 4 === 0 && colsB % 4 === 0;
  const perThread = rowsA <= 8 ? [4, 1, 1] : [4, 4, 1];
  const workgroup = [8, 8, 1];
  const groupCounts = [colsB, rowsA, batchCount].map(
    (extent, axis) => Math.ceil(extent / workgroup[axis] / perThread[axis])
  );

  const packing = useVec4 ? 4 : 1;
  const aShapeInShader = [...batchDimsA, rowsA, sharedDim / packing];
  const bShapeInShader = [...batchDimsB, sharedDim, colsB / packing];
  const outShapeInShader = [batchCount, rowsA, colsB / packing];
  const withBias = inputs.length > 2;

  const programUniforms = [
    { type: 6 /* int32 */, data: rowsA },
    { type: 6 /* int32 */, data: colsB },
    { type: 6 /* int32 */, data: sharedDim }
  ];
  appendActivationUniformsData(activationAttributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(batchDimsOut, aShapeInShader, bShapeInShader));
  const inputDependencies = withBias ? ["rank", "rank", "rank"] : ["rank", "rank"];
  if (withBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
  }
  programUniforms.push(...createTensorShapeVariables(outShapeInShader));

  const getShaderSource = (shaderHelper) => {
    const batchDims = internalVariable("batchDims", inputs[0].dataType, batchDimsOut.length, 1);
    const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
    const A = inputVariable("a", inputs[0].dataType, aShapeInShader.length, packing);
    const B = inputVariable("b", inputs[1].dataType, bShapeInShader.length, packing);
    const output = outputVariable("result", inputs[0].dataType, outShapeInShader.length, packing);
    const inputVariables = [A, B];
    if (withBias) {
      inputVariables.push(
        inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, isChannelsLast ? packing : 1)
      );
    }
    const uniforms = ["dim_a_outer", "dim_b_outer", "dim_inner"].map((name) => ({ name, type: "i32" }));
    appendActivationUniforms(activationAttributes, uniforms);
    const baseType = tensorTypeToWsglStorageType(output.type.tensor);
    const applyActivation = getActivationSnippet(activationAttributes, output.type.value, baseType);
    const declareFunctions = matMulReadWriteFnSource(
      packing,
      withBias,
      applyActivation,
      [batchDims, A, B, output],
      [batchDimsA, batchDimsB, batchDimsOut],
      isChannelsLast
    );
    const declarations = shaderHelper
      .registerUniforms(uniforms)
      .registerInternalVariables(batchDims)
      .declareVariables(...inputVariables, output);
    const makeKernel = useVec4 ? makeMatMulPackedVec4Source : makeMatMulPackedSource;
    const kernel = makeKernel(perThread, workgroup, dataType, batchDims);
    return `\n ${declarations}\n ${declareFunctions}\n ${kernel}\n `;
  };

  return {
    name: "MatMul",
    shaderCache: {
      hint: `${perThread};${activationAttributes.activation};${useVec4};${isChannelsLast}`,
      inputDependencies
    },
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: groupCounts[0], y: groupCounts[1], z: groupCounts[2] },
      programUniforms
    }),
    getShaderSource
  };
};
/**
 * Produces the WGSL `mm_readA` / `mm_readB` / `mm_write` functions that let the packed
 * MatMul kernels compute a 2D convolution (input patches and weights are read as the
 * two matrix operands, the result is written back at 4D output coordinates).
 *
 * @param isChannelsLast    true: x is operand A and w operand B; false: the roles are swapped.
 * @param fitAOuter         When the relevant "fit" flags are set the bounds guard around the
 * @param fitBOuter         x read is omitted (see `sampleX` below).
 * @param fitInner
 * @param addBias           Forwarded to `biasSnippet`.
 * @param attributes        Activation attributes forwarded to `getActivationSnippet`.
 * @param innerElementSizeX Elements per x read (1, 3 or 4).
 * @param innerElementSizeW Elements per w read (1 or 4).
 * @param innerElementSize  Elements per written value.
 * @param dataType          WGSL scalar type name.
 * @returns WGSL source text.
 * @throws Error when an element size is not one of the supported values.
 */
conv2dCommonSnippet = (isChannelsLast, fitAOuter, fitBOuter, fitInner, addBias = false, attributes, innerElementSizeX = 4, innerElementSizeW = 4, innerElementSize = 4, dataType = "f32") => {
  const getXSnippet = (innerElementSize2) => {
    switch (innerElementSize2) {
      case 1:
        return "resData = x[xIndex];";
      case 3:
        return `resData = vec3<${dataType}>(x[xIndex], x[xIndex + 1], x[xIndex + 2]);`;
      case 4:
        return "resData = x[xIndex / 4];";
      default:
        throw new Error(`innerElementSize ${innerElementSize2} is not supported.`);
    }
  };
  const getWSnippet = (innerElementSize2) => {
    switch (innerElementSize2) {
      case 1:
        return "return w[row * i32(uniforms.w_shape[3]) + colIn];";
      case 4:
        return "return w[row * i32(uniforms.w_shape[3]) / 4 + colIn];";
      default:
        throw new Error(`innerElementSize ${innerElementSize2} is not supported.`);
    }
  };
  // All coordinate vectors are spelled with an explicit i32 element type.
  const coordASnippet = isChannelsLast
    ? "\n let coord = vec4<i32>(batch, xRow, xCol, xCh);\n "
    : "\n let coord = vec4<i32>(batch, xCh, xRow, xCol);\n ";
  const coordResSnippet = isChannelsLast
    ? "\n let coords = vec4<i32>(\n batch,\n row / outWidth,\n row % outWidth,\n col);\n "
    : "\n let coords = vec4<i32>(\n batch,\n row,\n col / outWidth,\n col % outWidth);\n ";
  const xHeight = isChannelsLast ? "i32(uniforms.x_shape[1])" : "i32(uniforms.x_shape[2])";
  const xWidth = isChannelsLast ? "i32(uniforms.x_shape[2])" : "i32(uniforms.x_shape[3])";
  const outWidthExpr = isChannelsLast ? "i32(uniforms.result_shape[2])" : "i32(uniforms.result_shape[3])";
  const row = isChannelsLast ? "row" : "col";
  const col = isChannelsLast ? "col" : "row";
  const xType = typeSnippet(innerElementSizeX, dataType);
  // `uniforms.x_shape` must be converted explicitly: `vec4(...)` without an element type
  // would leave it unchanged instead of producing the i32 vector that matches `coord`.
  const readXSnippet = `
 let inChannels = i32(uniforms.w_shape[2]);
 let outWidth = ${outWidthExpr};
 let outRow = ${row} / outWidth;
 let outCol = ${row} % outWidth;

 let WRow = ${col} / (i32(uniforms.w_shape[1]) * inChannels);
 let WCol = ${col} / inChannels % i32(uniforms.w_shape[1]);
 let xRow = outRow * uniforms.stride[0] + uniforms.dilation[0] * WRow - uniforms.pad[0];
 let xCol = outCol * uniforms.stride[1] + uniforms.dilation[1] * WCol - uniforms.pad[1];
 let xCh = ${col} % inChannels;
 var resData = ${xType}(0.0);
 // The bounds checking is always needed since we use it to pad zero for
 // the 'same' padding type.
 if (xRow >= 0 && xRow < ${xHeight} && xCol >= 0 && xCol < ${xWidth}) {
 ${coordASnippet}
 let xIndex = getIndexFromCoords4D(coord, vec4<i32>(uniforms.x_shape));
 ${getXSnippet(innerElementSizeX)}
 }
 return resData;`;
  // x read without / with a guard against reading outside the matrix extents.
  const unguardedX = `\n let col = colIn * ${innerElementSizeX};\n ${readXSnippet}`;
  const guardedX = (rowBound, colBound) => `\n let col = colIn * ${innerElementSizeX};\n if (row < uniforms.${rowBound} && col < uniforms.${colBound}) {\n ${readXSnippet}\n }\n return ${xType}(0.0);`;
  let sampleX;
  if (isChannelsLast) {
    sampleX = fitAOuter && fitInner ? unguardedX : guardedX("dim_a_outer", "dim_inner");
  } else {
    sampleX = fitInner && fitBOuter ? unguardedX : guardedX("dim_inner", "dim_b_outer");
  }
  const sampleW = `${getWSnippet(innerElementSizeW)}`;
  const resType = typeSnippet(innerElementSize, dataType);
  const wType = typeSnippet(innerElementSizeW, dataType);
  const aType = isChannelsLast ? xType : wType;
  const bType = isChannelsLast ? wType : xType;
  const applyActivation = getActivationSnippet(attributes, resType, dataType);
  const userCode = `
 fn mm_readA(batch: i32, row : i32, colIn : i32) -> ${aType} {
 ${isChannelsLast ? sampleX : sampleW}
 }

 fn mm_readB(batch: i32, row : i32, colIn : i32) -> ${bType} {
 ${isChannelsLast ? sampleW : sampleX}
 }

 fn mm_write(batch: i32, row : i32, colIn : i32, valueIn : ${resType}) {
 let col = colIn * ${innerElementSize};
 if (row < uniforms.dim_a_outer && col < uniforms.dim_b_outer)
 {
 var value = valueIn;
 let outWidth = ${outWidthExpr};
 ${coordResSnippet}
 ${biasSnippet(addBias)}
 ${applyActivation}
 setOutputAtCoords(coords[0], coords[1], coords[2], coords[3], value);
 }
 }`;
  return userCode;
};
/**
 * Assembles the program description for a 2D convolution executed through the packed
 * MatMul kernels.
 *
 * @param inputs      [x, w] and, when `hasBias`, a bias tensor as third entry.
 * @param attributes  Conv attributes; reads `format`, `pads`, `strides`, `dilations`, `cacheKey`
 *                    and is forwarded to the activation helpers.
 * @param outputShape 4D output shape (layout selected by `attributes.format`).
 * @param dimAOuter   Matrix dimensions of the equivalent matrix multiplication.
 * @param dimBOuter
 * @param dimInner
 * @param hasBias     Whether a bias input is declared and added.
 * @param sequentialAccessByThreads Forwarded to the non-vec4 kernel generator.
 */
createConv2DMatMulProgramInfo = (inputs, attributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias, sequentialAccessByThreads) => {
  const isChannelsLast = attributes.format === "NHWC";
  const inChannels = isChannelsLast ? inputs[0].dims[3] : inputs[0].dims[1];
  const batchSize = outputShape[0];
  const outWidth = isChannelsLast ? outputShape[2] : outputShape[3];
  const outHeight = isChannelsLast ? outputShape[1] : outputShape[2];
  const outChannels = isChannelsLast ? outputShape[3] : outputShape[1];
  const isVec4 = isChannelsLast && (inChannels % 4 === 0 || inChannels % 3 === 0) && outChannels % 4 === 0;
  const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight;
  const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels;
  const workGroupSize = [8, 8, 1];
  const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
  const dispatch = [
    Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]),
    Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]),
    Math.ceil(batchSize / workGroupSize[2] / elementsPerThread[2])
  ];
  LOG_DEBUG("verbose", () => `[conv2d_mm_webgpu] dispatch = ${dispatch}`);
  // In the vec4 path, x is read 3 elements at a time when its channel count is not a multiple of 4.
  const innerElementSize = isVec4 ? isChannelsLast && inChannels % 4 !== 0 ? 3 : 4 : 1;
  const tileAOuter = workGroupSize[1] * elementsPerThread[1];
  const tileBOuter = workGroupSize[0] * elementsPerThread[0];
  const tileInner = Math.max(workGroupSize[0] * innerElementSize, workGroupSize[1]);
  const fitAOuter = dimAOuter % tileAOuter === 0;
  const fitBOuter = dimBOuter % tileBOuter === 0;
  const fitInner = dimInner % tileInner === 0;
  const elementsSize = isVec4 ? [innerElementSize, 4, 4] : [1, 1, 1];
  const programUniforms = [
    { type: 6 /* int32 */, data: dimAOuter },
    { type: 6 /* int32 */, data: dimBOuter },
    { type: 6 /* int32 */, data: dimInner },
    { type: 6 /* int32 */, data: [attributes.pads[0], attributes.pads[1]] },
    { type: 6 /* int32 */, data: attributes.strides },
    { type: 6 /* int32 */, data: attributes.dilations }
  ];
  appendActivationUniformsData(attributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(inputs[0].dims, inputs[1].dims));
  const inputDependencies = ["rank", "rank"];
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
    inputDependencies.push("rank");
  }
  programUniforms.push(...createTensorShapeVariables(outputShape));
  const getShaderSource = (shaderHelper) => {
    const uniforms = [
      { name: "dim_a_outer", type: "i32" },
      { name: "dim_b_outer", type: "i32" },
      { name: "dim_inner", type: "i32" },
      { name: "pad", type: "i32", length: 2 },
      { name: "stride", type: "i32", length: 2 },
      { name: "dilation", type: "i32", length: 2 }
    ];
    appendActivationUniforms(attributes, uniforms);
    const components = isVec4 ? 4 : 1;
    const t = tensorTypeToWsglStorageType(inputs[0].dataType);
    const valueType = isVec4 ? `vec4<${t}>` : t;
    // WGSL requires an element type on vector types used as parameter types, so the
    // coordinate vectors below are written as vec4<i32>.
    let declareFunctions = `
 fn setOutputAtIndex(flatIndex : i32, value : ${valueType}) {
 result[flatIndex] = ${valueType}(value);
 }
 fn setOutputAtCoords(d0 : i32, d1 : i32, d2 : i32, d3 : i32, value : ${valueType}) {
 let flatIndex = getOutputIndexFromCoords(vec4<i32>(d0, d1, d2, d3));
 setOutputAtIndex(flatIndex ${isVec4 ? "/ 4" : ""}, value);
 }`;
    const x = inputVariable(
      "x",
      inputs[0].dataType,
      inputs[0].dims.length,
      innerElementSize === 3 ? 1 : innerElementSize
    );
    const w = inputVariable("w", inputs[1].dataType, inputs[1].dims.length, components);
    const inputVariables = [x, w];
    const output = outputVariable("result", inputs[0].dataType, outputShape.length, components);
    if (hasBias) {
      const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, components);
      inputVariables.push(bias);
      declareFunctions += `
 fn getBiasByOutputCoords(coords : vec4<i32>) -> ${valueType} {
 return bias[coords.${isChannelsLast ? "w" : "y"}${isVec4 ? "/ 4" : ""}];
 }`;
    }
    return `
 ${utilFunctions("uniforms.result_strides")}
 //struct Uniforms { xShape : vec4<i32>, wShape : vec4<i32>, outShape : vec4<i32>,
 // outShapeStrides: vec3<i32>, filterDims : vec2<i32>, pad : vec2<i32>, stride : vec2<i32>,
 // dilation : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32 };
 ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)}
 ${declareFunctions}
 ${conv2dCommonSnippet(
      isChannelsLast,
      fitAOuter,
      fitBOuter,
      fitInner,
      hasBias,
      attributes,
      elementsSize[0],
      elementsSize[1],
      elementsSize[2],
      t
    )}
 ${isVec4 ? makeMatMulPackedVec4Source(elementsPerThread, workGroupSize, t, void 0, !isChannelsLast, tileInner) : makeMatMulPackedSource(
      elementsPerThread,
      workGroupSize,
      t,
      void 0,
      !isChannelsLast,
      tileInner,
      false,
      void 0,
      sequentialAccessByThreads
    )}`;
  };
  return {
    name: "Conv2DMatMul",
    shaderCache: {
      hint: `${attributes.cacheKey};${innerElementSize};${isVec4};${fitAOuter};${fitBOuter};${fitInner};${tileAOuter};${tileBOuter};${tileInner}`,
      inputDependencies
    },
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: dispatch[0], y: dispatch[1], z: dispatch[2] },
      programUniforms
    }),
    getShaderSource
  };
};
/**
 * Assembles the program description for the grouped convolution kernel, in which each
 * invocation accumulates one output element.
 *
 * @param inputs      [x, w] and optionally a bias tensor as third entry.
 * @param attributes  Conv attributes; reads `group`, `format`, `dilations`, `pads`, `strides`,
 *                    `cacheKey` and is forwarded to the activation helpers.
 * @param squeezeOutputShapeFunction Optional mapper applied to the computed output shape
 *                    before it is reported in the run data.
 */
createGroupedConvProgramInfo = (inputs, attributes, squeezeOutputShapeFunction) => {
  const hasBias = inputs.length > 2;
  const processBias = hasBias ? "value += b[output_channel];" : "";
  const xShape = inputs[0].dims;
  const wShape = inputs[1].dims;
  const outputChannelsPerGroup = wShape[0] / attributes.group;
  const isChannelLast = attributes.format === "NHWC";
  const outputShape = calculateOutputShape(
    xShape,
    wShape,
    attributes.dilations,
    attributes.pads,
    attributes.strides,
    isChannelLast
  );
  const outputSize = ShapeUtil.size(outputShape);
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: attributes.dilations },
    { type: 12 /* uint32 */, data: [attributes.strides[0], attributes.strides[1]] },
    { type: 12 /* uint32 */, data: [attributes.pads[0], attributes.pads[1]] },
    { type: 12 /* uint32 */, data: outputChannelsPerGroup }
  ];
  appendActivationUniformsData(attributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(xShape, wShape));
  const inputDependencies = ["rank", "rank"];
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
    inputDependencies.push("rank");
  }
  programUniforms.push(...createTensorShapeVariables(outputShape));
  const getShaderSource = (shaderHelper) => {
    const output = outputVariable("output", inputs[0].dataType, outputShape.length);
    const baseType = tensorTypeToWsglStorageType(output.type.tensor);
    const applyActivation = getActivationSnippet(attributes, output.type.value, baseType);
    const x = inputVariable("x", inputs[0].dataType, xShape.length);
    const w = inputVariable("w", inputs[1].dataType, wShape.length);
    const inputVars = [x, w];
    if (hasBias) {
      inputVars.push(inputVariable("b", inputs[2].dataType, inputs[2].dims.length));
    }
    const uniforms = [
      { name: "output_size", type: "u32" },
      { name: "dilations", type: "u32", length: attributes.dilations.length },
      { name: "strides", type: "u32", length: 2 },
      { name: "pads", type: "u32", length: 2 },
      { name: "output_channels_per_group", type: "u32" }
    ];
    appendActivationUniforms(attributes, uniforms);
    // Axis positions of height / width / channel in the selected layout.
    const heightAxis = isChannelLast ? 1 : 2;
    const widthAxis = isChannelLast ? 2 : 3;
    const channelAxis = isChannelLast ? 3 : 1;
    // xRCCorner needs a fully specified vector type (vec2<u32>): a bare `vec2` is not a
    // valid WGSL type annotation. The strides/pads uniforms above are declared as u32.
    return `
 ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}

 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}

 let outputIndices = ${output.offsetToIndices("global_idx")};
 let batch: u32 = outputIndices[0];
 let output_channel: u32 = outputIndices[${channelAxis}];
 let xRCCorner: vec2<u32> = vec2<u32>(outputIndices[${heightAxis}], outputIndices[${widthAxis}]) * uniforms.strides - uniforms.pads;
 let group_id: u32 = output_channel / uniforms.output_channels_per_group;

 var value: ${output.type.value} = ${output.type.value}(0);
 for (var wInChannel: u32 = 0u; wInChannel < uniforms.w_shape[1]; wInChannel++) {
 let input_channel = group_id * uniforms.w_shape[1] + wInChannel;
 for (var wHeight: u32 = 0u; wHeight < uniforms.w_shape[2]; wHeight++) {
 let xHeight = xRCCorner.x + wHeight * uniforms.dilations[0];

 if (xHeight < 0u || xHeight >= uniforms.x_shape[${heightAxis}]) {
 continue;
 }

 for (var wWidth: u32 = 0u; wWidth < uniforms.w_shape[3]; wWidth++) {
 let xWidth = xRCCorner.y + wWidth * uniforms.dilations[1];
 if (xWidth < 0u || xWidth >= uniforms.x_shape[${widthAxis}]) {
 continue;
 }

 let xVal = ${isChannelLast ? x.get("batch", "xHeight", "xWidth", "input_channel") : x.get("batch", "input_channel", "xHeight", "xWidth")};
 let wVal = ${w.get("output_channel", "wInChannel", "wHeight", "wWidth")};
 value += xVal*wVal;
 }
 }
 }
 ${processBias}
 ${applyActivation}
 ${output.setByOffset("global_idx", "value")}
 }`;
  };
  return {
    name: "GroupedConv",
    shaderCache: { hint: attributes.cacheKey, inputDependencies },
    getRunData: () => ({
      outputs: [{
        dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
        dataType: inputs[0].dataType
      }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
/**
 * Assembles the program description for the vectorized grouped-convolution kernel.
 * Each invocation produces `outputNumber` neighbouring values along output dim 2, each
 * holding `components` packed elements of output dim 3.
 *
 * @param inputs      [x, w] and optionally a bias tensor as third entry.
 * @param attributes  Conv attributes; reads `strides`, `pads`, `cacheKey` and is forwarded
 *                    to the activation helpers.
 * @param outputShape 4D output shape.
 */
createGroupedConvVectorizeProgramInfo = (inputs, attributes, outputShape) => {
  const withBias = inputs.length > 2;
  const components = getMaxComponents(outputShape[3]);
  const outputNumber = getMaxComponents(outputShape[2]);
  const outputSize = ShapeUtil.size(outputShape) / components / outputNumber;

  // Shapes as seen by the shader: the last dimension is divided by the packing factor.
  const packLastDim = (dims) => [dims[0], dims[1], dims[2], dims[3] / components];
  const xShape = packLastDim(inputs[0].dims);
  const wShape = packLastDim(inputs[1].dims);
  const outputShapeInShader = packLastDim(outputShape);

  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 6 /* int32 */, data: [attributes.strides[0], attributes.strides[1]] },
    { type: 6 /* int32 */, data: [attributes.pads[0], attributes.pads[1]] }
  ];
  appendActivationUniformsData(attributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(xShape, wShape, outputShapeInShader));

  // Number of x values cached per filter row by one invocation.
  const xNumber = (outputNumber - 1) * attributes.strides[1] + wShape[1];

  const getShaderSource = (shaderHelper) => {
    const output = outputVariable("output", inputs[0].dataType, outputShapeInShader.length, components);
    const baseType = tensorTypeToWsglStorageType(output.type.tensor);
    const applyActivation = getActivationSnippet(attributes, output.type.value, baseType);
    const x = inputVariable("x", inputs[0].dataType, xShape.length, components);
    const w = inputVariable("w", inputs[1].dataType, wShape.length, components);
    const inputVars = withBias
      ? [x, w, inputVariable("b", inputs[2].dataType, inputs[2].dims, components)]
      : [x, w];
    const processBias = withBias ? "value += b[output_channel];" : "";
    const uniforms = [
      { name: "output_size", type: "u32" },
      { name: "strides", type: "i32", length: 2 },
      { name: "pads", type: "i32", length: 2 }
    ];
    appendActivationUniforms(attributes, uniforms);
    return `
 ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}
 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
 let width0 = uniforms.output_shape[3];
 let output_channel = global_idx % width0;
 var index1 = global_idx / width0;
 let width1 = uniforms.output_shape[2] / ${outputNumber}u;
 let col = (index1 % width1) * ${outputNumber}u;
 index1 = index1 / width1;
 let row = index1 % uniforms.output_shape[1];
 let batch = index1 / uniforms.output_shape[1];

 let x_corner = vec2(i32(row), i32(col)) * uniforms.strides - uniforms.pads;

 var x_vals: array<${x.type.value}, ${xNumber}>;
 var values: array<${output.type.value}, ${outputNumber}>;
 let input_channel = output_channel;
 // Use constant instead of uniform can give better performance for w's height/width.
 for (var w_height: u32 = 0u; w_height < ${wShape[0]}; w_height++) {
 let x_height = x_corner.x + i32(w_height);
 if (x_height >= 0 && u32(x_height) < uniforms.x_shape[1]) {
 for (var i = 0; i < ${xNumber}; i++) {
 let x_width = x_corner.y + i;
 if (x_width >= 0 && u32(x_width) < uniforms.x_shape[2]) {
 x_vals[i] = ${x.get("batch", "u32(x_height)", "u32(x_width)", "input_channel")};
 } else {
 x_vals[i] = ${x.type.value}(0);
 }
 }
 for (var w_width: u32 = 0u; w_width < ${wShape[1]}; w_width++) {
 let w_val = ${w.get("w_height", "w_width", "0", "output_channel")};
 for (var i = 0u; i < ${outputNumber}u; i++) {
 values[i] = fma(x_vals[i * u32(uniforms.strides[1]) + w_width], w_val, values[i]);
 }
 }
 }
 }

 for (var i = 0u; i < ${outputNumber}u; i++) {
 var value = values[i];
 ${processBias}
 ${applyActivation}
 ${output.set("batch", "row", "col + i", "output_channel", "value")};
 }
 }`;
  };

  const cacheHint = [attributes.cacheKey, components, outputNumber, xNumber, wShape[0], wShape[1]]
    .map((part) => `${part}`)
    .join(";");
  return {
    name: "GroupedConv-Vectorize",
    shaderCache: {
      hint: cacheHint,
      inputDependencies: withBias ? ["rank", "rank", "type"] : ["rank", "rank"]
    },
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
/**
 * Assembles the program description for the non-tiled ("naive") MatMul kernel.
 *
 * @param inputs               [A, B] and optionally a bias tensor as third entry.
 * @param activationAttributes Fused activation description; `activation` is part of the cache hint.
 * @param outputShape          Shape reported for the produced output tensor.
 * @param reshapedOutputShape  Optional shape whose leading dims are used as batch dims instead of
 *                             those of `outputShape`.
 * @param isChannelsLast       Selects how the bias is indexed (by column when true, by row otherwise).
 */
createNaiveMatmulProgramInfo = (inputs, activationAttributes, outputShape, reshapedOutputShape, isChannelsLast = false) => {
  const aShape = inputs[0].dims;
  const bShape = inputs[1].dims;
  // M x K times K x N, taken from the trailing two dims of A and B.
  const M = aShape[aShape.length - 2];
  const N = bShape[bShape.length - 1];
  const K = aShape[aShape.length - 1];
  // Packing factors: `components` along N, `aComponents` along K, and `outputNumber`
  // output rows handled by a single invocation.
  const components = getMaxComponents(N);
  const aComponents = getMaxComponents(K);
  const outputNumber = getMaxComponents(M);
  const outputSize = ShapeUtil.size(outputShape) / components / outputNumber;
  const hasBias = inputs.length > 2;
  const outerDims = reshapedOutputShape ? reshapedOutputShape.slice(0, -2) : outputShape.slice(0, -2);
  const batchSize = ShapeUtil.size(outerDims);
  const outputShapeInShader = [batchSize, M, N];
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: M },
    { type: 12 /* uint32 */, data: N },
    { type: 12 /* uint32 */, data: K }
  ];
  appendActivationUniformsData(activationAttributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(outerDims, aShape, bShape));
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
  }
  programUniforms.push(...createTensorShapeVariables(outputShapeInShader));
  const getShaderSource = (shaderHelper) => {
    const batchDims = internalVariable("batch_dims", inputs[0].dataType, outerDims.length);
    const a = inputVariable("a", inputs[0].dataType, aShape.length, aComponents);
    const b = inputVariable("b", inputs[1].dataType, bShape.length, components);
    const output = outputVariable("output", inputs[0].dataType, outputShapeInShader.length, components);
    const baseType = tensorTypeToWsglStorageType(output.type.tensor);
    const applyActivation = getActivationSnippet(activationAttributes, output.type.value, baseType);
    const inputVariables = [a, b];
    let processBias = "";
    if (hasBias) {
      const biasComponents = isChannelsLast ? components : 1;
      inputVariables.push(inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, biasComponents));
      processBias = `${isChannelsLast ? `value += bias[col / ${biasComponents}];` : `value += ${output.type.value}(bias[row + i]);`}`;
    }
    const outerDimsA = aShape.slice(0, -2);
    const outerDimsB = bShape.slice(0, -2);
    const broadCastADims = getBroadcastDims(outerDimsA, outerDims);
    const broadCastBDims = getBroadcastDims(outerDimsB, outerDims);
    const uniforms = [
      { name: "output_size", type: "u32" },
      { name: "M", type: "u32" },
      { name: "N", type: "u32" },
      { name: "K", type: "u32" }
    ];
    appendActivationUniforms(activationAttributes, uniforms);
    // Emits WGSL declaring `<name>_indices`: batch dims are copied from batch_indices,
    // broadcast dims are set to 0 and the last two dims are zeroed, so the resulting
    // offset is that of the first element of the selected matrix.
    const getIndices = (variable, broadCastDims) => {
      const rank = variable.rank;
      const name = variable.name;
      if (rank === 2) {
        return `var ${name}_indices = ${variable.type.indices}(0u, 0u);`;
      }
      const batchRank = batchDims.rank;
      let resStr = `var ${name}_indices: ${variable.type.indices};`;
      for (let i = rank - 2 - 1, j = batchRank - 1; i >= 0; i--, j--) {
        resStr += `
${name}_indices[${i}] = ${batchRank > 1 ? `batch_indices[${j}]` : "batch_indices"};`;
      }
      broadCastDims.forEach((i) => {
        resStr += `
${name}_indices[${i}] = 0;`;
      });
      resStr += `${name}_indices[${rank - 2}] = 0u;
 ${name}_indices[${rank - 1}] = 0u;`;
      return resStr;
    };
    // Emits the body of the k-loop: loads `aComponents` rows of b once, then for each of
    // the `outputNumber` output rows loads a packed a value and accumulates with fma.
    const calcResult = () => {
      let calcStr = `var a_data: ${a.type.value};`;
      for (let i = 0; i < aComponents; i++) {
        calcStr += `
 let b_data${i} = b[(b_offset + (k + ${i}) * uniforms.N + col) / ${components}];`;
      }
      for (let i = 0; i < outputNumber; i++) {
        calcStr += `a_data = a[(a_offset + (row + ${i}) * uniforms.K + k) / ${aComponents}];`;
        for (let j = 0; j < aComponents; j++) {
          calcStr += `
 values[${i}] = fma(${b.type.value}(a_data${aComponents === 1 ? "" : `[${j}]`}), b_data${j}, values[${i}]);
`;
        }
      }
      return calcStr;
    };
    return `
 ${shaderHelper.registerUniforms(uniforms).registerInternalVariables(batchDims).declareVariables(
      ...inputVariables,
      output
    )}
 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}
 let col = (global_idx % (uniforms.N / ${components})) * ${components};
 var index1 = global_idx / (uniforms.N / ${components});
 let stride1 = uniforms.M / ${outputNumber};
 let row = (index1 % stride1) * ${outputNumber};
 let batch = index1 / stride1;

 ${outputShape.length === 2 ? "" : `let batch_indices = ${batchDims.offsetToIndices("batch")};`}
 ${getIndices(a, broadCastADims)}
 let a_offset = ${a.indicesToOffset("a_indices")};
 ${getIndices(b, broadCastBDims)}
 let b_offset = ${b.indicesToOffset("b_indices")};
 var values: array<${output.type.value}, ${outputNumber}>;
 for (var k: u32 = 0u; k < uniforms.K; k = k + ${aComponents}) {
 ${calcResult()}
 }
 for (var i = 0u; i < ${outputNumber}u; i++) {
 var value = values[i];
 ${processBias}
 ${applyActivation}
 let cur_indices = ${output.type.indices}(batch, row + i, col);
 let offset = ${output.indicesToOffset("cur_indices")};
 ${output.setByOffset(`offset / ${components}`, "value")};
 }
 }
 `;
  };
  return {
    name: "MatMulNaive",
    shaderCache: {
      // Every value that changes the generated shader text is part of the cache hint.
      hint: `${activationAttributes.activation};${components};${aComponents};${outputNumber};${isChannelsLast}`,
      inputDependencies: hasBias ? ["rank", "rank", "rank"] : ["rank", "rank"]
    },
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
/**
 * Checks the MatMul operands: exactly two inputs whose shared dimension agrees
 * (last dim of the first input equals the second-to-last dim of the second).
 *
 * @throws Error when either condition is violated.
 */
validateInputs8 = (inputs) => {
  const hasTwoInputs = Boolean(inputs) && inputs.length === 2;
  if (!hasTwoInputs) {
    throw new Error("MatMul requires 2 inputs.");
  }
  const aDims = inputs[0].dims;
  const bDims = inputs[1].dims;
  const sharedA = aDims[aDims.length - 1];
  const sharedB = bDims[bDims.length - 2];
  if (sharedA !== sharedB) {
    throw new Error("shared dimension does not match.");
  }
};

/**
 * MatMul entry point: validates the inputs, computes the broadcast output shape and runs
 * either the naive kernel (when both N and K are below 8) or the packed kernel.
 *
 * @param context Compute context exposing `inputs` and `compute(programInfo)`.
 * @throws Error when the inputs are invalid or their shapes cannot be broadcast.
 */
matMul = (context) => {
  validateInputs8(context.inputs);
  const [lhs, rhs] = [context.inputs[0], context.inputs[1]];
  const outputShape = BroadcastUtil.calcShape(lhs.dims, rhs.dims, true);
  if (!outputShape) {
    throw new Error("Can't use matmul on the given tensors");
  }
  const N = outputShape[outputShape.length - 1];
  const K = lhs.dims[lhs.dims.length - 1];
  const isSmall = N < 8 && K < 8;
  const createProgramInfo = isSmall ? createNaiveMatmulProgramInfo : createMatmulProgramInfo;
  context.compute(createProgramInfo(context.inputs, { activation: "" }, outputShape));
};
// Axis permutation used when the conv weight tensor is handed to the transpose program.
weightTransposeAttribute = [2, 3, 1, 0];

/**
 * Validates the inputs and attributes of a Conv call.
 *
 * @param inputs - `[x, w]` or `[x, w, bias]`; only each tensor's `dims` is read.
 * @param attributes - parsed conv attributes (`format`, `group`, `dilations`, `strides`,
 *   `pads`, `kernelShape` are read).
 * @throws {Error} on a wrong input count, unsupported rank (only 3-D and 4-D inputs pass),
 *   rank mismatch between input and filter, channel mismatch, malformed bias, or attribute
 *   arrays whose length does not match the spatial rank.
 */
validateInputs9 = (inputs, attributes) => {
  if (!inputs || (inputs.length !== 2 && inputs.length !== 3)) {
    throw new Error("Conv requires 2 or 3 inputs");
  }
  const xDims = inputs[0].dims;
  const wDims = inputs[1].dims;
  if (xDims.length !== 4 && xDims.length !== 3) {
    throw new Error("currently only support conv 1D and 2D");
  }
  if (xDims.length !== wDims.length) {
    throw new Error("filter does not have same dimension as input");
  }
  // The channel axis is the last one for NHWC and axis 1 otherwise.
  const dataChannel = xDims[attributes.format === "NHWC" ? xDims.length - 1 : 1];
  const filterInChannel = wDims[1] * attributes.group;
  if (dataChannel !== filterInChannel) {
    throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");
  }
  if (inputs.length === 3 && (inputs[2].dims.length !== 1 || wDims[0] !== inputs[2].dims[0])) {
    throw new Error("invalid bias");
  }
  const spatialRank = xDims.length - 2;
  if (attributes.dilations.length !== spatialRank) {
    throw new Error(`dilations should be ${spatialRank}D`);
  }
  if (attributes.strides.length !== spatialRank) {
    throw new Error(`strides should be ${spatialRank}D`);
  }
  if (attributes.pads.length !== spatialRank * 2) {
    throw new Error(`pads should be ${spatialRank * 2}D`);
  }
  // An empty kernelShape is accepted here; it is filled in by getAdjustedConvAttributes.
  if (attributes.kernelShape.length !== 0 && attributes.kernelShape.length !== wDims.length - 2) {
    throw new Error("invalid kernel shape");
  }
};

/**
 * Returns a copy of `attributes` whose `kernelShape` and `pads` are fully resolved.
 *
 * Kernel extents that are missing or `0` are taken from the spatial dims of the weight
 * tensor (`inputs[1].dims[2..]`). The pads copy is then passed to
 * `PoolConvUtil.adjustPadsBasedOnAutoPad`, which updates it in place.
 *
 * @param attributes - conv attributes; not mutated.
 * @param inputs - conv inputs; `inputs[0].dims` and `inputs[1].dims` are read.
 * @returns a new attributes object with `kernelShape` and `pads` replaced.
 */
getAdjustedConvAttributes = (attributes, inputs) => {
  const weightDims = inputs[1].dims;
  const kernelShape = attributes.kernelShape.slice();
  // Fix: a kernelShape shorter than the weight's spatial rank (including the empty one that
  // validateInputs9 allows) used to stay short, because `undefined === 0` is false. Pad it
  // with zeros so the loop below infers the missing extents from the weights.
  const kernelRank = weightDims.length - 2;
  while (kernelShape.length < kernelRank) {
    kernelShape.push(0);
  }
  for (let i = 2; i < weightDims.length; ++i) {
    if (kernelShape[i - 2] === 0) {
      kernelShape[i - 2] = weightDims[i];
    }
  }
  const pads = attributes.pads.slice();
  PoolConvUtil.adjustPadsBasedOnAutoPad(
    inputs[0].dims,
    attributes.strides,
    attributes.dilations,
    kernelShape,
    pads,
    attributes.format === "NHWC",
    attributes.autoPad
  );
  return { ...attributes, kernelShape, pads };
};
return {\n autoPad,\n format,\n dilations,\n group,\n kernelShape,\n pads,\n strides,\n wIsConst,\n ...activationAttributes,\n cacheKey: `${attributes.format};${activationAttributes.activation};`\n };\n };\n conv2d = (context, inputs, attributes) => {\n const adjustedAttributes = getAdjustedConvAttributes(attributes, inputs);\n const isChannelsLast = attributes.format === "NHWC";\n if (attributes.group !== 1) {\n const enableGroupedConvVectorize = !context.adapterInfo.isArchitecture("ampere");\n if (enableGroupedConvVectorize && isChannelsLast && inputs[1].dims[0] === attributes.group && inputs[1].dims[1] === 1 && attributes.dilations[0] === 1 && attributes.dilations[1] === 1) {\n const outputShape2 = calculateOutputShape(\n inputs[0].dims,\n inputs[1].dims,\n attributes.dilations,\n adjustedAttributes.pads,\n attributes.strides,\n isChannelsLast\n );\n const transposedWeight2 = context.kernelCustomData.wT ?? context.compute(\n createTransposeProgramInfo(inputs[1], weightTransposeAttribute),\n { inputs: [1], outputs: [attributes.wIsConst ? -2 : -1] }\n )[0];\n if (attributes.wIsConst && !context.kernelCustomData.wT) {\n context.kernelCustomData.wT = transposedWeight2;\n }\n const convInputs2 = [inputs[0], transposedWeight2];\n if (inputs.length === 3) {\n convInputs2.push(inputs[2]);\n }\n context.compute(\n createGroupedConvVectorizeProgramInfo(convInputs2, adjustedAttributes, outputShape2),\n { inputs: convInputs2 }\n );\n } else {\n context.compute(createGroupedConvProgramInfo(inputs, adjustedAttributes));\n }\n return;\n }\n const hasBias = inputs.length === 3;\n const inputHeight = inputs[0].dims[isChannelsLast ? 1 : 2];\n const inputWidth = inputs[0].dims[isChannelsLast ? 2 : 3];\n const inputChannels = inputs[0].dims[isChannelsLast ? 
3 : 1];\n const weightHeight = inputs[1].dims[2];\n const weightWidth = inputs[1].dims[3];\n const outputShape = calculateOutputShape(\n inputs[0].dims,\n inputs[1].dims,\n attributes.dilations,\n adjustedAttributes.pads,\n attributes.strides,\n isChannelsLast\n );\n const outHeight = outputShape[isChannelsLast ? 1 : 2];\n const outWidth = outputShape[isChannelsLast ? 2 : 3];\n const outChannels = outputShape[isChannelsLast ? 3 : 1];\n const sameSize = isChannelsLast && weightHeight === inputHeight && weightWidth === inputWidth && attributes.pads[0] === 0 && attributes.pads[1] === 0;\n if (sameSize || weightHeight === 1 && weightWidth === 1 && attributes.dilations[0] === 1 && attributes.dilations[1] === 1 && attributes.strides[0] === 1 && attributes.strides[1] === 1 && attributes.pads[0] === 0 && attributes.pads[1] === 0) {\n const batch = outputShape[0];\n let xReshaped, wReshaped, matmulOutputShape;\n const matmulInputs = [];\n if (isChannelsLast) {\n const transposedWeight2 = context.kernelCustomData.wT ?? context.compute(\n createTransposeProgramInfo(inputs[1], weightTransposeAttribute),\n { inputs: [1], outputs: [attributes.wIsConst ? 
-2 : -1] }\n )[0];\n if (attributes.wIsConst && !context.kernelCustomData.wT) {\n context.kernelCustomData.wT = transposedWeight2;\n }\n if (sameSize) {\n const sharedDim = inputHeight * inputWidth * inputChannels;\n xReshaped = inputs[0].reshape([1, batch, sharedDim]);\n wReshaped = transposedWeight2.reshape([1, sharedDim, outChannels]);\n matmulOutputShape = [1, batch, outChannels];\n } else {\n xReshaped = inputs[0].reshape([batch, inputHeight * inputWidth, inputChannels]);\n wReshaped = transposedWeight2.reshape([1, inputChannels, outChannels]);\n matmulOutputShape = [batch, outHeight * outWidth, outChannels];\n }\n matmulInputs.push(xReshaped);\n matmulInputs.push(wReshaped);\n } else {\n xReshaped = inputs[0].reshape([batch, inputChannels, inputHeight * inputWidth]);\n wReshaped = inputs[1].reshape([1, outChannels, inputChannels]);\n matmulOutputShape = [batch, outChannels, outHeight * outWidth];\n matmulInputs.push(wReshaped);\n matmulInputs.push(xReshaped);\n }\n if (hasBias) {\n matmulInputs.push(inputs[2]);\n }\n const N = matmulOutputShape[2];\n const K = matmulInputs[0].dims[matmulInputs[0].dims.length - 1];\n if (N < 8 && K < 8) {\n context.compute(\n createNaiveMatmulProgramInfo(\n matmulInputs,\n adjustedAttributes,\n outputShape,\n matmulOutputShape,\n isChannelsLast\n ),\n { inputs: matmulInputs }\n );\n } else {\n context.compute(\n createMatmulProgramInfo(matmulInputs, adjustedAttributes, outputShape, matmulOutputShape, isChannelsLast),\n { inputs: matmulInputs }\n );\n }\n return;\n }\n const sequentialAccessByThreads = (\n /* backend.adapterInfo.isIntel() */\n true\n );\n const transposedWeight = context.kernelCustomData.wT ?? context.compute(\n createTransposeProgramInfo(inputs[1], weightTransposeAttribute),\n { inputs: [1], outputs: [attributes.wIsConst ? 
/**
 * Runs a 1-D convolution by lifting it to 2-D with a unit height axis and submitting the
 * grouped-conv program.
 *
 * The input is reshaped [N, W, C] -> [N, 1, W, C] (NHWC) or [N, C, W] -> [N, C, 1, W]
 * (otherwise); the filter [O, I, kW] -> [O, I, 1, kW]. `pads`, `strides`, `dilations` and
 * `kernelShape` get a matching leading entry for the added axis, and the program's output
 * shape is squeezed back to three dimensions.
 *
 * @param context - compute context; `inputs` and `compute` are used.
 * @param attributes - conv attributes with 1-D `pads` (length 2), `strides`, `dilations`
 *   and `kernelShape`.
 */
conv1d = (context, attributes) => {
  const isChannelLast = attributes.format === "NHWC";
  const xDims = context.inputs[0].dims;
  const wDims = context.inputs[1].dims;

  const inputs = [
    context.inputs[0].reshape(
      isChannelLast
        ? [xDims[0], 1, xDims[1], xDims[2]] // [N, W, C] -> [N, H=1, W, C]
        : [xDims[0], xDims[1], 1, xDims[2]] // [N, C, W] -> [N, C, H=1, W]
    ),
    // [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kW] -> [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kH=1, kW]
    context.inputs[1].reshape([wDims[0], wDims[1], 1, wDims[2]])
  ];
  if (context.inputs.length === 3) {
    inputs.push(context.inputs[2]);
  }

  const pads = [0, attributes.pads[0], 0, attributes.pads[1]];
  const strides = [1].concat(attributes.strides);
  const dilations = [1].concat(attributes.dilations);
  const kernelShape = [1].concat(attributes.kernelShape);
  const adjustedAttributes = getAdjustedConvAttributes({ ...attributes, pads, strides, dilations, kernelShape }, inputs);

  // Drop the artificial H axis again. Fix: the channels-first branch used to return `[]`,
  // discarding the whole shape; it now removes axis 2 ([N, C, 1, W] -> [N, C, W]), mirroring
  // the NHWC branch that removes axis 1 ([N, 1, W, C] -> [N, W, C]).
  const squeezeOutputShape = (outputShape) =>
    isChannelLast
      ? [outputShape[0], outputShape[2], outputShape[3]]
      : [outputShape[0], outputShape[1], outputShape[3]];

  context.compute(createGroupedConvProgramInfo(inputs, adjustedAttributes, squeezeOutputShape));
};

/**
 * Conv kernel entry point: validates the inputs, then dispatches on input rank —
 * rank 3 goes to `conv1d`, everything else to `conv2d`.
 *
 * @param context - compute context.
 * @param attributes - parsed conv attributes.
 */
conv = (context, attributes) => {
  validateInputs9(context.inputs, attributes);
  if (context.inputs[0].dims.length === 3) {
    conv1d(context, attributes);
  } else {
    conv2d(context, context.inputs, attributes);
  }
};
"i32(uniforms.x_shape[1])" : "i32(uniforms.x_shape[2])";\n const xWidth = isChannelsLast ? "i32(uniforms.x_shape[2])" : "i32(uniforms.x_shape[3])";\n const row = isChannelsLast ? "row" : "col";\n const col = isChannelsLast ? "col" : "row";\n const readASnippet = `\n let inChannels = ${isChannelsLast ? "i32(uniforms.x_shape[3])" : "i32(uniforms.x_shape[1])"};\n let outWidth = ${isChannelsLast ? "i32(uniforms.result_shape[2])" : "i32(uniforms.result_shape[3])"};\n let outRow = ${row} / outWidth;\n let outCol = ${row} % outWidth;\n\n let WRow = ${col} / (uniforms.filter_dims[1] * inChannels);\n let WCol = ${col} / inChannels % uniforms.filter_dims[1];\n let xR = f32(outRow - uniforms.pads[0] + uniforms.dilations[0] * WRow) / f32(uniforms.strides[0]);\n let xC = f32(outCol - uniforms.pads[1] + uniforms.dilations[1] * WCol) / f32(uniforms.strides[1]);\n if (xR < 0.0 || xR >= f32(${xHeight}) || fract(xR) > 0.0) {\n return ${type}(0.0);\n }\n if (xC < 0.0 || xC >= f32(${xWidth}) || fract(xC) > 0.0) {\n return ${type}(0.0);\n }\n let iXR = i32(xR);\n let iXC = i32(xC);\n let xCh = ${col} % inChannels;\n ${coordASnippet}\n return x[getIndexFromCoords4D(coord, vec4(uniforms.x_shape))/${innerElementSize}];`;\n const sampleA = isChannelsLast ? `\n let col = colIn * ${innerElementSize};\n if (row < uniforms.dim_a_outer && col < uniforms.dim_inner) {\n ${readASnippet}\n }\n return ${type}(0.0);` : `\n let col = colIn * ${innerElementSize};\n if (row < uniforms.dim_inner && col < uniforms.dim_b_outer) {\n ${readASnippet}\n }\n return ${type}(0.0);`;\n const sampleW = `\n let col = colIn * ${innerElementSize};\n let inChannels = ${isChannelsLast ? "i32(uniforms.x_shape[3])" : "i32(uniforms.x_shape[1])"};\n let coordX = uniforms.filter_dims[0] - 1 - row / (uniforms.filter_dims[1] * inChannels);\n let coordY = uniforms.filter_dims[1] - 1 - (row / inChannels) % uniforms.filter_dims[1];\n if (${isChannelsLast ? 
/**
 * Builds the program description for the "Conv2DTransposeMatMul" kernel: its name, shader
 * cache key, run data (output, dispatch size, uniform values) and a shader-source generator.
 *
 * @param inputs - `[x, w]` plus an optional bias at index 2 (read only when `hasBias`).
 * @param attributes - conv-transpose attributes; `format`, `kernelShape`, `dilations`,
 *   `strides`, `pads` and `cacheKey` are read here.
 * @param outputShape - 4-element output shape, indexed according to `attributes.format`.
 * @param dimAOuter - forwarded as the `dim_a_outer` uniform; also selects `elementsPerThread`.
 * @param dimBOuter - forwarded as the `dim_b_outer` uniform.
 * @param dimInner - forwarded as the `dim_inner` uniform.
 * @param hasBias - whether a bias input and its accessor function are declared.
 * @param sequentialAccessByThreads - forwarded to `makeMatMulPackedSource` (non-vec4 path only).
 * @returns an object with `name`, `shaderCache`, `getRunData` and `getShaderSource`.
 */
createConv2DTransposeMatMulProgramInfo = (inputs, attributes, outputShape, dimAOuter, dimBOuter, dimInner, hasBias, sequentialAccessByThreads) => {
  const isChannelsLast = attributes.format === "NHWC";
  const inChannels = isChannelsLast ? inputs[0].dims[3] : inputs[0].dims[1];
  const batchSize = outputShape[0];
  const outWidth = isChannelsLast ? outputShape[2] : outputShape[3];
  const outHeight = isChannelsLast ? outputShape[1] : outputShape[2];
  const outChannels = isChannelsLast ? outputShape[3] : outputShape[1];
  // Truthy only for NHWC when inChannels is a multiple of 4 but not of 3 (the bare
  // `inChannels % 3` is truthy when non-zero) and outChannels is a multiple of 4.
  // NOTE(review): the "not a multiple of 3" condition looks unusual — confirm it is intended.
  const isVec4 = isChannelsLast && (inChannels % 4 === 0 && inChannels % 3) && outChannels % 4 === 0;
  const dispatchX = isChannelsLast ? outChannels : outWidth * outHeight;
  const dispatchY = isChannelsLast ? outWidth * outHeight : outChannels;
  const workGroupSize = [8, 8, 1];
  const elementsPerThread = dimAOuter <= 8 ? [4, 1, 1] : [4, 4, 1];
  // Workgroup counts: each extent divided by workgroup size and per-thread element count, rounded up.
  const dispatch = [
    Math.ceil(dispatchX / workGroupSize[0] / elementsPerThread[0]),
    Math.ceil(dispatchY / workGroupSize[1] / elementsPerThread[1]),
    Math.ceil(batchSize / workGroupSize[2] / elementsPerThread[2])
  ];
  LOG_DEBUG("verbose", () => `[conv_backprop_mm_webgpu] dispatch = ${dispatch}`);
  const innerElementSize = isVec4 ? 4 : 1;
  const tileInner = Math.max(workGroupSize[0] * innerElementSize, workGroupSize[1]);
  const components = isVec4 ? 4 : 1;
  // Spatial kernel extents; their position inside kernelShape depends on the layout.
  const filterDims = [attributes.kernelShape[isChannelsLast ? 1 : 2], attributes.kernelShape[isChannelsLast ? 2 : 3]];
  // Kernel extent after dilation: size + (size - 1) * (dilation - 1) when dilation > 1.
  const effectiveFilterDims = [
    filterDims[0] + (attributes.dilations[0] <= 1 ? 0 : (filterDims[0] - 1) * (attributes.dilations[0] - 1)),
    filterDims[1] + (attributes.dilations[1] <= 1 ? 0 : (filterDims[1] - 1) * (attributes.dilations[1] - 1))
  ];
  // Per-axis pad uniform: effective size - 1 - floor(half the sum of the two pads for that axis).
  const pads = [
    effectiveFilterDims[0] - 1 - Math.floor((attributes.pads[0] + attributes.pads[2]) / 2),
    effectiveFilterDims[1] - 1 - Math.floor((attributes.pads[1] + attributes.pads[3]) / 2)
  ];
  // Order must match the `uniforms` declaration list inside getShaderSource below.
  const programUniforms = [
    { type: 6 /* int32 */, data: dimAOuter },
    { type: 6 /* int32 */, data: dimBOuter },
    { type: 6 /* int32 */, data: dimInner },
    { type: 6 /* int32 */, data: attributes.strides },
    { type: 6 /* int32 */, data: attributes.dilations },
    { type: 6 /* int32 */, data: filterDims },
    { type: 6 /* int32 */, data: pads }
  ];
  appendActivationUniformsData(attributes, programUniforms);
  programUniforms.push(...createTensorShapeVariables(inputs[0].dims, inputs[1].dims));
  const inputDependencies = ["rank", "rank"];
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
    inputDependencies.push("rank");
  }
  programUniforms.push(...createTensorShapeVariables(outputShape));
  // Generates the WGSL source: declarations, the conv-transpose read/write snippet and the
  // packed matmul body (vec4 or scalar variant).
  const getShaderSource = (shaderHelper) => {
    const x = inputVariable("x", inputs[0].dataType, inputs[0].dims.length, components);
    const w = inputVariable("w", inputs[1].dataType, inputs[1].dims.length, 1);
    const output = outputVariable("result", inputs[0].dataType, outputShape.length, components);
    const inputVariables = [x, w];
    let declareFunctions = "";
    if (hasBias) {
      const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims.length, components);
      inputVariables.push(bias);
      // NOTE(review): `vec4` below carries no element type — the generic parameter appears
      // to have been stripped from this copy of the shader text; confirm against upstream.
      declareFunctions += `
 fn getBiasByOutputCoords(coords : vec4) -> ${bias.type.value} {
 return bias[coords.${isChannelsLast ? "w" : "y"}${isVec4 ? "/ 4" : ""}];
 }`;
    }
    const uniforms = [
      { name: "dim_a_outer", type: "i32" },
      { name: "dim_b_outer", type: "i32" },
      { name: "dim_inner", type: "i32" },
      { name: "strides", type: "i32", length: 2 },
      { name: "dilations", type: "i32", length: 2 },
      { name: "filter_dims", type: "i32", length: filterDims.length },
      { name: "pads", type: "i32", length: pads.length }
    ];
    appendActivationUniforms(attributes, uniforms);
    const elemType = tensorTypeToWsglStorageType(inputs[0].dataType, 1);
    // Only f16 and f32 element types are accepted.
    if (elemType !== "f16" && elemType !== "f32") {
      throw new Error(`elemType ${elemType} is not supported.`);
    }
    return `
 ${utilFunctions("uniforms.result_strides")}
 ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)};
 ${declareFunctions}
 ${conv2dTransposeCommonSnippet(isChannelsLast, hasBias, attributes, x.type.value, innerElementSize)}
 ${isVec4 ? makeMatMulPackedVec4Source(
      elementsPerThread,
      workGroupSize,
      elemType,
      void 0,
      !isChannelsLast,
      tileInner
    ) : makeMatMulPackedSource(
      elementsPerThread,
      workGroupSize,
      elemType,
      void 0,
      !isChannelsLast,
      tileInner,
      false,
      void 0,
      sequentialAccessByThreads
    )}`;
  };
  return {
    name: "Conv2DTransposeMatMul",
    // The cache hint combines the attribute cache key with the tiling parameters and vec4 flag.
    shaderCache: { hint: `${attributes.cacheKey};${elementsPerThread};${workGroupSize};${isVec4}`, inputDependencies },
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: dispatch[0], y: dispatch[1], z: dispatch[2] },
      programUniforms
    }),
    getShaderSource
  };
};
4 : 1;\n const w = inputVariable("W", inputs[1].dataType, inputs[1].dims.length, components);\n const dy = inputVariable("Dy", inputs[0].dataType, inputs[0].dims.length, components);\n const inputVariables = [dy, w];\n if (hasBias) {\n inputVariables.push(inputVariable("bias", inputs[2].dataType, [outputShape[channelDim]].length, components));\n }\n const output = outputVariable("result", inputs[0].dataType, outputShape.length, components);\n const codeSnippet4 = `{\n let batch: u32 = ${is1DimensionDispatch ? "global_id.z" : "workgroup_id.z"} / uniforms.result_shape[1];\n let r = ${is1DimensionDispatch ? "global_id.z" : "workgroup_id.z"} % uniforms.result_shape[1];\n let c = ${is1DimensionDispatch ? "global_id.y" : "workgroup_id.y"} * ${workPerThread};\n let d1: u32 = ${is1DimensionDispatch ? "global_id.x" : "workgroup_id.x"} * 4;\n\n let dyCorner = vec2(i32(r), i32(c)) - vec2(uniforms.pads);\n\n // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).\n // ? = to be determined. 
: = across all values in that axis.\n var dotProd: array, ${workPerThread}>;\n for (var i = 0; i < ${workPerThread}; i++) {\n dotProd[i] = vec4<${dataType}>(0.0);\n }\n for (var wR: u32 = 0; wR < uniforms.filter_dims[0]; wR = wR + 1) {\n var dyR = (${dataType}(dyCorner.x) + ${dataType}(wR)) / ${dataType}(uniforms.strides.x);\n let wRPerm = uniforms.filter_dims[0] - 1 - wR;\n if (dyR < 0.0 || dyR >= ${dataType}(uniforms.Dy_shape[1]) ||\n fract(dyR) > 0.0 || wRPerm < 0) {\n continue;\n }\n let idyR: u32 = u32(dyR);\n\n for (var wC: u32 = 0; wC < uniforms.filter_dims[1]; wC = wC + 1) {\n let dyC = (${dataType}(dyCorner.y) + ${dataType}(wC)) / ${dataType}(uniforms.strides.y);\n let dyC2 = (${dataType}(dyCorner.y) + 1.0 + ${dataType}(wC)) / ${dataType}(uniforms.strides.y);\n let wCPerm = uniforms.filter_dims[1] - 1 - wC;\n if (wCPerm < 0) {\n continue;\n }\n var bDyCVal = true;\n var bDyCVal2 = true;\n if (dyC < 0.0 || dyC >= ${dataType}(uniforms.Dy_shape[2]) ||\n fract(dyC) > 0.0) {\n bDyCVal = false;\n }\n if (dyC2 < 0.0 || dyC2 >= ${dataType}(uniforms.Dy_shape[2]) ||\n fract(dyC2) > 0.0) {\n bDyCVal2 = false;\n }\n\n let idyC: u32 = u32(dyC);\n let idyC2: u32 = u32(dyC2);\n if (bDyCVal && bDyCVal2) {\n let d2Length = uniforms.Dy_shape[3];\n for (var d2 :u32 = 0; d2 < d2Length; d2 = d2 + 4) {\n let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")};\n let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")};\n let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")};\n let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")};\n\n var xValue = ${dy.get("batch", "idyR", "idyC", "d2")};\n let tmpval = vec4<${dataType}>(dot(xValue, wValue0),\n dot(xValue, wValue1),\n dot(xValue, wValue2),\n dot(xValue, wValue3));\n dotProd[0] = dotProd[0] + tmpval;\n\n xValue = ${dy.get("batch", "idyR", "idyC2", "d2")};\n\n dotProd[1] = dotProd[1] + vec4<${dataType}>(dot(xValue, wValue0),\n dot(xValue, wValue1),\n dot(xValue, wValue2),\n 
dot(xValue, wValue3));\n }\n } else if (bDyCVal) {\n let d2Length = uniforms.Dy_shape[${channelDim}];\n for (var d2: u32 = 0; d2 < d2Length; d2 = d2 + 4) {\n let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")};\n let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")};\n let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")};\n let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")};\n\n var xValue = ${dy.get("batch", "idyR", "idyC", "d2")};\n let tmpval = vec4<${dataType}>(dot(xValue, wValue0),\n dot(xValue, wValue1),\n dot(xValue, wValue2),\n dot(xValue, wValue3));\n dotProd[0] = dotProd[0] + tmpval;\n }\n } else if (bDyCVal2) {\n let d2Length = uniforms.Dy_shape[3];\n for (var d2: u32 = 0; d2 < d2Length; d2 = d2 + 4) {\n let wValue0 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1", "d2")};\n let wValue1 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 1", "d2")};\n let wValue2 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 2", "d2")};\n let wValue3 = ${w.get("u32(wRPerm)", "u32(wCPerm)", "d1 + 3", "d2")};\n\n var xValue = ${dy.get("batch", "idyR", "idyC2", "d2")};\n let tmpval = vec4<${dataType}>(dot(xValue, wValue0),\n dot(xValue, wValue1),\n dot(xValue, wValue2),\n dot(xValue, wValue3));\n dotProd[1] = dotProd[1] + tmpval;\n }\n }\n }\n }\n\n for (var i: u32 = 0; i < ${workPerThread}; i = i + 1) {\n let value = dotProd[i] + ${hasBias ? 
"bias[c+i]" : `vec4<${dataType}>(0.0)`};\n ${output.set("batch", "r", "c + i", "d1", "value")};\n }\n }`;\n const codeSnippet = `\n let outputIndices = ${output.offsetToIndices("global_idx")};\n let batch = ${output.indicesGet("outputIndices", 0)};\n let d1 = ${output.indicesGet("outputIndices", channelDim)};\n let r = ${output.indicesGet("outputIndices", rowDim)};\n let c = ${output.indicesGet("outputIndices", colDim)};\n let dyCorner = vec2(i32(r), i32(c)) - uniforms.pads;\n let dyRCorner = dyCorner.x;\n let dyCCorner = dyCorner.y;\n let groupId = d1 / uniforms.output_channels_per_group;\n let wOutChannel = d1 - groupId * uniforms.output_channels_per_group;\n // Convolve dy(?, ?, d2) with w(:, :, d1, d2) to compute dx(xR, xC, d1).\n // ? = to be determined. : = across all values in that axis.\n var dotProd = ${dataType}(0.0);\n for (var wR: u32 = 0; wR < uniforms.effective_filter_dims.x; wR = wR + 1) {\n if (wR % uniforms.dilations.x != 0) {\n continue;\n }\n let dyR = (${dataType}(dyRCorner) + ${dataType}(wR)) / ${dataType}(uniforms.strides[0]);\n let wRPerm = uniforms.filter_dims.x - 1 - wR / uniforms.dilations.x;\n if (dyR < 0.0 || dyR >= ${dataType}(uniforms.Dy_shape[${rowDim}]) || fract(dyR) > 0.0 ||\n wRPerm < 0) {\n continue;\n }\n let idyR: u32 = u32(dyR);\n\n for (var wC: u32 = 0; wC < uniforms.effective_filter_dims.y; wC = wC + 1) {\n if (wC % uniforms.dilations.y != 0) {\n continue;\n }\n let dyC = (${dataType}(dyCCorner) + ${dataType}(wC)) / ${dataType}(uniforms.strides.y);\n let wCPerm = uniforms.filter_dims.y - 1 - wC / uniforms.dilations.y;\n if (dyC < 0.0 || dyC >= ${dataType}(uniforms.Dy_shape[${colDim}]) ||\n fract(dyC) > 0.0 || wCPerm < 0) {\n continue;\n }\n let idyC: u32 = u32(dyC);\n var inputChannel = groupId * uniforms.input_channels_per_group;\n for (var d2: u32 = 0; d2 < uniforms.input_channels_per_group; d2 = d2 + 1) {\n let xValue = ${isChannelsLast ? 
/**
 * Builds the program description for the "ConvTranspose2D" kernel: name, shader cache key,
 * run data (dispatch size, output, uniform values) and a shader-source generator that
 * delegates to `createConvTranspose2DOpProgramShaderSource`.
 *
 * @param inputs - `[dy, w]` plus an optional bias at index 2; `dims` and `dataType` are read.
 * @param attributes - adjusted conv-transpose attributes; `outputShape`, `format`, `strides`,
 *   `kernelShape`, `dilations`, `pads`, `group` and `cacheKey` are read.
 * @param squeezeOutputShapeFunction - optional mapper applied to the output shape that is
 *   reported in the run data (the shader itself always uses the unsqueezed shape).
 * @returns an object with `name`, `shaderCache`, `getRunData` and `getShaderSource`.
 */
createConvTranspose2DProgramInfo = (inputs, attributes, squeezeOutputShapeFunction) => {
  const hasBias = inputs.length > 2;
  const outputShape = attributes.outputShape;
  const outputSize = ShapeUtil.size(outputShape);
  // One invocation per output element, 64 per workgroup, dispatched along x only.
  const dispatch = [
    Math.ceil(outputSize / 64),
    1,
    1
  ];
  LOG_DEBUG("verbose", () => `[conv2d_backprop_webgpu] dispatch = ${dispatch}`);
  const isChannelsLast = attributes.format === "NHWC";
  const inputDependencies = ["rank", "rank"];
  const strides = [attributes.strides[0], attributes.strides[1]];
  // Spatial kernel extents; their position inside kernelShape depends on the layout.
  const filterDims = [attributes.kernelShape[isChannelsLast ? 1 : 2], attributes.kernelShape[isChannelsLast ? 2 : 3]];
  const dilations = [attributes.dilations[0], attributes.dilations[1]];
  // Kernel extent after dilation: size + (size - 1) * (dilation - 1) when dilation > 1.
  const effectiveFilterDims = filterDims.map((size, axis) => {
    const dilation = attributes.dilations[axis];
    return size + (dilation <= 1 ? 0 : (size - 1) * (dilation - 1));
  });
  // Fix: the second entry used to be `Math.floor(p1 + p3) / 2`, which leaves a fractional
  // value for an odd pad sum even though the uniform is declared i32. Both axes now use
  // the same formula: effective size - 1 - floor((begin + end) / 2).
  const pads = effectiveFilterDims.map(
    (size, axis) => size - 1 - Math.floor((attributes.pads[axis] + attributes.pads[axis + 2]) / 2)
  );
  const isVec4 = false;
  const group = attributes.group;
  const wShape = inputs[1].dims;
  const inputChannelsPerGroup = wShape[0] / group;
  const outputChannelsPerGroup = wShape[1];
  // Order must match the `uniforms` declaration list inside getShaderSource below.
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: strides },
    { type: 12 /* uint32 */, data: filterDims },
    { type: 12 /* uint32 */, data: dilations },
    { type: 12 /* uint32 */, data: effectiveFilterDims },
    { type: 6 /* int32 */, data: pads },
    { type: 12 /* uint32 */, data: inputChannelsPerGroup },
    { type: 12 /* uint32 */, data: outputChannelsPerGroup },
    ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims)
  ];
  if (hasBias) {
    programUniforms.push(...createTensorShapeVariables(inputs[2].dims));
    inputDependencies.push("rank");
  }
  programUniforms.push(...createTensorShapeVariables(outputShape));
  const is1DimensionDispatch = dispatch[1] === 1 && dispatch[2] === 1;
  const getShaderSource = (shaderHelper) => {
    const uniforms = [
      { name: "output_size", type: "u32" },
      { name: "strides", type: "u32", length: strides.length },
      { name: "filter_dims", type: "u32", length: filterDims.length },
      // Length now comes from the array actually uploaded (same value, 2, as before).
      { name: "dilations", type: "u32", length: dilations.length },
      { name: "effective_filter_dims", type: "u32", length: effectiveFilterDims.length },
      { name: "pads", type: "i32", length: pads.length },
      { name: "input_channels_per_group", type: "u32" },
      { name: "output_channels_per_group", type: "u32" }
    ];
    const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
    return `${createConvTranspose2DOpProgramShaderSource(
      shaderHelper,
      inputs,
      outputShape,
      hasBias,
      is1DimensionDispatch,
      isVec4,
      dataType,
      uniforms,
      isChannelsLast
    )}`;
  };
  return {
    name: "ConvTranspose2D",
    shaderCache: { hint: `${attributes.cacheKey};`, inputDependencies },
    getRunData: () => ({
      dispatchGroup: { x: dispatch[0], y: dispatch[1], z: dispatch[2] },
      outputs: [{
        dims: squeezeOutputShapeFunction ? squeezeOutputShapeFunction(outputShape) : outputShape,
        dataType: inputs[0].dataType
      }],
      programUniforms
    }),
    getShaderSource
  };
};
1 : 0); i < spatialRank; ++i, ++j) {\n const inSize = inputShape[j];\n const outSize = updateOutputShape ? inSize * strides[i] : outputShape[i];\n const totalPad = computeTotalPad(inSize, strides[i], pads[i], kernelShape[j], dilations[i], outSize);\n distributePadding(totalPad, autoPad, pads, i, i + spatialRank);\n if (updateOutputShape) {\n outputShape.push(\n strides[i] * (inSize - 1) + outputPadding[i] + (kernelShape[j] - 1) * dilations[i] + 1 - pads[i] - pads[i + spatialRank]\n );\n }\n }\n outputShape.splice(0, 0, batchSize);\n outputShape.splice(isChannelLast ? 3 : 1, 0, outChannels);\n };\n getAdjustedConvTransposeAttributes = (attributes, inputs) => {\n const kernelShape = attributes.kernelShape.slice();\n if (attributes.kernelShape.length === 0 || attributes.kernelShape.reduce((a, b) => a * b, 1) === 0) {\n kernelShape.length = 0;\n for (let i = 2; i < inputs[1].dims.length; ++i) {\n kernelShape.push(inputs[1].dims[i]);\n }\n }\n const isChannelsLast = attributes.format === "NHWC";\n kernelShape.splice(0, 0, inputs[1].dims[0]);\n kernelShape.splice(isChannelsLast ? 
3 : 1, 0, inputs[1].dims[1]);\n const pads = attributes.pads.slice();\n const outputShape = attributes.outputShape.slice();\n const outputPadding = attributes.outputPadding.slice();\n const inputShape = inputs[0].dims;\n let dilations = attributes.dilations.slice();\n if (dilations.reduce((a, b) => a + b, 0) === 0) {\n const spatialRank = inputs[0].dims.length - 2;\n dilations = new Array(spatialRank).fill(1);\n }\n let strides = attributes.strides.slice();\n if (strides.reduce((a, b) => a + b, 0) === 0) {\n const spatialRank = inputs[0].dims.length - 2;\n strides = new Array(spatialRank).fill(1);\n }\n calculateOutputShapeAndPads(\n inputShape,\n kernelShape,\n dilations,\n attributes.autoPad,\n attributes.group,\n pads,\n strides,\n isChannelsLast,\n outputPadding,\n outputShape\n );\n const newAttributes = Object.assign({}, attributes);\n Object.assign(newAttributes, { kernelShape, pads, outputPadding, outputShape, dilations, strides });\n return newAttributes;\n };\n parseConvTransposeAttributes = (attributes) => {\n const activationAttributes = parseInternalActivationAttributes(attributes);\n const format = attributes.format;\n const autoPad = [\n "NOTSET",\n "VALID",\n "SAME_UPPER",\n "SAME_LOWER"\n ][typeof attributes.autoPad == "undefined" ? 
// Checks the ConvTranspose inputs and attributes against each other and throws
// an Error describing the first violated constraint. Returns nothing.
//  - inputs: [data, filter] or [data, filter, bias]; only `dims` is read.
//  - attributes: format, group, dilations, strides, pads, outputPadding,
//    kernelShape and outputShape are read.
validateInputs10 = (inputs, attributes) => {
  const inputCountOk = Boolean(inputs) && (inputs.length === 2 || inputs.length === 3);
  if (!inputCountOk) {
    throw new Error("Conv requires 2 or 3 inputs");
  }
  const dataDims = inputs[0].dims;
  const filterDims = inputs[1].dims;
  const rank = dataDims.length;
  if (rank !== 4 && rank !== 3) {
    throw new Error("currently only support 2-dimensional conv");
  }
  if (rank !== filterDims.length) {
    throw new Error("filter does not have same dimension as input");
  }
  // The data channel axis is last for NHWC and 1 otherwise; it must match
  // the filter's first dimension.
  const channelAxis = attributes.format === "NHWC" ? rank - 1 : 1;
  if (dataDims[channelAxis] !== filterDims[0]) {
    throw new Error("FILTER_IN_CHANNEL should be equal to DATA_CHANNEL");
  }
  const featureMaps = filterDims[1] * attributes.group;
  if (inputs.length === 3) {
    const biasDims = inputs[2].dims;
    if (biasDims.length !== 1 || biasDims[0] !== featureMaps) {
      throw new Error("invalid bias");
    }
  }
  const spatialRank = rank - 2;
  // An attribute list whose entries sum to zero is treated as "not set",
  // and its length is then not checked.
  const isSet = (values) => values.reduce((total, value) => total + value, 0) > 0;
  if (isSet(attributes.dilations) && attributes.dilations.length !== spatialRank) {
    throw new Error(`dilations should be ${spatialRank}D`);
  }
  if (isSet(attributes.strides) && attributes.strides.length !== spatialRank) {
    throw new Error(`strides should be ${spatialRank}D`);
  }
  if (isSet(attributes.pads) && attributes.pads.length !== spatialRank * 2) {
    throw new Error(`pads should be ${spatialRank * 2}D`);
  }
  const outputPaddingLength = attributes.outputPadding.length;
  if (outputPaddingLength !== spatialRank && outputPaddingLength !== 0) {
    throw new Error(`output_padding should be ${spatialRank}D`);
  }
  if (isSet(attributes.kernelShape)) {
    const kernelRank = attributes.kernelShape.length;
    if (kernelRank !== 0 && kernelRank !== filterDims.length - 2) {
      throw new Error("invalid kernel shape");
    }
  }
  const outputShapeLength = attributes.outputShape.length;
  if (outputShapeLength !== 0 && outputShapeLength !== rank - 2) {
    throw new Error("invalid output shape");
  }
};
3 : 1];\n const inputChannels = inputs[0].dims[isChannelsLast ? 3 : 1];\n if (adjustedAttributes.group !== 1 || outChannels === 1 && inputChannels === 1) {\n context.compute(createConvTranspose2DProgramInfo(inputs, adjustedAttributes));\n return;\n }\n const outHeight = outputShape[isChannelsLast ? 1 : 2];\n const outWidth = outputShape[isChannelsLast ? 2 : 3];\n const weightHeight = inputs[1].dims[2];\n const weightWidth = inputs[1].dims[3];\n const dimAOuter = isChannelsLast ? outHeight * outWidth : outChannels;\n const dimBOuter = isChannelsLast ? outChannels : outHeight * outWidth;\n const dimInner = weightHeight * weightWidth * inputChannels;\n const sequentialAccessByThreads = (\n /* backend.adapterInfo.isIntel() */\n true\n );\n const transposedWeight = context.kernelCustomData.wT ?? context.compute(\n createTransposeProgramInfo(inputs[1], weightTransposePerm),\n { inputs: [1], outputs: [attributes.wIsConst ? -2 : -1] }\n )[0];\n if (attributes.wIsConst && !context.kernelCustomData.wT) {\n context.kernelCustomData.wT = transposedWeight;\n }\n const convTransposeInputs = [inputs[0], transposedWeight];\n const hasBias = inputs.length === 3;\n if (hasBias) {\n if (!isChannelsLast && inputs[2].dims.length === 1) {\n convTransposeInputs.push(inputs[2].reshape([inputs[2].dims[0], 1, 1]));\n } else {\n convTransposeInputs.push(inputs[2]);\n }\n }\n context.compute(\n createConv2DTransposeMatMulProgramInfo(\n convTransposeInputs,\n adjustedAttributes,\n outputShape,\n dimAOuter,\n dimBOuter,\n dimInner,\n hasBias,\n sequentialAccessByThreads\n ),\n { inputs: convTransposeInputs }\n );\n };\n convTranspose1d = (context, attributes) => {\n const isChannelLast = attributes.format === "NHWC";\n const inputs = [\n context.inputs[0].reshape(\n isChannelLast ? 
(\n // [N, W, C] -> [N, H=1, W, C]\n [context.inputs[0].dims[0], 1, context.inputs[0].dims[1], context.inputs[0].dims[2]]\n ) : (\n // [N, C, W] -> [N, C, H=1, W]\n [context.inputs[0].dims[0], context.inputs[0].dims[1], 1, context.inputs[0].dims[2]]\n )\n ),\n //[FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kW] -> [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL, kH=1, kW]\n context.inputs[1].reshape([context.inputs[1].dims[0], context.inputs[1].dims[1], 1, context.inputs[1].dims[2]])\n ];\n if (context.inputs.length === 3) {\n inputs.push(context.inputs[2]);\n }\n let kernelShape = attributes.kernelShape;\n if (kernelShape.length === 0 || kernelShape[0] === 0) {\n kernelShape = [context.inputs[1].dims[2]];\n }\n let dilations = attributes.dilations;\n if (dilations.length === 0 || dilations[0] === 0) {\n dilations = [1];\n }\n let strides = attributes.strides;\n if (strides.length === 0 || strides[0] === 0) {\n strides = [1];\n }\n let pads = attributes.pads;\n if (pads.length === 0) {\n pads = [0, 0];\n }\n pads = [0, pads[0], 0, pads[1]];\n strides = [1].concat(strides);\n dilations = [1].concat(dilations);\n kernelShape = [1].concat(kernelShape);\n const adjustedAttributes = getAdjustedConvTransposeAttributes({ ...attributes, pads, strides, dilations, kernelShape }, inputs);\n context.compute(createConvTranspose2DProgramInfo(\n inputs,\n adjustedAttributes,\n (outputShape) => isChannelLast ? 
// Builds the program description for CumSum.
//  - inputType / inputShape: data type and dims of the tensor to accumulate;
//    the output uses the same type and dims.
//  - axisInput: tensor holding the axis; read as int32 when its dataType is 6,
//    otherwise as int64 and converted with Number().
//  - attributes: `reverse`, `exclusive` and `cacheKey` are read.
// Returns { name, shaderCache, getRunData, getShaderSource }.
createCumsumProgramInfo = (inputType, inputShape, axisInput, attributes) => {
  const outputSize = ShapeUtil.size(inputShape);
  const rank = inputShape.length;
  const inputVar = inputVariable("input", inputType, rank);
  const outputVar = outputVariable("output", inputType, rank);
  let rawAxis;
  if (axisInput.dataType === 6 /* int32 */) {
    rawAxis = axisInput.getInt32Array()[0];
  } else {
    rawAxis = Number(axisInput.getBigInt64Array()[0]);
  }
  const axis = ShapeUtil.normalizeAxis(rawAxis, rank);
  const buildShaderSource = (shaderHelper) => {
    // Position of the current output element along the accumulation axis.
    const axisIndex = ` i32(${inputVar.indicesGet("inputIndices", "uniforms.axis")}) `;
    const axisExtent = getElementAt("uniforms.input_shape", "uniforms.axis", rank);
    // Forward: sum [0, index] (or [0, index) when exclusive).
    // Reverse: sum [index, extent) (or (index, extent) when exclusive).
    let lowerLimit;
    let upperLimit;
    if (attributes.reverse) {
      lowerLimit = axisIndex + (attributes.exclusive ? " + 1" : "");
      upperLimit = axisExtent;
    } else {
      lowerLimit = "0";
      upperLimit = axisIndex + (attributes.exclusive ? "" : " + 1");
    }
    return `
 ${shaderHelper.registerUniform("outputSize", "u32").registerUniform("axis", "u32").declareVariables(inputVar, outputVar)}
 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
 var inputIndices = ${outputVar.offsetToIndices("global_idx")};
 var sum = ${outputVar.type.value}(0);
 let first : i32 = ${lowerLimit};
 let last : i32 = ${upperLimit};
 for (var i : i32 = first; i < last; i++) {
 ${inputVar.indicesSet("inputIndices", "uniforms.axis", "u32(i)")};
 sum = sum + ${inputVar.getByIndices("inputIndices")};
 }
 ${outputVar.setByOffset("global_idx", "sum")};
 }`;
  };
  const buildRunData = () => {
    const workgroupCount = Math.ceil(
      outputSize / 64
      /* workgroup size */
    );
    return {
      outputs: [{ dims: inputShape, dataType: inputType }],
      dispatchGroup: { x: workgroupCount },
      programUniforms: [
        { type: 12 /* uint32 */, data: outputSize },
        { type: 12 /* uint32 */, data: axis },
        ...createTensorShapeVariables(inputShape, inputShape)
      ]
    };
  };
  return {
    name: "CumSum",
    shaderCache: { hint: attributes.cacheKey, inputDependencies: ["rank"] },
    getRunData: buildRunData,
    getShaderSource: buildShaderSource
  };
};
// Builds the program description for DepthToSpace.
// The op is expressed as: reshape the 4-D input to a 6-D tensor, then permute
// its axes in the shader; the run data reports the final 4-D output shape.
//  - inputTensor: `dims`, `dataType` and `reshape()` are used.
//  - attributes: `format` ("NHWC" selects the channels-last branch),
//    `blocksize` and `mode` ("DCR" selects the DCR tables) are read.
// Returns { name, shaderCache, getRunData, getShaderSource }.
createDepthToSpaceProgramInfo = (inputTensor, attributes) => {
  let n, h, w, c;
  let shape;
  let perm;
  const isChannelLast = attributes.format === "NHWC";
  const blocksize = attributes.blocksize;
  const isDCRmode = attributes.mode === "DCR";
  if (isChannelLast) {
    // dims are read in [n, h, w, c] order.
    [n, h, w, c] = inputTensor.dims;
    shape = isDCRmode ? [n, h, w, blocksize, blocksize, c / blocksize ** 2] : [n, h, w, c / blocksize ** 2, blocksize, blocksize];
    perm = isDCRmode ? [0, 1, 3, 2, 4, 5] : [0, 1, 4, 2, 5, 3];
  } else {
    // dims are read in [n, c, h, w] order.
    [n, h, w, c] = [inputTensor.dims[0], inputTensor.dims[2], inputTensor.dims[3], inputTensor.dims[1]];
    shape = isDCRmode ? [n, blocksize, blocksize, c / blocksize ** 2, h, w] : [n, c / blocksize ** 2, blocksize, blocksize, h, w];
    perm = isDCRmode ? [0, 3, 4, 1, 5, 2] : [0, 1, 4, 2, 5, 3];
  }
  const reshapedInputTensor = inputTensor.reshape(shape);
  const reshapedInputRank = reshapedInputTensor.dims.length;
  const inputDataType = inputTensor.dataType;
  const reshapedInput = inputVariable("a", inputDataType, reshapedInputRank);
  const permedOutput = outputVariable("output", inputDataType, reshapedInputRank);
  // The shader maps each output offset to permuted-output indices, converts
  // them to input indices with the generated `perm` function, and copies.
  const getShaderSource = (shaderHelper) => `
 ${shaderHelper.registerUniform("output_size", "u32").declareVariables(reshapedInput, permedOutput)}

 ${permFunctionBody2(perm, reshapedInputRank, reshapedInput, permedOutput)}

 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}

 let indices = ${permedOutput.offsetToIndices("global_idx")};
 let aIndices = perm(indices);

 ${permedOutput.setByOffset("global_idx", reshapedInput.getByIndices("aIndices"))}
 }`;
  return {
    name: "DepthToSpace",
    // NOTE(review): `perm` is baked into the shader and depends on
    // attributes.format, but format is not part of this hint — confirm the
    // cache key distinguishes NHWC from NCHW some other way.
    shaderCache: { hint: `${inputTensor.dims};${attributes.blocksize};${attributes.mode}`, inputDependencies: ["rank"] },
    getRunData: (inputs) => {
      const outputShape = isChannelLast ? [n, h * blocksize, w * blocksize, c / blocksize ** 2] : [n, c / blocksize ** 2, h * blocksize, w * blocksize];
      const outputSize = ShapeUtil.size(outputShape);
      // The shape uniforms describe the 6-D reshaped input and its permuted
      // form, not the 4-D output shape reported in `outputs`.
      const shapeBeforePerm = reshapedInputTensor.dims;
      const shapeAfterPerm = ShapeUtil.sortBasedOnPerm(shapeBeforePerm, perm);
      return {
        outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
        dispatchGroup: { x: Math.ceil(
          outputSize / 64
          /* workgroup size */
        ) },
        programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(shapeBeforePerm, shapeAfterPerm)]
      };
    },
    getShaderSource
  };
};
// Parsed form of an einsum equation.
// Fields populated by the constructor:
//  - equation: the equation string as given.
//  - hasEllipsis / ellipsisDims: whether any input term used "..." and the
//    dims it stood for (every later "..." must match them).
//  - symbolToInfo: Map symbol -> { count, dimValue, inputIndices }.
//  - lhs: one EinsumTerm per input term; rhs: the EinsumTerm of the output.
//  - outputDims: output shape derived from the RHS symbols.
EinsumEquation = class {
  // inputs: tensors whose `dims` are read, one per LHS term.
  // equation: e.g. "ij,jk->ik"; without "->" the output is formed from the
  // symbols that were counted exactly once.
  // Throws Error on a malformed LHS/RHS, an unknown RHS symbol, or
  // inconsistent dimensions.
  constructor(inputs, equation) {
    this.equation = equation;
    this.hasEllipsis = false;
    this.symbolToInfo = /* @__PURE__ */ new Map();
    this.lhs = new Array();
    this.outputDims = [];
    let [lhs, rhs] = equation.includes("->") ? equation.split("->", 2) : [equation, ""];
    if (!lhs.match(RegExp(lhsPatternOnly))) {
      throw new Error("Invalid LHS term");
    }
    const inputTerms = lhs.split(",");
    inputTerms.forEach((inputTerm, index) => {
      const dims = inputs[index].dims.slice();
      if (!inputTerm.match(RegExp(termPatternOnly))) {
        throw new Error("Invalid LHS term");
      }
      const einsumTerm = this.processTerm(inputTerm, true, dims, index);
      this.lhs.push(einsumTerm);
    });
    if (rhs === "") {
      rhs += [...this.symbolToInfo.entries()].filter(([sym, info]) => info.count === 1 || sym === "...").map(([sym]) => sym).join("");
    } else {
      // Anchored match, consistent with the LHS checks above: the whole RHS
      // must be a single term, not merely contain one somewhere.
      if (!rhs.match(RegExp(termPatternOnly))) {
        throw new Error("Invalid RHS");
      }
    }
    const rhsSymbols = rhs.match(RegExp(symbolPattern, "g"));
    rhsSymbols?.forEach((symbol) => {
      if (symbol === "...") {
        this.outputDims = this.outputDims.concat(this.ellipsisDims);
      } else {
        const info = this.symbolToInfo.get(symbol);
        if (info === void 0) {
          throw new Error("Invalid RHS symbol");
        }
        this.outputDims.push(info.dimValue);
      }
    });
    this.rhs = this.processTerm(rhs, false, this.outputDims);
  }
  // End of EinsumEquation constructor
  // Add a symbol to the equation.
  // A repeated symbol must keep its dimension once it has been seen more than
  // once; otherwise its count and inputIndices are extended.
  addSymbol(symbol, dimValue, inputIndex) {
    let info = this.symbolToInfo.get(symbol);
    if (info !== void 0) {
      if (info.dimValue !== dimValue && info.count !== 1) {
        throw new Error("Dimension mismatch");
      } else {
        info.count++;
        info.inputIndices.push(inputIndex);
      }
    } else {
      info = { count: 1, dimValue, inputIndices: [inputIndex] };
    }
    this.symbolToInfo.set(symbol, info);
  }
  // Process one input/output term.
  // term: the term text; isInput: true for LHS terms; dims: the dims the term
  // labels; index: input index (-1 for the output).
  // Returns an EinsumTerm mapping each symbol to the axis positions it labels.
  processTerm(term, isInput, dims, index = -1) {
    const rank = dims.length;
    let ellipsis = false;
    let ellipsisDims = [];
    // Axis of `dims` labelled by the next symbol. This is the position recorded
    // for every symbol; deriving it from the equation-wide hasEllipsis flag
    // shifted the axes of terms that have no ellipsis of their own (or whose
    // symbol precedes it) whenever an earlier term had one.
    let nextDim = 0;
    if (!term.match(RegExp(termPatternOnly)) && (!isInput && term !== "")) {
      throw new Error("Invalid LHS term");
    }
    const indexSymbols = term.match(RegExp(symbolPattern, "g"));
    const einsumTerm = new EinsumTerm(index);
    indexSymbols?.forEach((symbol) => {
      if (symbol === "...") {
        if (ellipsis) {
          throw new Error("Only one ellipsis is allowed per input term");
        }
        ellipsis = true;
        // The ellipsis covers every axis not claimed by a named symbol.
        const ellipsisDimLength = rank - indexSymbols.length + 1;
        if (ellipsisDimLength < 0) {
          throw new Error("Ellipsis out of bounds");
        }
        ellipsisDims = dims.slice(nextDim, nextDim + ellipsisDimLength);
        if (this.hasEllipsis) {
          if (this.ellipsisDims.length !== ellipsisDims.length || this.ellipsisDims.toString() !== ellipsisDims.toString()) {
            throw new Error("Ellipsis dimensions mismatch");
          }
        } else if (isInput) {
          this.hasEllipsis = true;
          this.ellipsisDims = ellipsisDims;
        } else {
          throw new Error("Ellipsis must be specified in the LHS");
        }
        // Each ellipsis axis gets a synthetic symbol: "0", "1", ...
        for (let j = 0; j < ellipsisDims.length; j++) {
          const symbol2 = String.fromCharCode("0".charCodeAt(0) + j);
          einsumTerm.addSymbol(symbol2, nextDim);
          this.addSymbol(symbol2, dims[nextDim], index);
          nextDim++;
        }
      } else {
        einsumTerm.addSymbol(symbol, nextDim);
        this.addSymbol(symbol, dims[nextDim], index);
        nextDim++;
      }
    });
    return einsumTerm;
  }
  // Output dimensions of the equation
};
reduceOpsSetIndices.push(`${inputVars[i].indicesSet(`input${i}Indices`, index, `${symbol}`)}`);\n });\n reduceOpCompute.push(`prod *= ${inputVars[i].getByIndices(`input${i}Indices`)};`);\n }\n });\n reduceOpsLoopHeaders.push(\n `for(var ${symbol}: u32 = 0; ${symbol} < uniforms.${appendMax(symbol)}; ${symbol}++) {`\n );\n reduceOpsLoopFooters.push("}");\n }\n });\n const reduceOps2 = isReduceOpsWithoutLoop ? [\n ...idxCopy,\n `let sum = ${inputVars.map((inputVar, i) => inputVar.getByIndices(`input${i}Indices`)).join(" * ")};`\n ] : [\n ...idxCopy,\n initSum,\n ...reduceOpsLoopHeaders,\n ...reduceOpsSetIndices,\n initProd,\n ...reduceOpCompute,\n updateSum,\n ...reduceOpsLoopFooters\n ];\n return `\n ${shaderHelper.registerUniforms(uniformsSymbols.map((symbol) => ({ name: `${appendMax(symbol)}`, type: "u32" }))).registerUniform("outputSize", "u32").declareVariables(...inputVars, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n var outputIndices = ${output.offsetToIndices("global_idx")};\n ${inputVars.map((_var, i) => `var input${i}Indices: ${inputVars[i].type.indices};`).join("\\n")}\n ${reduceOps2.join("\\n")};\n ${output.setByOffset("global_idx", "sum")};\n }`;\n };\n return {\n name: "Einsum",\n shaderCache: { hint: einsumEquation.equation, inputDependencies: inputShapes.map(() => "rank") },\n getRunData: () => {\n const programUniformsInit = uniformsSymbols.filter((symbol) => einsumEquation.symbolToInfo.has(symbol)).map(\n (symbol) => ({ type: 12 /* uint32 */, data: einsumEquation.symbolToInfo.get(symbol)?.dimValue || 0 })\n );\n programUniformsInit.push({ type: 12 /* uint32 */, data: outputSize });\n const programUniforms = inputShapes.map((dims, _) => [...createTensorShapeVariables(dims)]).reduce((acc, inputProgramUniforms) => acc.concat(inputProgramUniforms), programUniformsInit);\n programUniforms.push(...createTensorShapeVariables(outputShape));\n return {\n outputs: [{ dims: 
// Validates the Expand inputs: exactly two inputs, and the requested shape
// (read from inputs[1] as int64 values) must be broadcast-compatible with
// inputs[0].dims when the two are right-aligned. Throws an Error otherwise.
validateInputs12 = (inputs) => {
  if (!inputs || inputs.length !== 2) {
    throw new Error("Expand requires 2 input.");
  }
  const inputDims = inputs[0].dims;
  const requestedShape = Array.from(inputs[1].getBigInt64Array(), Number);
  // Only the trailing axes that both shapes have are compared.
  const sharedAxes = Math.min(requestedShape.length, inputDims.length);
  const requestedStart = requestedShape.length - sharedAxes;
  const inputStart = inputDims.length - sharedAxes;
  for (let k = 0; k < sharedAxes; ++k) {
    const requested = requestedShape[requestedStart + k];
    const actual = inputDims[inputStart + k];
    const compatible = requested === actual || requested === 1 || actual === 1;
    if (!compatible) {
      throw new Error("Expand requires shape to be broadcastable to input");
    }
  }
};
// Builds the program description for Expand.
//  - inputs[0]: tensor to broadcast (`dims`, `dataType` are read).
//  - inputs[1]: requested shape, read as int64 values.
// Bool data (dataType 9) is processed four elements per invocation
// (components = 4); every other type one element per invocation.
// Returns { name, shaderCache, getShaderSource, getRunData }.
createExpandProgramInfo = (inputs) => {
  const inputShape = inputs[0].dims;
  const shape = Array.from(inputs[1].getBigInt64Array(), Number);
  const outputShape = calculateOutputShape2(inputShape, shape);
  const dataType = inputs[0].dataType;
  const components = dataType === 9 /* bool */ ? 4 : 1;
  const outputSize = Math.ceil(ShapeUtil.size(outputShape) / components);
  const getShaderSource = (shaderHelper) => {
    const input = inputVariable("input", dataType, inputShape.length, components);
    const output = outputVariable("output", dataType, outputShape.length, components);
    let assignment;
    if (dataType === 9 /* bool */) {
      // Resolves one of the four output elements of this invocation back to
      // its (broadcast) input element and stores it in lane `x` of `resStr`.
      const singleAssignment = (resStr, x, typeCast = "") => `
 let outputIndices${x} = ${output.offsetToIndices(`outputOffset + ${x}u`)};
 let offset${x} = ${input.broadcastedIndicesToOffset(`outputIndices${x}`, output)};
 let index${x} = offset${x} / 4u;
 let component${x} = offset${x} % 4u;
 ${resStr}[${x}] = ${typeCast}(${input.getByOffset(`index${x}`)}[component${x}]);
 `;
      // `data` must be declared as vec4<u32>: each lane is assigned a u32(...)
      // value below, and an untyped vec4(0) is inferred as vec4<i32> in WGSL.
      assignment = `
 let outputOffset = global_idx * ${components};
 var data = vec4<u32>(0);
 ${singleAssignment("data", 0, "u32")}
 ${singleAssignment("data", 1, "u32")}
 ${singleAssignment("data", 2, "u32")}
 ${singleAssignment("data", 3, "u32")}
 ${output.setByOffset("global_idx", "data")}
 }`;
    } else {
      assignment = `
 let outputIndices = ${output.offsetToIndices("global_idx")};
 let inputOffset = ${input.broadcastedIndicesToOffset("outputIndices", output)};
 ${output.setByOffset("global_idx", input.getByOffset("inputOffset"))}
 }`;
    }
    // Both `assignment` variants end with the closing brace of main.
    return `
 ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(input, output)}
 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}
 ${assignment}`;
  };
  const programUniforms = [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputShape, outputShape)];
  return {
    name: "Expand",
    shaderCache: { hint: `${outputShape.length}`, inputDependencies: ["rank"] },
    getShaderSource,
    getRunData: () => ({
      outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    })
  };
};
// Builds the program description for Gather.
//  - inputs[0]: data tensor; inputs[1]: indices tensor (`dims`, `dataType`).
//  - attributes: `axis` (normalized against the data rank) and `cacheKey`.
// The output shape is the data shape with the axis dimension replaced by the
// indices shape. Bool data (dataType 9) is processed four elements per
// invocation (components = 4).
// Returns { name, shaderCache, getRunData, getShaderSource }.
createGatherProgramInfo = (inputs, attributes) => {
  const inputShape = inputs[0].dims;
  const indicesShape = inputs[1].dims;
  const inputRank = inputShape.length;
  const axis = ShapeUtil.normalizeAxis(attributes.axis, inputRank);
  const outputShape = inputShape.slice(0);
  outputShape.splice(axis, 1, ...indicesShape);
  const axisDimLimit = inputShape[axis];
  const components = inputs[0].dataType === 9 /* bool */ ? 4 : 1;
  const outputSize = Math.ceil(ShapeUtil.size(outputShape) / components);
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 6 /* int32 */, data: axisDimLimit },
    { type: 12 /* uint32 */, data: axis },
    ...createTensorShapeVariables(inputs[0].dims, inputs[1].dims, outputShape)
  ];
  const getShaderSource = (shaderHelper) => {
    const data = inputVariable("data", inputs[0].dataType, inputs[0].dims.length, components);
    const indices = inputVariable("inputIndices", inputs[1].dataType, inputs[1].dims.length);
    const output = outputVariable("output", inputs[0].dataType, outputShape.length, components);
    // Emits WGSL that turns `outputIndices${x}` into `dataIndices${x}`:
    // read the gather index from the indices tensor, wrap negative values by
    // axisDimLimit, and copy the remaining axes straight from the output.
    // Rank-1 index values are scalars, hence the `[i]`-less variants.
    const calcDataIndices = (x) => {
      const indicesRank = indicesShape.length;
      let calcStr = `var indicesIndices${x} = ${indices.type.indices}(0);`;
      for (let i = 0; i < indicesRank; i++) {
        calcStr += `${indicesRank > 1 ? `indicesIndices${x}[${i}]` : `indicesIndices${x}`} = ${outputShape.length > 1 ? `outputIndices${x}[uniforms.axis + ${i}]` : `outputIndices${x}`};`;
      }
      calcStr += `
 var idx${x} = ${indices.getByIndices(`indicesIndices${x}`)};
 if (idx${x} < 0) {
 idx${x} = idx${x} + uniforms.axisDimLimit;
 }
 var dataIndices${x} : ${data.type.indices};
 `;
      // `j` walks the output axes; the gathered axis consumes indicesRank of them.
      for (let i = 0, j = 0; i < inputRank; i++) {
        if (i === axis) {
          calcStr += `${inputRank > 1 ? `dataIndices${x}[${i}]` : `dataIndices${x}`} = u32(idx${x});`;
          j += indicesRank;
        } else {
          calcStr += `${inputRank > 1 ? `dataIndices${x}[${i}]` : `dataIndices${x}`} = ${outputShape.length > 1 ? `outputIndices${x}[${j}]` : `outputIndices${x}`};`;
          j++;
        }
      }
      return calcStr;
    };
    let assignment;
    if (inputs[0].dataType === 9 /* bool */) {
      // Gathers one of the four output elements of this invocation into lane
      // `x` of `resStr`.
      const singleAssignment = (resStr, x, typeCast = "") => `
 let outputIndices${x} = ${output.offsetToIndices(`outputOffset + ${x}u`)};
 ${calcDataIndices(x)};
 let offset${x} = ${data.indicesToOffset(`dataIndices${x}`)};
 let index${x} = offset${x} / 4u;
 let component${x} = offset${x} % 4u;
 ${resStr}[${x}] = ${typeCast}(${data.getByOffset(`index${x}`)}[component${x}]);
 `;
      // `value` must be declared as vec4<u32>: each lane is assigned a u32(...)
      // value below, and an untyped vec4(0) is inferred as vec4<i32> in WGSL.
      assignment = `
 let outputOffset = global_idx * ${components};
 var value = vec4<u32>(0);
 ${singleAssignment("value", 0, "u32")}
 ${singleAssignment("value", 1, "u32")}
 ${singleAssignment("value", 2, "u32")}
 ${singleAssignment("value", 3, "u32")}
 ${output.setByOffset("global_idx", "value")}
 `;
    } else {
      assignment = `
 let outputIndices = ${output.offsetToIndices("global_idx")};
 ${calcDataIndices("")};
 let value = ${data.getByIndices("dataIndices")};
 ${output.setByOffset("global_idx", "value")};
 `;
    }
    return `
 ${shaderHelper.registerUniform("outputSize", "u32").registerUniform("axisDimLimit", "i32").registerUniform("axis", "u32").declareVariables(data, indices, output)}
 ${shaderHelper.mainStart()}
 ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
 ${assignment}
 }`;
  };
  return {
    name: "Gather",
    shaderCache: { hint: attributes.cacheKey, inputDependencies: ["rank", "rank"] },
    getRunData: () => ({
      outputs: [
        { dims: outputShape, dataType: inputs[0].dataType }
      ],
      dispatchGroup: { x: Math.ceil(
        outputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    }),
    getShaderSource
  };
};
});\n\n // web/lib/wasm/jsep/webgpu/ops/gather-elements.ts\n var validateInputs14, createGatherElementsProgramInfo, parseGatherElementsAttributes, gatherElements;\n var init_gather_elements = __esm({\n "web/lib/wasm/jsep/webgpu/ops/gather-elements.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs14 = (inputs) => {\n if (!inputs || inputs.length !== 2) {\n throw new Error("GatherElements requires 2 inputs.");\n }\n if (inputs[0].dims.length < 1) {\n throw new Error("GatherElements requires that the data input be rank >= 1.");\n }\n if (inputs[0].dims.length !== inputs[1].dims.length) {\n throw new Error(`GatherElements requires that the data input and\n indices input tensors be of same rank.`);\n }\n };\n createGatherElementsProgramInfo = (inputs, attributes) => {\n const inputShape = inputs[0].dims;\n const inputOutputDataType = inputs[0].dataType;\n const inputRank = inputShape.length;\n const indicesShape = inputs[1].dims;\n const indicesDataType = inputs[1].dataType;\n const axis = ShapeUtil.normalizeAxis(attributes.axis, inputRank);\n const axisDimLimit = inputShape[axis];\n const outputShape = indicesShape.slice(0);\n const outputSize = ShapeUtil.size(outputShape);\n const input = inputVariable("input", inputOutputDataType, inputRank);\n const indices = inputVariable("indicesInput", indicesDataType, indicesShape.length);\n const output = outputVariable("output", inputOutputDataType, outputShape.length);\n const programUniforms = [\n { type: 12 /* uint32 */, data: outputSize },\n { type: 6 /* int32 */, data: axisDimLimit },\n { type: 12 /* uint32 */, data: axis }\n ];\n programUniforms.push(...createTensorShapeVariables(inputShape, indicesShape, outputShape));\n const inputDependencies = ["rank", "rank"];\n const getShaderSource = (shaderHelper) => `\n ${shaderHelper.registerUniform("outputSize", "u32").registerUniform("axisDimLimit", "i32").registerUniform("axis", 
"u32").declareVariables(input, indices, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n\n let outputIndices = ${output.offsetToIndices("global_idx")};\n\n var idx = ${indices.getByOffset("global_idx")};\n if (idx < 0) {\n idx = idx + uniforms.axisDimLimit;\n }\n var inputIndices = ${input.type.indices}(outputIndices);\n ${input.indicesSet("inputIndices", "uniforms.axis", "u32(idx)")};\n let value = ${input.getByIndices("inputIndices")};\n\n ${output.setByOffset("global_idx", "value")};\n }`;\n return {\n name: "GatherElements",\n shaderCache: { inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n };\n };\n parseGatherElementsAttributes = (attributes) => createAttributeWithCacheKey({ axis: attributes.axis });\n gatherElements = (context, attributes) => {\n const inputs = context.inputs;\n validateInputs14(inputs);\n context.compute(createGatherElementsProgramInfo(context.inputs, attributes));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/gemm.ts\n var validateInputs15, createGemmProgramInfo, parseGemmAttributes, gemm;\n var init_gemm = __esm({\n "web/lib/wasm/jsep/webgpu/ops/gemm.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n validateInputs15 = (inputs) => {\n if (!inputs) {\n throw new Error("Input is missing");\n }\n if (inputs.length < 2 || inputs.length > 3) {\n throw new Error("Invaid input number.");\n }\n if (inputs.length === 3 && inputs[2].dims.length > 2) {\n throw new Error("Invalid input shape of C");\n }\n if (inputs[0].dataType !== inputs[1].dataType || inputs.length === 3 && inputs[0].dataType !== inputs[2].dataType) {\n throw new Error("Input types are mismatched");\n }\n };\n createGemmProgramInfo = (inputs, attributes) => {\n const aShape = 
inputs[0].dims.slice();\n const bShape = inputs[1].dims.slice();\n const [M, N, K] = GemmUtil.getShapeOfGemmResult(\n aShape,\n attributes.transA,\n bShape,\n attributes.transB,\n inputs.length === 3 ? inputs[2].dims : void 0\n );\n const outputShape = [M, N];\n if (!outputShape) {\n throw new Error("Can\'t use gemm on the given tensors");\n }\n const outputSize = ShapeUtil.size(outputShape);\n const programUniforms = [\n { type: 12 /* uint32 */, data: outputSize },\n { type: 12 /* uint32 */, data: M },\n { type: 12 /* uint32 */, data: N },\n { type: 12 /* uint32 */, data: K },\n { type: 1 /* float */, data: attributes.alpha },\n { type: 1 /* float */, data: attributes.beta }\n ];\n const inputDependencies = ["type", "type"];\n if (inputs.length === 3) {\n programUniforms.push(...createTensorShapeVariables(inputs[2].dims));\n inputDependencies.push("rank");\n }\n programUniforms.push(...createTensorShapeVariables(outputShape));\n const getShaderSource = (shaderHelper) => {\n let line = "";\n if (attributes.transA && attributes.transB) {\n line = "value += a[k * uniforms.M + m] * b[n * uniforms.K + k];";\n } else if (attributes.transA && !attributes.transB) {\n line = "value += a[k * uniforms.M + m] * b[k * uniforms.N + n];";\n } else if (!attributes.transA && attributes.transB) {\n line = "value += a[m * uniforms.K + k] * b[n * uniforms.K + k];";\n } else if (!attributes.transA && !attributes.transB) {\n line = "value += a[m * uniforms.K + k] * b[k * uniforms.N + n];";\n }\n const calculateAlpha = attributes.alpha === 1 ? 
"" : "value *= uniforms.alpha;";\n const a = inputVariable("a", inputs[0].dataType, inputs[0].dims);\n const b = inputVariable("b", inputs[1].dataType, inputs[1].dims);\n const dataType = a.type.value;\n let c = null;\n const variables = [a, b];\n if (inputs.length === 3) {\n c = inputVariable("c", inputs[2].dataType, inputs[2].dims.length);\n variables.push(c);\n }\n const output = outputVariable("output", inputs[0].dataType, outputShape.length);\n variables.push(output);\n const uniforms = [\n { name: "output_size", type: "u32" },\n { name: "M", type: "u32" },\n { name: "N", type: "u32" },\n { name: "K", type: "u32" },\n { name: "alpha", type: "f32" },\n { name: "beta", type: "f32" }\n ];\n return `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n\n let m = global_idx / uniforms.N;\n let n = global_idx % uniforms.N;\n\n var value = ${dataType}(0);\n for (var k: u32 = 0u; k < uniforms.K; k++) {\n ${line}\n }\n\n ${calculateAlpha}\n ${(() => {\n if (c != null) {\n return `let cOffset = ${c.broadcastedIndicesToOffset("vec2(m, n)", output)}; value += ${dataType}(uniforms.beta) * ${c.getByOffset("cOffset")};`;\n }\n return "";\n })()}\n output[global_idx] = value;\n }`;\n };\n return {\n name: "Gemm",\n shaderCache: { hint: `${attributes.cacheKey}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n };\n };\n parseGemmAttributes = (attributes) => {\n const transA = attributes.transA;\n const transB = attributes.transB;\n const alpha = attributes.alpha;\n const beta = attributes.beta;\n return { transA, transB, alpha, beta, cacheKey: `${attributes.transA};${attributes.transB};${attributes.alpha === 1}` };\n };\n gemm = (context, attributes) => {\n 
validateInputs15(context.inputs);\n context.compute(createGemmProgramInfo(context.inputs, attributes));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/instance-norm.ts\n var createInstanceNormProgramInfo, computeMean, createInstanceNormNHWCProgramInfo, instanceNorm;\n var init_instance_norm = __esm({\n "web/lib/wasm/jsep/webgpu/ops/instance-norm.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n createInstanceNormProgramInfo = (inputs, attributes) => {\n const xShape = inputs[0].dims;\n const outputShape = xShape;\n const axis = 2;\n const normCount = ShapeUtil.sizeToDimension(xShape, axis);\n const normSize = ShapeUtil.sizeFromDimension(xShape, axis);\n const components = getMaxComponents(normSize);\n const normPackedSize = normSize / components;\n const inputShape = [xShape[0], xShape[1], normPackedSize];\n const inputDependencies = ["rank", "type", "type"];\n const programUniforms = [{ type: 12 /* uint32 */, data: normSize }, { type: 12 /* uint32 */, data: normPackedSize }];\n programUniforms.push(...createTensorShapeVariables(inputShape, inputShape));\n const getShaderSource = (shaderHelper) => {\n const x = inputVariable("x", inputs[0].dataType, inputShape.length, components);\n const scale = inputVariable("scale", inputs[1].dataType, inputs[1].dims);\n const bias = inputVariable("bias", inputs[2].dataType, inputs[2].dims);\n const output = outputVariable("output", inputs[0].dataType, inputShape.length, components);\n const variables = [x, scale, bias, output];\n const dataType = x.type.value;\n const f32Type = components === 1 ? 
"f32" : `vec${components}`;\n const workgroupSize = 64;\n const uniforms = [{ name: "normSize", type: "u32" }, { name: "normPackedSize", type: "u32" }];\n return `\n var meanShared : f32;\n var squaredNormShared : f32;\n var workgroupShared : array<${f32Type}, ${workgroupSize}>;\n const workgroupSize = ${workgroupSize}u;\n ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)}\n ${shaderHelper.mainStart(workgroupSize)}\n let norm = global_idx / workgroupSize;\n let batch = norm / uniforms.x_shape[1];\n let channel = norm % uniforms.x_shape[1];\n let localIndex = local_id.x;\n\n // initialize workgroup memory\n var initial = ${f32Type}(0);\n for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) {\n initial = initial + ${f32Type}(${x.get("batch", "channel", "h")});\n }\n workgroupShared[localIndex] = initial;\n workgroupBarrier();\n\n // Calculate the mean of current channel data.\n for (var currSize = workgroupSize >> 1; currSize > 0; currSize = currSize >> 1) {\n if (localIndex < currSize) {\n workgroupShared[localIndex] = workgroupShared[localIndex] + workgroupShared[localIndex + currSize];\n }\n workgroupBarrier();\n }\n if (localIndex == 0) {\n meanShared = ${sumVector("workgroupShared[0]", components)} / f32(uniforms.normSize);\n }\n workgroupBarrier();\n\n // reinitialize workgroup memory.\n initial = ${f32Type}(0);\n for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) {\n let deviation = ${f32Type}(${x.get("batch", "channel", "h")}) - ${f32Type}(meanShared);\n initial = initial + deviation * deviation;\n }\n workgroupShared[localIndex] = initial;\n workgroupBarrier();\n\n // Calculate the sum of square of deviation of current channel data.\n for (var currSize = workgroupSize >> 1; currSize > 0; currSize = currSize >> 1) {\n if (localIndex < currSize) {\n workgroupShared[localIndex] = workgroupShared[localIndex] + workgroupShared[localIndex + currSize];\n }\n workgroupBarrier();\n }\n if (localIndex 
== 0) {\n squaredNormShared = ${sumVector("workgroupShared[0]", components)};\n }\n workgroupBarrier();\n\n let invStdDev = inverseSqrt(squaredNormShared / f32(uniforms.normSize) + f32(${attributes.epsilon}));\n let channelScale = invStdDev * f32(${scale.getByOffset("channel")});\n let channelShift = f32(${bias.getByOffset("channel")}) - meanShared * channelScale;\n for (var h = localIndex; h < uniforms.normPackedSize; h += workgroupSize) {\n let value = ${x.get("batch", "channel", "h")} * ${dataType}(${f32Type}(channelScale)) + ${dataType}(${f32Type}(channelShift));\n ${output.set("batch", "channel", "h", "value")};\n }\n }`;\n };\n return {\n ...{ name: "InstanceNormalization" },\n // TODO: use epsilon as uniform. Currently epsilon as uniform fails test_instancenorm_epsilon.\n shaderCache: { hint: `${attributes.epsilon};${components}`, inputDependencies },\n getRunData: () => ({\n outputs: [\n { dims: outputShape, dataType: inputs[0].dataType }\n ],\n dispatchGroup: { x: normCount },\n programUniforms\n }),\n getShaderSource\n };\n };\n computeMean = (context, input, scale, bias, n, h, c, epsilon) => {\n const components = getMaxComponents(c);\n const WG = 64;\n const outputType = components === 1 ? "vec2f" : `mat2x${components}f`;\n const sumCastType = components === 1 ? 
"f32" : `vec${components}f`;\n const setOutputValue = (var1, var2) => `${outputType}(${var1}, ${var2})`;\n const unitsOfWork = n * c / components;\n const wgSize = Math.ceil(h / WG);\n const meanInputDependencies = ["type"];\n const meanProgramUniforms = [\n { type: 12 /* uint32 */, data: wgSize },\n { type: 12 /* uint32 */, data: h },\n { type: 12 /* uint32 */, data: Math.floor(c / components) },\n { type: 12 /* uint32 */, data: Math.floor(h * c / components) }\n ];\n const getMeanShaderSource = (shaderHelper) => {\n const inputHelper = inputVariable("input", input.dataType, input.dims, components);\n return `\n ${shaderHelper.declareVariables(inputHelper)}\n @group(0) @binding(1) var output : array<${outputType}>;\n struct Uniforms {wg_size:u32, H:u32, C:u32, image_size:u32};\n @group(0) @binding(2) var uniforms: Uniforms;\n\n ${shaderHelper.mainStart(WG)}\n let currentImageNumber = global_idx / ${WG} / uniforms.C;\n let currentChannelNumber = (global_idx / ${WG}) % uniforms.C;\n let wgOffset = local_id.x * uniforms.wg_size;\n if (wgOffset >= uniforms.H) {\n return;\n }\n let wgMax = min(wgOffset + uniforms.wg_size, uniforms.H);\n\n let offset = currentImageNumber * uniforms.image_size + currentChannelNumber;\n var sum = ${fillVector("f32", components)};\n var squaredSum = ${fillVector("f32", components)};\n for (var i: u32 = wgOffset; i < wgMax; i++) {\n let value = ${sumCastType}(input[offset + i * uniforms.C]);\n sum += value;\n squaredSum += value * value;\n }\n output[global_idx] = ${setOutputValue("sum", "squaredSum")};\n }`;\n };\n const meanValues = context.compute(\n {\n name: "InstanceNormComputeMean",\n shaderCache: { hint: `${components}`, inputDependencies: meanInputDependencies },\n getRunData: () => ({\n outputs: [\n { dims: [n, c, WG, 2], dataType: 1 /* float */ }\n ],\n dispatchGroup: { x: n * c / components },\n programUniforms: meanProgramUniforms\n }),\n getShaderSource: getMeanShaderSource\n },\n { inputs: [input], outputs: [-1] }\n )[0];\n 
const programUniforms = [\n { type: 12 /* uint32 */, data: unitsOfWork },\n { type: 12 /* uint32 */, data: h },\n { type: 12 /* uint32 */, data: Math.floor(c / components) },\n { type: 12 /* uint32 */, data: Math.floor(WG * c / components) }\n ];\n const inputDependencies = ["type", "type", "type"];\n const getShaderSource = (shaderHelper) => {\n const scaleHelper = inputVariable("scale", scale.dataType, scale.dims, components);\n const biasHelper = inputVariable("bias", bias.dataType, bias.dims, components);\n return `\n @group(0) @binding(0) var input : array<${outputType}>;\n @group(0) @binding(1) var scale : array<${scaleHelper.type.storage}>;\n @group(0) @binding(2) var bias : array<${biasHelper.type.storage}>;\n @group(0) @binding(3) var output : array<${outputType}>;\n struct Uniforms {units_of_work : u32, H: u32, C : u32, image_size : u32};\n @group(0) @binding(4) var uniforms: Uniforms;\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.units_of_work")}\n let currentImageNumber = global_idx / uniforms.C;\n let currentChannelNumber = global_idx % uniforms.C;\n\n let offset = currentImageNumber * uniforms.image_size;\n var sum = ${fillVector("f32", components)};\n var squaredSum = ${fillVector("f32", components)};\n for (var i: u32 = 0; i < min(${WG}, uniforms.H); i++) {\n let value = input[offset + i + currentChannelNumber * ${WG}];\n sum += value[0];\n squaredSum += value[1];\n }\n sum = sum / f32(uniforms.H);\n squaredSum = squaredSum / f32(uniforms.H);\n let invStdDev = inverseSqrt(squaredSum - sum * sum + f32(${epsilon}));\n let channelScale = invStdDev * ${sumCastType}(scale[currentChannelNumber]);\n let channelShift = ${sumCastType}(bias[currentChannelNumber]) - sum * channelScale;\n\n output[global_idx] = ${setOutputValue("channelScale", "channelShift")};\n }`;\n };\n return context.compute(\n {\n name: "InstanceNormComputeChannelScaleShift",\n // TODO: use epsilon as uniform. 
Currently epsilon as uniform fails test_instancenorm_epsilon.\n shaderCache: { hint: `${components};${epsilon}`, inputDependencies },\n getRunData: () => ({\n outputs: [\n { dims: [n, c, 2], dataType: 1 /* float */ }\n ],\n dispatchGroup: { x: Math.ceil(\n unitsOfWork / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n },\n { inputs: [meanValues, scale, bias], outputs: [-1] }\n )[0];\n };\n createInstanceNormNHWCProgramInfo = (context, inputs, attributes) => {\n const xShape = inputs[0].dims;\n const outputShape = xShape;\n const N = xShape[0];\n const C = xShape[xShape.length - 1];\n const H = ShapeUtil.sizeFromDimension(xShape, 1) / C;\n const components = getMaxComponents(C);\n const outputSize = ShapeUtil.size(outputShape) / components;\n const programUniforms = [{ type: 12 /* uint32 */, data: H }, { type: 12 /* uint32 */, data: Math.floor(C / components) }];\n const inputDependencies = ["type", "type"];\n const channelScaleShift = computeMean(context, inputs[0], inputs[1], inputs[2], N, H, C, attributes.epsilon);\n const getShaderSource = (shaderHelper) => {\n const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);\n const scaleType = components === 1 ? "vec2f" : `mat2x${components}f`;\n const scaleCastType = components === 1 ? 
dataType : `vec${components}<${dataType}>`;\n const inputHelper = inputVariable("input", inputs[0].dataType, inputs[0].dims, components);\n const outputHelper = outputVariable("output", inputs[0].dataType, outputShape, components);\n return `\n @group(0) @binding(0) var input : array<${inputHelper.type.storage}>;\n @group(0) @binding(1) var scaleInput : array<${scaleType}>;\n @group(0) @binding(2) var output : array<${outputHelper.type.storage}>;\n struct Uniforms {H: u32, C : u32};\n @group(0) @binding(3) var uniforms: Uniforms;\n\n ${shaderHelper.mainStart()}\n let currentImageNumber = global_idx / (uniforms.C * uniforms.H);\n let currentChannelNumber = global_idx % uniforms.C;\n\n let scaleOffset = currentImageNumber * uniforms.C + currentChannelNumber;\n let scale = scaleInput[scaleOffset];\n output[global_idx] = fma(input[global_idx], ${scaleCastType}(scale[0]), ${scaleCastType}(scale[1]));\n }`;\n };\n context.compute(\n {\n name: "InstanceNormalizationNHWC",\n shaderCache: { hint: `${components}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n },\n { inputs: [inputs[0], channelScaleShift] }\n );\n };\n instanceNorm = (context, attributes) => {\n if (attributes.format === "NHWC") {\n createInstanceNormNHWCProgramInfo(context, context.inputs, attributes);\n } else {\n context.compute(createInstanceNormProgramInfo(context.inputs, attributes));\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/layer-norm.ts\n var validateInputs16, createLayerNormProgramInfo, layerNorm;\n var init_layer_norm = __esm({\n "web/lib/wasm/jsep/webgpu/ops/layer-norm.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n validateInputs16 = (inputs) => {\n if (!inputs || inputs.length < 2) {\n throw new Error("layerNorm requires at least 2 inputs.");\n }\n };\n 
createLayerNormProgramInfo = (inputs, attributes, outputCount) => {\n const simplified = attributes.simplified;\n const xShape = inputs[0].dims;\n const scale = inputs[1];\n const bias = !simplified && inputs[2];\n const outputShape = xShape;\n const axis = ShapeUtil.normalizeAxis(attributes.axis, xShape.length);\n const normCount = ShapeUtil.sizeToDimension(xShape, axis);\n const normSize = ShapeUtil.sizeFromDimension(xShape, axis);\n const scaleSize = ShapeUtil.size(scale.dims);\n const biasSize = bias ? ShapeUtil.size(bias.dims) : 0;\n if (scaleSize !== normSize || bias && biasSize !== normSize) {\n throw new Error(`Size of X.shape()[axis:] == ${normSize}.\n Size of scale and bias (if provided) must match this.\n Got scale size of ${scaleSize} and bias size of ${biasSize}`);\n }\n const meanInvStdDevDim = [];\n for (let i = 0; i < xShape.length; ++i) {\n if (i < axis) {\n meanInvStdDevDim.push(xShape[i]);\n } else {\n meanInvStdDevDim.push(1);\n }\n }\n const components = getMaxComponents(normSize);\n const inputDependencies = ["type", "type"];\n const programUniforms = [\n { type: 12 /* uint32 */, data: normCount },\n { type: 1 /* float */, data: normSize },\n { type: 12 /* uint32 */, data: Math.floor(normSize / components) },\n { type: 1 /* float */, data: attributes.epsilon }\n ];\n if (bias) {\n inputDependencies.push("type");\n }\n const hasMeanDataOutput = outputCount > 1;\n const hasInvStdOutput = outputCount > 2;\n const getShaderSource = (shaderHelper) => {\n const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);\n const variables = [\n inputVariable("x", inputs[0].dataType, inputs[0].dims, components),\n inputVariable("scale", scale.dataType, scale.dims, components)\n ];\n if (bias) {\n variables.push(inputVariable("bias", bias.dataType, bias.dims, components));\n }\n variables.push(outputVariable("output", inputs[0].dataType, outputShape, components));\n if (hasMeanDataOutput) {\n variables.push(outputVariable("mean_data_output", 1 /* float 
*/, meanInvStdDevDim));\n }\n if (hasInvStdOutput) {\n variables.push(outputVariable("inv_std_output", 1 /* float */, meanInvStdDevDim));\n }\n const uniforms = [\n { name: "norm_count", type: "u32" },\n { name: "norm_size", type: "f32" },\n { name: "norm_size_vectorized", type: "u32" },\n { name: "epsilon", type: "f32" }\n ];\n return `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.norm_count")}\n let offset = global_idx * uniforms.norm_size_vectorized;\n var mean_vector = ${fillVector("f32", components)};\n var mean_square_vector = ${fillVector("f32", components)};\n\n for (var h: u32 = 0u; h < uniforms.norm_size_vectorized; h++) {\n let value = ${castToF32(dataType, components, "x[h + offset]")};\n mean_vector += value;\n mean_square_vector += value * value;\n }\n let mean = ${sumVector("mean_vector", components)} / uniforms.norm_size;\n let inv_std_dev = inverseSqrt(${sumVector("mean_square_vector", components)} / uniforms.norm_size ${simplified ? "" : "- mean * mean"} + uniforms.epsilon);\n\n for (var j: u32 = 0; j < uniforms.norm_size_vectorized; j++) {\n let f32input = ${castToF32(dataType, components, "x[j + offset]")};\n let f32scale = ${castToF32(dataType, components, "scale[j]")};\n output[j + offset] = ${variables[0].type.value}((f32input ${simplified ? "" : "- mean"}) * inv_std_dev * f32scale\n ${bias ? `+ ${castToF32(dataType, components, "bias[j]")}` : ""}\n );\n }\n\n ${hasMeanDataOutput ? "mean_data_output[global_idx] = mean" : ""};\n ${hasInvStdOutput ? 
"inv_std_output[global_idx] = inv_std_dev" : ""};\n }`;\n };\n const outputs = [{ dims: outputShape, dataType: inputs[0].dataType }];\n if (hasMeanDataOutput) {\n outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ });\n }\n if (hasInvStdOutput) {\n outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ });\n }\n return {\n name: "LayerNormalization",\n shaderCache: { hint: `${components};${outputCount};${simplified}`, inputDependencies },\n getRunData: () => ({ outputs, dispatchGroup: { x: Math.ceil(\n normCount / 64\n /* workgroup size */\n ) }, programUniforms }),\n getShaderSource\n };\n };\n layerNorm = (context, attributes) => {\n validateInputs16(context.inputs);\n context.compute(createLayerNormProgramInfo(context.inputs, attributes, context.outputCount));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/matmulnbits.ts\n var validateInputs17, createMatMulNBitsProgramInfo, matMulNBits, parseMatMulNBitsAttributes;\n var init_matmulnbits = __esm({\n "web/lib/wasm/jsep/webgpu/ops/matmulnbits.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs17 = (inputs, attributes) => {\n if (inputs.length < 3 || inputs.length > 4) {\n throw new Error("MatMulNBits requires 3 or 4 inputs");\n }\n const a = inputs[0];\n const aRank = a.dims.length;\n if (a.dims[aRank - 1] !== attributes.k) {\n throw new Error("The last dim of input shape does not match the k value");\n }\n const nBlocksPerCol = Math.floor((attributes.k + attributes.blockSize - 1) / attributes.blockSize);\n const blobSize = attributes.blockSize / 8 * attributes.bits;\n const b = inputs[1];\n if (!ShapeUtil.areEqual(b.dims, [attributes.n, nBlocksPerCol, blobSize])) {\n throw new Error("The second inputs must be 3D tensor with shape N X nBlocksPerCol X blobSize");\n }\n const scales = inputs[2];\n const scalesShape = scales.dims;\n if (ShapeUtil.size(scalesShape) !== attributes.n * nBlocksPerCol) {\n throw new 
Error("scales input size error.");\n }\n if (inputs.length === 4) {\n const zeroPoints = inputs[3];\n const zeroPointsShape = zeroPoints.dims;\n const expectedZeroPointsSize = attributes.bits > 4 ? attributes.n * nBlocksPerCol : attributes.n * Math.floor((nBlocksPerCol + 1) / 2);\n if (ShapeUtil.size(zeroPointsShape) !== expectedZeroPointsSize) {\n throw new Error("zeroPoints input size error.");\n }\n }\n };\n createMatMulNBitsProgramInfo = (inputs, attributes, maxComputeWorkgroupSizes, maxComputeWorkgroupStorageSize) => {\n const inputShape = inputs[0].dims;\n const aRank = inputShape.length;\n const nBlocksPerCol = Math.floor((attributes.k + attributes.blockSize - 1) / attributes.blockSize);\n const dimAOuter = inputShape[aRank - 2];\n const dimInner = attributes.k;\n const dimBOuter = attributes.n;\n const batchDims = inputShape.slice(0, aRank - 2);\n const batchSize = ShapeUtil.size(batchDims);\n const blobSize = attributes.blockSize / 8 * attributes.bits;\n const blobSizeInWords = blobSize / 4;\n const dataType = inputs[0].dataType;\n const outputNumber = getMaxComponents(dimAOuter);\n const aComponents = getMaxComponents(attributes.k);\n const bComponents = getMaxComponents(blobSizeInWords);\n const elementSize = getTensorElementSize(dataType);\n const workgroupOutputSize = dimAOuter * nBlocksPerCol * elementSize;\n const maxNumberOfComponents = Math.floor(maxComputeWorkgroupStorageSize / workgroupOutputSize);\n const useBlockwiseMatMulNBits = nBlocksPerCol <= maxComputeWorkgroupSizes[0] && maxNumberOfComponents > 0;\n const components = !useBlockwiseMatMulNBits || maxNumberOfComponents >= 4 ? getMaxComponents(dimBOuter) : maxNumberOfComponents >= 2 && getMaxComponents(dimBOuter) >= 2 ? 2 : 1;\n const outputShape = batchDims.concat([dimAOuter, dimBOuter]);\n const outputSize = ShapeUtil.size(outputShape) / components / outputNumber;\n const programUniforms = useBlockwiseMatMulNBits ? 
[] : [{ type: 12 /* uint32 */, data: outputSize }, { type: 12 /* uint32 */, data: attributes.blockSize }];\n const inputShapeTemp = [batchSize, dimAOuter, dimInner / aComponents];\n const bShape = ShapeUtil.convertShape(inputs[1].dims).slice();\n bShape.splice(-1, 1, blobSizeInWords / bComponents);\n programUniforms.push(...createTensorShapeVariables(inputShapeTemp));\n programUniforms.push(...createTensorShapeVariables(bShape));\n programUniforms.push(...createTensorShapeVariables(inputs[2].dims));\n if (inputs.length === 4) {\n programUniforms.push(...createTensorShapeVariables(ShapeUtil.convertShape(inputs[3].dims)));\n }\n const outputShapeTemp = [batchSize, dimAOuter, dimBOuter / components];\n programUniforms.push(...createTensorShapeVariables(outputShapeTemp));\n const getShaderSource = (shaderHelper) => {\n const inputRank = inputShapeTemp.length;\n const a = inputVariable("a", inputs[0].dataType, inputRank, aComponents);\n const b = inputVariable("b", 12 /* uint32 */, bShape.length, bComponents);\n const scales = inputVariable("scales", inputs[2].dataType, inputs[2].dims.length);\n const inputVariables = [a, b, scales];\n const zeroPoints = inputs.length === 4 ? 
inputVariable("zero_points", 12 /* uint32 */, inputs[3].dims.length) : void 0;\n if (zeroPoints) {\n inputVariables.push(zeroPoints);\n }\n const outputRank = outputShapeTemp.length;\n const output = outputVariable("output", inputs[0].dataType, outputRank, components);\n const uniforms = [{ name: "output_size", type: "u32" }, { name: "block_size", type: "u32" }];\n const dataType2 = tensorTypeToWsglStorageType(inputs[0].dataType);\n const qDqDataType = (() => {\n switch (aComponents) {\n case 1:\n return `array<${dataType2}, 8>`;\n case 2:\n return `mat4x2<${dataType2}>`;\n case 4:\n return `mat2x4<${dataType2}>`;\n default:\n throw new Error(`${aComponents}-component is not supported.`);\n }\n })();\n const processOneBlock = `\n for (var word: u32 = 0; word < ${blobSizeInWords}; word += ${bComponents}) {\n ${b.indicesSet("b_indices", "2", "word")};\n let b_data = ${b.getByIndices("b_indices")};\n for (var i: u32 = 0; i < ${bComponents}; i++) {\n let b_value: u32 = ${bComponents === 1 ? "b_data" : "b_data[word + i]"};\n let b_mask: u32 = 0x0F0F0F0Fu;\n let b_value_lower: vec4 = unpack4xU8(b_value & b_mask);\n let b_value_upper: vec4 = unpack4xU8((b_value >> 4) & b_mask);\n let b_quantized_values = ${qDqDataType}(${Array.from({ length: 4 }, (_, i) => `${dataType2}(b_value_lower[${i}]), ${dataType2}(b_value_upper[${i}])`).join(", ")});\n let b_dequantized_values = ${(() => {\n if (aComponents === 1) {\n return `${qDqDataType}(${Array.from({ length: 8 }, (_, i) => `(b_quantized_values[${i}] - zero_point) * scale`).join(", ")});`;\n } else {\n return `(b_quantized_values - ${qDqDataType}(${Array(8).fill("zero_point").join(",")})) * scale;`;\n }\n })()};\n // Number of B elements per 32-bit word is 32/bits = 32/4 = 8\n for (var m: u32 = 0; m < ${useBlockwiseMatMulNBits ? dimAOuter : outputNumber}u; m++) {\n ${a.indicesSet("a_indices", inputRank - 2, useBlockwiseMatMulNBits ? 
"m" : `row * ${outputNumber} + m`)};\n ${a.indicesSet("a_indices", inputRank - 1, "word_offset")};\n var input_offset = ${a.indicesToOffset("a_indices")};\n var a_data: ${qDqDataType};\n for (var j: u32 = 0; j < ${8 / aComponents}; j++) {\n a_data[j] = ${a.getByOffset("input_offset")};\n input_offset++;\n }\n ${useBlockwiseMatMulNBits ? "workgroup_shared[workgroup_shared_offset + m]" : "output_values[m]"}${components > 1 ? "[c]" : ""} += ${Array.from(\n { length: 8 / aComponents },\n (_, i) => `${aComponents === 1 ? `a_data[${i}] * b_dequantized_values[${i}]` : `dot(a_data[${i}], b_dequantized_values[${i}])`}`\n ).join(" + ")};\n }\n word_offset += ${8 / aComponents};\n }\n }`;\n const updateZeroPointIndex = zeroPoints ? `\n zero_point_offset += 4;\n if (zero_point_offset == 32) {\n zero_point_offset = 0;\n zero_point_index++;\n zero_point_word = ${zeroPoints.getByOffset("zero_point_index")};\n }` : "";\n return useBlockwiseMatMulNBits ? `\n var workgroup_shared: array<${output.type.value}, ${dimAOuter * nBlocksPerCol}>;\n ${shaderHelper.declareVariables(...inputVariables, output)}\n ${shaderHelper.mainStart([\n nBlocksPerCol,\n 1,\n 1\n ])}\n var a_indices: ${a.type.indices};\n var block = local_id.x;\n var col = workgroup_id.y;\n var batch = workgroup_id.z;\n ${a.indicesSet("a_indices", "0", "batch")};\n // Two zero points are packed into one byte when uniforms.bits is 4.\n for (var c: u32 = 0; c < ${components}; c++) {\n let col_times_components_plus_c = col * ${components} + c;\n ${zeroPoints ? 
`\n var zero_point_bytes_per_col: u32 = (${nBlocksPerCol} + 1) / 2;\n var zero_point_byte_count: u32 = col_times_components_plus_c * zero_point_bytes_per_col + (block >> 0x1u);\n var zero_point_word_index: u32 = zero_point_byte_count >> 0x2u;\n var zero_point_byte_offset: u32 = zero_point_byte_count & 0x3u;\n var zero_point_nibble_offset: u32 = block & 0x1u;\n var zero_point_bits_offset: u32 = (zero_point_byte_offset << 3) + (zero_point_nibble_offset << 2);\n var zero_point_word: u32 = ${zeroPoints.getByOffset("zero_point_word_index")} >> zero_point_bits_offset;` : ""}\n var b_indices: ${b.type.indices};\n ${b.indicesSet("b_indices", "0", "col_times_components_plus_c")};\n // The scale and zero points are computed per block.\n var scales_index = col_times_components_plus_c * ${nBlocksPerCol} + block;\n let scale = ${scales.getByOffset("scales_index")};\n // The default zero point is 8 for unsigned 4-bit quantization.\n let zero_point = ${dataType2}(${zeroPoints ? "(zero_point_word) & 0xFu" : 8});\n ${b.indicesSet("b_indices", "1", "block")};\n var word_offset: u32 = block * ${attributes.blockSize / aComponents};\n var workgroup_shared_offset: u32 = block * ${dimAOuter};\n ${processOneBlock}\n }\n workgroupBarrier();\n if (local_id.x == 0u) {\n var output_indices: ${output.type.indices};\n ${output.indicesSet("output_indices", "0", "batch")};\n ${output.indicesSet("output_indices", outputRank - 1, "col")};\n ${output.indicesSet("output_indices", outputRank - 2, "0")};\n var output_offset = ${output.indicesToOffset("output_indices")};\n for (var m: u32 = 0u; m < ${dimAOuter}u; m++) {\n var output_value: ${output.type.value} = ${output.type.value}(0);\n var workgroup_shared_offset: u32 = m;\n for (var b: u32 = 0u; b < ${nBlocksPerCol}u; b++) {\n output_value += workgroup_shared[workgroup_shared_offset];\n workgroup_shared_offset += ${dimAOuter};\n }\n ${output.setByOffset("output_offset", "output_value")};\n output_offset += ${dimBOuter / components};\n }\n }\n }` : 
`\n ${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n var output_values: array<${output.type.value}, ${outputNumber}>;\n var output_indices = ${output.offsetToIndices("global_idx")};\n var col = ${output.indicesGet("output_indices", outputRank - 1)};\n var row = ${output.indicesGet("output_indices", outputRank - 2)};\n var a_indices: ${a.type.indices} = output_indices;\n // Two zero points are packed into one byte because uniforms.bits <= 4.\n // zero_point_offset is either 0 or 4. It is bit offset within one byte.\n // TODO support zero_point_offset for bits > 4\n ${zeroPoints ? `\n var zero_point_abs_offset = col * ${components} * ((${nBlocksPerCol} + 1) / 2);\n var zero_point_index: u32 = zero_point_abs_offset / 4;\n var zero_point_word: u32 = ${zeroPoints.getByOffset("zero_point_index")};\n var zero_point_offset: u32 = (zero_point_abs_offset % 4) * 8;` : ""}\n var scale_index = col * ${nBlocksPerCol * components};\n var b_indices: ${b.type.indices};\n for (var c: u32 = 0; c < ${components}; c++) {\n ${b.indicesSet("b_indices", "0", `col * ${components} + c`)};\n var block_offset: u32 = 0;\n for (var block: u32 = 0; block < ${nBlocksPerCol}; block++) {\n // The scale and zero points are computed per block.\n let scale = ${scales.getByOffset("scale_index")};\n // The default zero point is 8 for unsigned 4-bit quantization.\n let zero_point = ${dataType2}(${zeroPoints ? "extractBits(zero_point_word, zero_point_offset, 4)" : 8});\n ${b.indicesSet("b_indices", "1", "block")};\n var word_offset: u32 = block_offset;\n ${processOneBlock}\n scale_index++;\n ${updateZeroPointIndex}\n block_offset += uniforms.block_size / ${aComponents};\n }\n // Drop the trailing 4 bits if the zero_poit_offset is not a byte boundary to align with the next byte.\n ${zeroPoints ? 
`if (zero_point_offset % 8 > 0) {\n ${updateZeroPointIndex}\n }` : ""}\n }\n for (var k: u32 = 0u; k < ${outputNumber}u; k++) {\n ${output.indicesSet("output_indices", outputRank - 2, `${outputNumber} * row + k`)};\n ${output.setByIndices("output_indices", "output_values[k]")}\n }\n }`;\n };\n return {\n name: useBlockwiseMatMulNBits ? "BlockwiseMatMulNBits" : "MatMulNBits",\n shaderCache: {\n hint: `${attributes.cacheKey};${dimAOuter};${dataType};${inputs.length}`,\n inputDependencies: Array(inputs.length).fill("rank")\n },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType }],\n name: useBlockwiseMatMulNBits ? "BlockwiseMatMulNBits" : "MatMulNBits",\n dispatchGroup: useBlockwiseMatMulNBits ? { x: 1, y: Math.ceil(dimBOuter / components), z: batchSize } : { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n };\n };\n matMulNBits = (context, attributes) => {\n validateInputs17(context.inputs, attributes);\n const maxComputeWorkgroupSizes = context.getMaxComputeWorkgroupSizes();\n const maxComputeWorkgroupStorageSize = context.getMaxComputeWorkgroupStoragesize();\n context.compute(createMatMulNBitsProgramInfo(\n context.inputs,\n attributes,\n maxComputeWorkgroupSizes,\n maxComputeWorkgroupStorageSize\n ));\n };\n parseMatMulNBitsAttributes = (attributes) => createAttributeWithCacheKey(attributes);\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/multihead-attentiion.ts\n var getInput, validateInputs18, parseMultiHeadAttentionAttributes, weightTransposeAttribute2, addBiasTranspose, maybeTransposeToBNSHAndAddBias, multiHeadAttention;\n var init_multihead_attentiion = __esm({\n "web/lib/wasm/jsep/webgpu/ops/multihead-attentiion.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_types();\n init_attention();\n init_common();\n init_transpose();\n getInput = (inputs, i) => inputs.length > i && inputs[i].dims.length > 0 && ShapeUtil.size(inputs[i].dims) > 0 
? inputs[i] : void 0;\n validateInputs18 = (inputs, attributes) => {\n const query = inputs[0];\n const key = getInput(inputs, 1);\n const value = getInput(inputs, 2);\n const bias = getInput(inputs, 3);\n const keyPaddingMask = getInput(inputs, 4);\n const relativePositionBias = getInput(inputs, 5);\n const pastKey = getInput(inputs, 6);\n const pastValue = getInput(inputs, 7);\n if (query.dims.length !== 3 && query.dims.length !== 5) {\n throw new Error("Input query is expected to have 3 or 5 dimensions");\n }\n const dmmhaPacking = false;\n const batchSize = query.dims[0];\n const sequenceLength = query.dims[1];\n const hiddenSize = query.dims.length === 3 ? dmmhaPacking ? query.dims[2] / 3 : query.dims[2] : attributes.numHeads * query.dims[4];\n let kvSequenceLength = sequenceLength;\n let pastSequenceLength = 0;\n let maxSequenceLength = 0;\n const headSize = Math.floor(hiddenSize / attributes.numHeads);\n if (pastKey && pastValue) {\n if (pastKey.dims.length !== 4) {\n throw new Error(\'Input "past_key" is expected to have 4 dimensions\');\n }\n if (pastKey.dims[0] !== batchSize || pastKey.dims[1] !== attributes.numHeads || pastKey.dims[3] !== headSize) {\n throw new Error(\'Input "past_key" shape (batch_size, num_heads, past_sequence_length, head_size)\');\n }\n if (pastValue.dims[0] !== batchSize || pastValue.dims[1] !== attributes.numHeads || pastValue.dims[3] !== headSize) {\n throw new Error(\'Input "past_value" shape (batch_size, num_heads, past_sequence_length, head_size)\');\n }\n if (pastKey.dims[2] !== pastValue.dims[2]) {\n throw new Error(\'Input "past_key" and "past_value" shall have same dim 2 (past_sequence_length)\');\n }\n if (pastValue.dims.length !== 4) {\n throw new Error(\'Input "past_value" is expected to have 4 dimensions\');\n }\n pastSequenceLength = pastKey.dims[2];\n maxSequenceLength = pastKey.dims[2];\n } else if (pastKey || pastValue) {\n throw new Error(\'Input "past_key" and "past_value" shall be both present or both 
absent\');\n }\n let qkvFormat;\n if (key) {\n if (query.dims.length !== 3) {\n throw new Error(\'Input "query" is expected to have 3 dimensions when key is given\');\n }\n if (key.dims.length < 3 || key.dims.length > 5) {\n throw new Error(\'Input "key" is expected to have 3, 4, or 5 dimensions\');\n }\n if (query.dims[0] !== key.dims[0]) {\n throw new Error(\'Input "query" and "key" shall have same dim 0 (batch size)\');\n }\n if (key.dims.length === 3) {\n if (key.dims[2] !== query.dims[2]) {\n throw new Error(\'Input "query" and "key" shall have same dim 2 (hidden_size)\');\n }\n qkvFormat = 2 /* qkvBSNH */;\n kvSequenceLength = key.dims[1];\n } else if (key.dims.length === 5) {\n if (key.dims[2] !== attributes.numHeads || key.dims[3] !== 2 || key.dims[4] !== headSize) {\n throw new Error(\'Expect "key" shape (batch_size, kv_sequence_length, num_heads, 2, head_size) for packed kv\');\n }\n if (value) {\n throw new Error(\'Expect "value" be none when "key" has packed kv format.\');\n }\n qkvFormat = 5 /* qKvBSNHxBSN2H */;\n kvSequenceLength = key.dims[1];\n } else {\n if (key.dims[1] !== attributes.numHeads || key.dims[3] !== headSize) {\n throw new Error(\'Expect "key" shape (batch_size, num_heads, kv_sequence_length, head_size) for past_key\');\n }\n qkvFormat = 0 /* unknown */;\n kvSequenceLength = key.dims[2];\n }\n } else {\n if (query.dims.length !== 3 && query.dims.length !== 5) {\n throw new Error(\'Input "query" is expected to have 3 or 5 dimensions when key is empty\');\n }\n if (query.dims.length === 5 && (query.dims[2] !== attributes.numHeads || query.dims[3] !== 3)) {\n throw new Error(\'Expect "query" shape (batch_size, kv_sequence_length, num_heads, 3, head_size) for packed kv\');\n }\n qkvFormat = 3 /* qkvBSN3H */;\n }\n if (bias) {\n if (bias.dims.length !== 1) {\n throw new Error(\'Input "bias" is expected to have 1 dimension\');\n }\n if (value) {\n if (query.dims.length === 5 && query.dims[3] === 2) {\n throw new Error("bias is not allowed 
for packed kv.");\n }\n }\n }\n let maskType = 0 /* none */;\n if (keyPaddingMask) {\n maskType = 8 /* maskUnknown */;\n const maskDims = keyPaddingMask.dims;\n if (maskDims.length === 1) {\n if (maskDims[0] === batchSize) {\n maskType = 1 /* mask1dKeySeqLen */;\n } else if (maskDims[0] === 3 * batchSize + 2) {\n maskType = 3 /* mask1DKeySeqLenStart */;\n }\n } else if (maskDims.length === 2 && maskDims[0] === batchSize && maskDims[1] === kvSequenceLength) {\n maskType = 5 /* mask2dKeyPadding */;\n }\n if (maskType === 8 /* maskUnknown */) {\n throw new Error(\'Input "key_padding_mask" shape shall be (batch_size) or (batch_size, kv_sequence_length)\');\n }\n throw new Error("Mask not supported");\n }\n let passPastInKv = false;\n let vHiddenSize = hiddenSize;\n if (value) {\n if (value.dims.length !== 3 && value.dims.length !== 4) {\n throw new Error(\'Input "value" is expected to have 3 or 4 dimensions\');\n }\n if (query.dims[0] !== value.dims[0]) {\n throw new Error(\'Input "query" and "value" shall have same dim 0 (batch_size)\');\n }\n if (value.dims.length === 3) {\n if (kvSequenceLength !== value.dims[1]) {\n throw new Error(\'Input "key" and "value" shall have the same dim 1 (kv_sequence_length)\');\n }\n vHiddenSize = value.dims[2];\n } else {\n if (kvSequenceLength !== value.dims[2]) {\n throw new Error(\'Input "past_key" and "past_value" shall have the same dim 2 (kv_sequence_length)\');\n }\n vHiddenSize = value.dims[1] * value.dims[3];\n passPastInKv = true;\n }\n }\n const totalSequenceLength = pastSequenceLength + kvSequenceLength;\n const broadcastResPosBias = false;\n if (keyPaddingMask) {\n throw new Error("Key padding mask is not supported");\n }\n if (relativePositionBias) {\n if (relativePositionBias.dims.length !== 4) {\n throw new Error(\'Input "relative_position_bias" is expected to have 4 dimensions\');\n }\n if (relativePositionBias.dims[0] !== batchSize && relativePositionBias.dims[0] !== 1 || relativePositionBias.dims[1] !== 
attributes.numHeads || relativePositionBias.dims[2] !== sequenceLength || relativePositionBias.dims[3] !== totalSequenceLength) {\n throw new Error(\'Input "relative_position_bias" shape (batch_size, 1, sequence_length, kv_sequence_length)\');\n }\n }\n return {\n batchSize,\n sequenceLength,\n pastSequenceLength,\n kvSequenceLength,\n totalSequenceLength,\n maxSequenceLength,\n inputHiddenSize: 0,\n hiddenSize,\n vHiddenSize,\n headSize,\n vHeadSize: Math.floor(vHiddenSize / attributes.numHeads),\n numHeads: attributes.numHeads,\n isUnidirectional: false,\n pastPresentShareBuffer: false,\n maskFilterValue: attributes.maskFilterValue,\n maskType,\n scale: attributes.scale,\n broadcastResPosBias,\n passPastInKv,\n qkvFormat\n };\n };\n parseMultiHeadAttentionAttributes = (attributes) => createAttributeWithCacheKey({ ...attributes });\n weightTransposeAttribute2 = createAttributeWithCacheKey({ perm: [0, 2, 1, 3] });\n addBiasTranspose = (context, qkv, bias, batchSize, sequenceLength, hiddenSize, biasOffset) => {\n const outputShape = [batchSize, sequenceLength, hiddenSize];\n const outputSize = ShapeUtil.size(outputShape);\n const programUniforms = [\n { type: 12 /* uint32 */, data: outputSize },\n { type: 12 /* uint32 */, data: biasOffset },\n { type: 12 /* uint32 */, data: hiddenSize }\n ];\n const getShaderSource = (shaderHelper) => {\n const output = outputVariable("qkv_with_bias", qkv.dataType, outputShape);\n const qkvInput = inputVariable("qkv", qkv.dataType, outputShape);\n const biasInput = inputVariable("bias", bias.dataType, outputShape);\n const uniforms = [\n { name: "output_size", type: "u32" },\n { name: "bias_offset", type: "u32" },\n { name: "hidden_size", type: "u32" }\n ];\n return `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(qkvInput, biasInput, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n let bias_offset_idx = (global_idx % uniforms.hidden_size) + 
uniforms.bias_offset;\n\n qkv_with_bias[global_idx] = qkv[global_idx] + bias[bias_offset_idx];\n }`;\n };\n return context.compute(\n {\n name: "MultiHeadAttentionAddBias",\n shaderCache: { inputDependencies: ["type", "type"] },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: qkv.dataType, gpuDataType: 0 /* default */ }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n },\n { inputs: [qkv, bias], outputs: [-1] }\n )[0];\n };\n maybeTransposeToBNSHAndAddBias = (context, batchSize, numHeads, sequenceLength, headSize, input, bias, biasOffset) => {\n let reshapedInput = input;\n if (!bias) {\n if (input.dims.length === 3) {\n reshapedInput = input.reshape([batchSize, sequenceLength, numHeads, headSize]);\n }\n return context.compute(\n createTransposeProgramInfo(reshapedInput, weightTransposeAttribute2.perm),\n { inputs: [reshapedInput], outputs: [-1] }\n )[0];\n } else {\n if (sequenceLength === 1) {\n throw new Error("AddBiasReshape is not implemented. 
Please export your model with packed QKV or KV");\n } else {\n reshapedInput = addBiasTranspose(context, input, bias, batchSize, sequenceLength, numHeads * headSize, biasOffset);\n reshapedInput = reshapedInput.reshape([batchSize, sequenceLength, numHeads, headSize]);\n return context.compute(\n createTransposeProgramInfo(reshapedInput, weightTransposeAttribute2.perm),\n { inputs: [reshapedInput], outputs: [-1] }\n )[0];\n }\n }\n };\n multiHeadAttention = (context, attributes) => {\n const params = validateInputs18(context.inputs, attributes);\n const query = context.inputs[0];\n const key = getInput(context.inputs, 1);\n const value = getInput(context.inputs, 2);\n const bias = getInput(context.inputs, 3);\n const keyPaddingMask = getInput(context.inputs, 4);\n const relativePositionBias = getInput(context.inputs, 5);\n const pastKey = getInput(context.inputs, 6);\n const pastValue = getInput(context.inputs, 7);\n if (query.dims.length === 5) {\n throw new Error("Packed QKV is not implemented");\n }\n if (key?.dims.length === 5) {\n throw new Error("Packed KV is not implemented");\n }\n const kvBNSH = key && value && key.dims.length === 4 && value.dims.length === 4;\n const Q = maybeTransposeToBNSHAndAddBias(\n context,\n params.batchSize,\n params.numHeads,\n params.sequenceLength,\n params.headSize,\n query,\n bias,\n 0\n );\n if (kvBNSH) {\n return applyAttention(\n context,\n Q,\n key,\n value,\n keyPaddingMask,\n void 0,\n pastKey,\n pastValue,\n relativePositionBias,\n params,\n attributes\n );\n }\n if (!key || !value) {\n throw new Error("key and value must be provided");\n }\n const K = maybeTransposeToBNSHAndAddBias(\n context,\n params.batchSize,\n params.numHeads,\n params.kvSequenceLength,\n params.headSize,\n key,\n bias,\n params.hiddenSize\n );\n const V = maybeTransposeToBNSHAndAddBias(\n context,\n params.batchSize,\n params.numHeads,\n params.kvSequenceLength,\n params.vHeadSize,\n value,\n bias,\n 2 * params.hiddenSize\n );\n applyAttention(\n 
context,\n Q,\n K,\n V,\n keyPaddingMask,\n void 0,\n pastKey,\n pastValue,\n relativePositionBias,\n params,\n attributes\n );\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/pad.ts\n var validateInputs19, getPadConstant, getPadReflect, getPadEdge, getPadWrap, getPadSnippet, createPadProgramInfo, createPadAttributesFromInputs, pad;\n var init_pad = __esm({\n "web/lib/wasm/jsep/webgpu/ops/pad.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n validateInputs19 = (inputs) => {\n if (!inputs || inputs.length < 1) {\n throw new Error("Too few inputs");\n }\n if (inputs[0].dataType !== 1 /* float */ && inputs[0].dataType !== 10 /* float16 */) {\n throw new Error("Input type must be float or float16.");\n }\n if (inputs.length >= 2) {\n let validPads = inputs[0].dims.length * 2 === inputs[1].dims[0];\n if (inputs.length === 4) {\n validPads = inputs[3].dims[0] * 2 === inputs[1].dims[0];\n }\n if (!validPads) {\n throw new Error("The pads should be a 1D tensor of shape [2 * input_rank] or [2 * num_axes].");\n }\n }\n };\n getPadConstant = (output, inputRank, padsLength) => {\n let block = "";\n for (let i = inputRank - 1; i >= 0; --i) {\n block += `\n k = i32(${output.indicesGet("indices", i)}) - ${getElementAt("uniforms.pads", i, padsLength)};\n if (k < 0) {\n break;\n }\n if (k >= i32(${getElementAt("uniforms.x_shape", i, inputRank)})) {\n break;\n }\n offset += k * i32(${getElementAt("uniforms.x_strides", i, inputRank)});\n `;\n }\n return `\n value = ${output.type.value}(uniforms.constant_value);\n for (var i = 0; i < 1; i++) {\n var offset = 0;\n var k = 0;\n ${block}\n value = x[offset];\n }\n `;\n };\n getPadReflect = (output, inputRank, padsLength) => {\n let block = "";\n for (let i = inputRank - 1; i >= 0; --i) {\n block += `\n k = i32(${output.indicesGet("indices", i)}) - ${getElementAt("uniforms.pads", i, padsLength)};\n if (k < 0) {\n k = -k;\n }\n {\n let _2n_1 = 2 * (i32(${getElementAt("uniforms.x_shape", i, inputRank)}) - 
1);\n k = k % _2n_1;\n if(k >= i32(${getElementAt("uniforms.x_shape", i, inputRank)})) {\n k = _2n_1 - k;\n }\n }\n offset += k * i32(${getElementAt("uniforms.x_strides", i, inputRank)});\n `;\n }\n return `\n var offset = 0;\n var k = 0;\n ${block}\n value = x[offset];\n `;\n };\n getPadEdge = (output, inputRank, padsLength) => {\n let block = "";\n for (let i = inputRank - 1; i >= 0; --i) {\n block += `\n k = i32(${output.indicesGet("indices", i)}) - ${getElementAt("uniforms.pads", i, padsLength)};\n if (k < 0) {\n k = 0;\n }\n if (k >= i32(${getElementAt("uniforms.x_shape", i, inputRank)})) {\n k = i32(${getElementAt("uniforms.x_shape", i, inputRank)}) - 1;\n }\n offset += k * i32(${getElementAt("uniforms.x_strides", i, inputRank)});\n `;\n }\n return `\n var offset = 0;\n var k = 0;\n ${block}\n value = x[offset];\n `;\n };\n getPadWrap = (output, inputRank, padsLength) => {\n let block = "";\n for (let i = inputRank - 1; i >= 0; --i) {\n block += `\n k = i32(${output.indicesGet("indices", i)}) - ${getElementAt("uniforms.pads", i, padsLength)};\n if (k < 0) {\n k += i32(${getElementAt("uniforms.x_shape", i, inputRank)}]);\n }\n if (k >= i32(${getElementAt("uniforms.x_shape", i, inputRank)})) {\n k -= i32(${getElementAt("uniforms.x_shape", i, inputRank)});\n }\n offset += k * i32(${getElementAt("uniforms.x_strides", i, inputRank)});\n `;\n }\n return `\n var offset = 0;\n var k = 0;\n ${block}\n value = x[offset];\n `;\n };\n getPadSnippet = (output, inputRank, attributes) => {\n switch (attributes.mode) {\n case 0:\n return getPadConstant(output, inputRank, attributes.pads.length);\n case 1:\n return getPadReflect(output, inputRank, attributes.pads.length);\n case 2:\n return getPadEdge(output, inputRank, attributes.pads.length);\n case 3:\n return getPadWrap(output, inputRank, attributes.pads.length);\n default:\n throw new Error("Invalid mode");\n }\n };\n createPadProgramInfo = (inputs, attributes) => {\n const outputShape = 
ShapeUtil.padShape(inputs[0].dims.slice(), attributes.pads);\n const inputDims = inputs[0].dims;\n const outputSize = ShapeUtil.size(outputShape);\n const programUniforms = [{ type: 12 /* uint32 */, data: outputSize }, { type: 6 /* int32 */, data: attributes.pads }];\n if (attributes.mode === 0) {\n programUniforms.push({ type: inputs[0].dataType, data: attributes.value });\n }\n programUniforms.push(...createTensorShapeVariables(inputs[0].dims, outputShape));\n const inputDependencies = ["rank"];\n const getShaderSource = (shaderHelper) => {\n const output = outputVariable("output", inputs[0].dataType, outputShape.length);\n const input = inputVariable("x", inputs[0].dataType, inputDims.length);\n const dataType = input.type.value;\n const padSnippet = getPadSnippet(output, inputDims.length, attributes);\n const uniforms = [{ name: "output_size", type: "u32" }, { name: "pads", type: "i32", length: attributes.pads.length }];\n if (attributes.mode === 0) {\n uniforms.push({ name: "constant_value", type: dataType });\n }\n return `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(input, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n\n let indices = ${output.offsetToIndices("global_idx")};\n\n var value = ${dataType}(0);\n ${padSnippet}\n output[global_idx] = value;\n }`;\n };\n return {\n name: "Pad",\n shaderCache: { hint: `${attributes.mode}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n ShapeUtil.size(outputShape) / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource\n };\n };\n createPadAttributesFromInputs = (inputs, attributes) => {\n if (inputs.length > 1) {\n const bigInt64Pads = inputs[1].getBigInt64Array();\n const value = inputs.length >= 3 && inputs[2].data ? 
inputs[2].getFloat32Array()[0] : 0;\n const inputRank = inputs[0].dims.length;\n const updatePads = new Int32Array(2 * inputRank).fill(0);\n if (inputs.length >= 4) {\n const axes = inputs[3].getBigInt64Array();\n for (let i = 0; i < axes.length; i++) {\n updatePads[Number(axes[i])] = Number(bigInt64Pads[i]);\n updatePads[Number(axes[i]) + inputRank] = Number(bigInt64Pads[i + axes.length]);\n }\n } else {\n bigInt64Pads.forEach((v, i) => updatePads[Number(i)] = Number(v));\n }\n const pads = [];\n updatePads.forEach((v) => pads.push(v));\n return { mode: attributes.mode, value, pads };\n } else {\n return attributes;\n }\n };\n pad = (context, attributes) => {\n validateInputs19(context.inputs);\n const updatedAttributes = createPadAttributesFromInputs(context.inputs, attributes);\n context.compute(createPadProgramInfo(context.inputs, updatedAttributes), { inputs: [0] });\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/pool.ts\n var validateInputs20, getAdjustedPoolAttributesAndOutputShape, getUniformAndPadInfo, generatePoolingCode, createShaderKeyFromAttributes, createAveragePoolShaderKeyFromAttributes, createMaxPoolShaderKeyFromAttributes, parsePoolCommonAttributes, createAveragePoolProgramInfo, parseAveragePoolAttributes, averagePool, globalPoolAttributes, parseGlobalAveragePoolAttributes, globalAveragePool, createMaxPoolProgramInfo, maxPool, parseMaxPoolAttributes, parseGlobalMaxPoolAttributes, globalMaxPool;\n var init_pool = __esm({\n "web/lib/wasm/jsep/webgpu/ops/pool.ts"() {\n "use strict";\n init_esm();\n init_wasm_common();\n init_util();\n init_common();\n validateInputs20 = (inputs) => {\n if (env2.webgpu.validateInputContent && (!inputs || inputs.length !== 1)) {\n throw new Error("Pool ops requires 1 input.");\n }\n };\n getAdjustedPoolAttributesAndOutputShape = (input, attributes, isGlobalOperator) => {\n const isChannelsLast = attributes.format === "NHWC";\n const inputShapeAsChannelFirst = input.dims.slice();\n if (isChannelsLast) {\n 
inputShapeAsChannelFirst.splice(1, 0, inputShapeAsChannelFirst.pop());\n }\n const hasDilations = Object.hasOwnProperty.call(attributes, "dilations");\n const kernelShape = attributes.kernelShape.slice();\n const strides = attributes.strides.slice();\n const dilations = hasDilations ? attributes.dilations.slice() : [];\n const pads = attributes.pads.slice();\n PoolConvUtil.adjustPoolAttributes(isGlobalOperator, inputShapeAsChannelFirst, kernelShape, strides, dilations, pads);\n const outputShapeAsChannelFirst = PoolConvUtil.computePoolOutputShape(\n isGlobalOperator,\n inputShapeAsChannelFirst,\n strides,\n dilations,\n kernelShape,\n pads,\n attributes.autoPad\n );\n const newAttributes = Object.assign({}, attributes);\n if (hasDilations) {\n Object.assign(newAttributes, { kernelShape, strides, pads, dilations, cacheKey: attributes.cacheKey });\n } else {\n Object.assign(newAttributes, { kernelShape, strides, pads, cacheKey: attributes.cacheKey });\n }\n const outputShapeAsChannelLast = outputShapeAsChannelFirst.slice();\n outputShapeAsChannelLast.push(outputShapeAsChannelLast.splice(1, 1)[0]);\n return [newAttributes, isChannelsLast ? 
outputShapeAsChannelLast : outputShapeAsChannelFirst];\n };\n getUniformAndPadInfo = (outputShape, attributes) => {\n const isChannelsLast = attributes.format === "NHWC";\n const outputSize = ShapeUtil.size(outputShape);\n const kernelSize = ShapeUtil.size(attributes.kernelShape);\n const programUniforms = [{ type: 12 /* uint32 */, data: outputSize }, { type: 12 /* uint32 */, data: kernelSize }];\n const uniforms = [{ name: "outputSize", type: "u32" }, { name: "kernelSize", type: "u32" }];\n if (attributes.kernelShape.length <= 2) {\n const kw = attributes.kernelShape[attributes.kernelShape.length - 1];\n const sw = attributes.strides[attributes.strides.length - 1];\n const pwStart = attributes.pads[attributes.pads.length / 2 - 1];\n const pwEnd = attributes.pads[attributes.pads.length - 1];\n const pwStartEndNotZero = !!(pwStart + pwEnd);\n programUniforms.push(\n { type: 12 /* uint32 */, data: kw },\n { type: 12 /* uint32 */, data: sw },\n { type: 12 /* uint32 */, data: pwStart },\n { type: 12 /* uint32 */, data: pwEnd }\n );\n uniforms.push(\n { name: "kw", type: "u32" },\n { name: "sw", type: "u32" },\n { name: "pwStart", type: "u32" },\n { name: "pwEnd", type: "u32" }\n );\n let phStartEndNotZero = false;\n if (attributes.kernelShape.length === 2) {\n const kh = attributes.kernelShape[attributes.kernelShape.length - 2];\n const sh = attributes.strides[attributes.strides.length - 2];\n const phStart = attributes.pads[attributes.pads.length / 2 - 2];\n const phEnd = attributes.pads[attributes.pads.length - 2];\n phStartEndNotZero = !!(phStart + phEnd);\n programUniforms.push(\n { type: 12 /* uint32 */, data: kh },\n { type: 12 /* uint32 */, data: sh },\n { type: 12 /* uint32 */, data: phStart },\n { type: 12 /* uint32 */, data: phEnd }\n );\n uniforms.push(\n { name: "kh", type: "u32" },\n { name: "sh", type: "u32" },\n { name: "phStart", type: "u32" },\n { name: "phEnd", type: "u32" }\n );\n }\n return [programUniforms, uniforms, true, pwStartEndNotZero, 
phStartEndNotZero];\n } else {\n if (isChannelsLast) {\n throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");\n }\n const kernelStrides = ShapeUtil.computeStrides(attributes.kernelShape);\n programUniforms.push(\n { type: 12 /* uint32 */, data: kernelStrides },\n { type: 12 /* uint32 */, data: attributes.pads },\n { type: 12 /* uint32 */, data: attributes.strides }\n );\n uniforms.push(\n { name: "kernelStrides", type: "u32", length: kernelStrides.length },\n { name: "pads", type: "u32", length: attributes.pads.length },\n { name: "strides", type: "u32", length: attributes.strides.length }\n );\n const hasPads = attributes.pads.reduce((sum, cur) => sum + cur);\n return [programUniforms, uniforms, !!hasPads, false, false];\n }\n };\n generatePoolingCode = (shaderHelper, x, rank, outputShapeRank, attributes, op1, op2, start, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero) => {\n const isChannelsLast = attributes.format === "NHWC";\n const dataType = x.type.value;\n const output = outputVariable("output", x.type.tensor, outputShapeRank);\n if (attributes.kernelShape.length <= 2) {\n let codeW = "";\n let codeH = "";\n let codeHEnd = "";\n const dimIdxW = rank - (isChannelsLast ? 2 : 1);\n if (pwStartEndNotZero) {\n codeW = `\n for (var i: u32 = 0u; i < uniforms.kw; i++) {\n xIndices[${dimIdxW}] = indices[${dimIdxW}] * uniforms.sw - uniforms.pwStart + i;\n if (xIndices[${dimIdxW}] < 0 || xIndices[${dimIdxW}]\n >= uniforms.x_shape[${dimIdxW}]) {\n pad++;\n continue;\n }\n let x_val = x[${x.indicesToOffset("xIndices")}];\n ${op1}\n }`;\n } else {\n codeW = `\n for (var i: u32 = 0u; i < uniforms.kw; i++) {\n xIndices[${dimIdxW}] = indices[${dimIdxW}] * uniforms.sw - uniforms.pwStart + i;\n let x_val = x[${x.indicesToOffset("xIndices")}];\n ${op1}\n }`;\n }\n if (attributes.kernelShape.length === 2) {\n const dimIdxH = rank - (isChannelsLast ? 
3 : 2);\n if (phStartEndNotZero) {\n codeH = `\n for (var j: u32 = 0u; j < uniforms.kh; j++) {\n xIndices[${dimIdxH}] = indices[${dimIdxH}] * uniforms.sh - uniforms.phStart + j;\n if (xIndices[${dimIdxH}] < 0 || xIndices[${dimIdxH}] >= uniforms.x_shape[${dimIdxH}]) {\n pad += i32(uniforms.kw);\n continue;\n }\n `;\n } else {\n codeH = `\n for (var j: u32 = 0u; j < uniforms.kh; j++) {\n xIndices[${dimIdxH}] = indices[${dimIdxH}] * uniforms.sh - uniforms.phStart + j;\n `;\n }\n codeHEnd = `\n }\n `;\n }\n const poolingCode = `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(x, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n\n let indices = ${output.offsetToIndices("global_idx")};\n var xIndices = ${output.offsetToIndices("global_idx")};\n\n var value = ${dataType}(${start});\n var pad = 0;\n ${codeH}\n ${codeW}\n ${codeHEnd}\n ${op2}\n\n output[global_idx] = value;\n }`;\n return poolingCode;\n } else {\n if (isChannelsLast) {\n throw new Error("Pooling with kernelShape.length > 2 is not supported for NHWC format.");\n }\n const stridesRank = attributes.kernelShape.length;\n const padsRank = attributes.pads.length;\n let padCode = "";\n if (hasPads) {\n padCode = `\n if (xIndices[j] >= uniforms.x_shape[j]) {\n pad++;\n isPad = true;\n break;\n }\n }\n if (!isPad) {\n let x_val = x[${x.indicesToOffset("xIndices")}];\n ${op1}\n }`;\n } else {\n padCode = `\n }\n let x_val = x[${x.indicesToOffset("xIndices")}];\n ${op1}\n `;\n }\n const poolingCode = `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(x, output)}\n\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n let indices = ${output.offsetToIndices("global_idx")};\n var xIndices = ${output.offsetToIndices("global_idx")};\n\n var offsets: array;\n\n var value = ${dataType}(${start});\n var pad = 0;\n var isPad = false;\n\n for (var i: u32 = 0u; i < 
uniforms.kernelSize; i++) {\n var offset = i;\n for (var j = 0u; j < ${stridesRank - 1}u; j++) {\n offsets[j] = offset / ${getElementAt("uniforms.kernelStrides", "j", stridesRank)};\n offset -= offsets[j] * ${getElementAt("uniforms.kernelStrides", "j", stridesRank)};\n }\n offsets[${stridesRank - 1}] = offset;\n\n isPad = false;\n for (var j = ${rank - stridesRank}u; j < ${rank}u; j++) {\n xIndices[j] = indices[j] * ${getElementAt("uniforms.strides", `j - ${rank - stridesRank}u`, stridesRank)}\n + offsets[j - ${rank - stridesRank}u] - ${getElementAt("uniforms.pads", "j - 2u", padsRank)};\n ${padCode}\n }\n ${op2}\n\n output[global_idx] = value;\n }`;\n return poolingCode;\n }\n };\n createShaderKeyFromAttributes = (attributes) => `${attributes.format};${attributes.ceilMode};${attributes.autoPad};${attributes.kernelShape.length}`;\n createAveragePoolShaderKeyFromAttributes = (attributes) => `${createShaderKeyFromAttributes(attributes)};${attributes.countIncludePad}`;\n createMaxPoolShaderKeyFromAttributes = (attributes) => `${createShaderKeyFromAttributes(attributes)};${attributes.storageOrder};${attributes.dilations}`;\n parsePoolCommonAttributes = (attributes) => ({\n format: attributes.format,\n autoPad: ["NOTSET", "VALID", "SAME_UPPER", "SAME_LOWER"][attributes.auto_pad],\n ceilMode: attributes.ceil_mode,\n kernelShape: attributes.kernel_shape,\n strides: attributes.strides,\n pads: attributes.pads\n });\n createAveragePoolProgramInfo = (name, input, isGlobalOperator, attributes) => {\n const [adjustedAttributes, outputShape] = getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);\n const x = inputVariable("x", input.dataType, input.dims.length);\n const dataType = x.type.value;\n const op1 = "value += x_val;";\n let op2 = "";\n if (adjustedAttributes.countIncludePad) {\n op2 += `value /= ${dataType}(uniforms.kernelSize);`;\n } else {\n op2 += `value /= ${dataType}(i32(uniforms.kernelSize) - pad);`;\n }\n const [programUniforms, uniforms, 
hasPads, pwStartEndNotZero, phStartEndNotZero] = getUniformAndPadInfo(outputShape, adjustedAttributes);\n programUniforms.push(...createTensorShapeVariables(input.dims, outputShape));\n const inputDependencies = ["rank"];\n return {\n name,\n shaderCache: { hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: input.dataType }],\n dispatchGroup: { x: Math.ceil(\n ShapeUtil.size(outputShape) / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource: (shaderHelper) => generatePoolingCode(\n shaderHelper,\n x,\n input.dims.length,\n outputShape.length,\n adjustedAttributes,\n op1,\n op2,\n 0,\n uniforms,\n hasPads,\n pwStartEndNotZero,\n phStartEndNotZero\n )\n };\n };\n parseAveragePoolAttributes = (attributes) => {\n const countIncludePad = attributes.count_include_pad === 0 ? false : true;\n const attr = parsePoolCommonAttributes(attributes);\n if (attr.ceilMode !== 0) {\n throw new Error("using ceil() in shape computation is not yet supported for AveragePool");\n }\n const averagePoolAttributes = { countIncludePad, ...attr, cacheKey: "" };\n return { ...averagePoolAttributes, cacheKey: createAveragePoolShaderKeyFromAttributes(averagePoolAttributes) };\n };\n averagePool = (context, attributes) => {\n validateInputs20(context.inputs);\n context.compute(createAveragePoolProgramInfo("AveragePool", context.inputs[0], false, attributes));\n };\n globalPoolAttributes = {\n autoPad: "",\n ceilMode: 0,\n countIncludePad: false,\n kernelShape: [],\n strides: [],\n pads: [],\n storageOrder: 0,\n dilations: []\n };\n parseGlobalAveragePoolAttributes = (attributes) => {\n const format = attributes.format;\n return { format, ...globalPoolAttributes, cacheKey: format };\n };\n globalAveragePool = (context, attributes) => {\n validateInputs20(context.inputs);\n context.compute(createAveragePoolProgramInfo("GlobalAveragePool", context.inputs[0], 
true, attributes));\n };\n createMaxPoolProgramInfo = (name, input, isGlobalOperator, attributes) => {\n const [adjustedAttributes, outputShape] = getAdjustedPoolAttributesAndOutputShape(input, attributes, isGlobalOperator);\n const op1 = `\n value = max(x_val, value);\n `;\n const op2 = "";\n const x = inputVariable("x", input.dataType, input.dims.length);\n const inputDependencies = ["rank"];\n const [programUniforms, uniforms, hasPads, pwStartEndNotZero, phStartEndNotZero] = getUniformAndPadInfo(outputShape, adjustedAttributes);\n programUniforms.push(...createTensorShapeVariables(input.dims, outputShape));\n return {\n name,\n shaderCache: { hint: `${attributes.cacheKey};${hasPads};${pwStartEndNotZero};${phStartEndNotZero}`, inputDependencies },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: input.dataType }],\n dispatchGroup: { x: Math.ceil(\n ShapeUtil.size(outputShape) / 64\n /* workgroup size */\n ) },\n programUniforms\n }),\n getShaderSource: (shaderHelper) => generatePoolingCode(\n shaderHelper,\n x,\n input.dims.length,\n outputShape.length,\n adjustedAttributes,\n op1,\n op2,\n input.dataType === 10 /* float16 */ ? 
-65504 : -1e5,\n uniforms,\n hasPads,\n pwStartEndNotZero,\n phStartEndNotZero\n )\n };\n };\n maxPool = (context, attributes) => {\n validateInputs20(context.inputs);\n context.compute(createMaxPoolProgramInfo("MaxPool", context.inputs[0], false, attributes));\n };\n parseMaxPoolAttributes = (attributes) => {\n const storageOrder = attributes.storage_order;\n const dilations = attributes.dilations;\n const attr = parsePoolCommonAttributes(attributes);\n if (storageOrder !== 0) {\n throw new Error("column major storage order is not yet supported for MaxPool");\n }\n if (attr.ceilMode !== 0) {\n throw new Error("using ceil() in shape computation is not yet supported for MaxPool");\n }\n const maxPoolAttributes = { storageOrder, dilations, ...attr, cacheKey: "" };\n return { ...maxPoolAttributes, cacheKey: createMaxPoolShaderKeyFromAttributes(maxPoolAttributes) };\n };\n parseGlobalMaxPoolAttributes = (attributes) => {\n const format = attributes.format;\n return { format, ...globalPoolAttributes, cacheKey: format };\n };\n globalMaxPool = (context, attributes) => {\n validateInputs20(context.inputs);\n context.compute(createMaxPoolProgramInfo("GlobalMaxPool", context.inputs[0], true, attributes));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/range.ts\n var validateInputsContent, createRangeProgramInfo, range;\n var init_range = __esm({\n "web/lib/wasm/jsep/webgpu/ops/range.ts"() {\n "use strict";\n init_esm();\n init_wasm_common();\n init_common();\n validateInputsContent = (start, limit, delta) => {\n const sameStartLimit = start === limit;\n const increasingRangeNegativeStep = start < limit && delta < 0;\n const decreasingRangePositiveStep = start > limit && delta > 0;\n if (sameStartLimit || increasingRangeNegativeStep || decreasingRangePositiveStep) {\n throw new Error("Range these inputs\' contents are invalid.");\n }\n };\n createRangeProgramInfo = (start, limit, delta, dataType) => {\n const numElements = Math.abs(Math.ceil((limit - start) / delta));\n 
const outputShape = [numElements];\n const outputSize = numElements;\n const programUniforms = [\n { type: 12 /* uint32 */, data: outputSize },\n { type: dataType, data: start },\n { type: dataType, data: delta },\n ...createTensorShapeVariables(outputShape)\n ];\n const getShaderSource = (shaderHelper) => {\n const output = outputVariable("output", dataType, outputShape.length);\n const wgslType = output.type.value;\n const uniforms = [\n { name: "outputSize", type: "u32" },\n { name: "start", type: wgslType },\n { name: "delta", type: wgslType }\n ];\n return `\n ${shaderHelper.registerUniforms(uniforms).declareVariables(output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}\n output[global_idx] = uniforms.start + ${wgslType}(global_idx) * uniforms.delta;\n }`;\n };\n return {\n name: "Range",\n shaderCache: { hint: `${dataType}` },\n getShaderSource,\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms\n })\n };\n };\n range = (context) => {\n let start = 0;\n let limit = 0;\n let delta = 0;\n if (context.inputs[0].dataType === 6 /* int32 */) {\n start = context.inputs[0].getInt32Array()[0];\n limit = context.inputs[1].getInt32Array()[0];\n delta = context.inputs[2].getInt32Array()[0];\n } else if (context.inputs[0].dataType === 1 /* float */) {\n start = context.inputs[0].getFloat32Array()[0];\n limit = context.inputs[1].getFloat32Array()[0];\n delta = context.inputs[2].getFloat32Array()[0];\n }\n if (env2.webgpu.validateInputContent) {\n validateInputsContent(start, limit, delta);\n }\n context.compute(createRangeProgramInfo(start, limit, delta, context.inputs[0].dataType), { inputs: [] });\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/resize.ts\n var validateScales, updateScales, validateInputs21, getOriginalCoordinateFromResizedCoordinate, getNearestPixelFromOriginal, updateRoI, 
initOutputShape, adjustOutputShape, calculateOriginalIndicesFromOutputIndices, calculateInputIndicesFromOutputIndices, checkInputIndices, setChannelAndBatchIndices, bilinearInterpolation, bicubicInterpolation, trilinearInterpolation, createResizeProgramInfo, getOpsetVersionFromCustomDataBuffer, resize, parseResizeAttributes;\n var init_resize = __esm({\n "web/lib/wasm/jsep/webgpu/ops/resize.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateScales = (scales, attributes) => {\n scales.every((value) => value > 0 || (() => {\n throw new Error("Resize requires scales input values to be positive");\n }));\n if (scales.length > 0) {\n if (attributes.mode === "linear") {\n if (!(scales.length === 2 || scales.length === 3 || scales.length === 4 && scales[0] === 1 && scales[1] === 1 || scales.length === 4 && scales[0] === 1 && scales[3] === 1 || scales.length === 5 && scales[0] === 1 && scales[1] === 1)) {\n throw new Error(\n `For linear mode, Resize requires scales to be 2D, 3D, 4D with either two outermost or one innermost and\n one outermost scale values equal to 1, or 5D with two outermost scale values equal to 1`\n );\n }\n } else if (attributes.mode === "cubic") {\n if (!(scales.length === 2 || scales.length === 4 && scales[0] === 1 && scales[1] === 1 || scales.length === 4 && scales[0] === 1 && scales[3] === 1)) {\n throw new Error("Resize requires scales input size to be 2 or 4 for cubic mode");\n }\n }\n }\n };\n updateScales = (scales, axes, rank) => {\n axes.every((value) => value >= 0 && value < rank || (() => {\n throw new Error("Resize requires axes input values to be positive and less than rank");\n }));\n const newScales = new Array(rank).fill(1);\n axes.forEach((value, index) => newScales[value] = scales[index]);\n return newScales;\n };\n validateInputs21 = (inputs, attributes, opsetVersion, scales, sizes, roi) => {\n const [roiInputIndex, scalesInputIndex, sizesInputIndex] = 
opsetVersion > 10 ? [1, 2, 3] : [-1, inputs.length > 1 ? 1 : -1, -1];\n const rank = inputs[0].dims.length;\n if (roiInputIndex > 0 && inputs.length > roiInputIndex && inputs[roiInputIndex].dims.length > 0) {\n inputs[roiInputIndex].getFloat32Array().forEach((value) => roi.push(value));\n } else if (attributes.coordinateTransformMode === "tf_crop_and_resize") {\n throw new Error("Resize requires RoI input to be specified when coordinateTransformMode is tfCropAndResize");\n }\n if (scalesInputIndex > 0 && inputs.length > scalesInputIndex && inputs[scalesInputIndex].dims.length > 0) {\n inputs[scalesInputIndex].getFloat32Array().forEach((value) => scales.push(value));\n if (scales.length !== 0 && (scales.length !== rank && (opsetVersion >= 18 && scales.length !== attributes.axes.length))) {\n throw new Error(\n "Resize requires scales input size to be same as input rank or axes size for opset 18 and up"\n );\n }\n validateScales(scales, attributes);\n if (attributes.axes.length > 0) {\n updateScales(scales, attributes.axes, rank).forEach((value, index) => scales[index] = value);\n }\n }\n if (sizesInputIndex > 0 && inputs.length > sizesInputIndex) {\n inputs[sizesInputIndex].getBigInt64Array().forEach((value) => sizes.push(Number(value)));\n if (sizes.length !== rank || opsetVersion >= 18 && sizes.length === attributes.axes.length) {\n throw new Error("Resize requires sizes input size to be same as input rank or axes size for opset 18 and up");\n }\n }\n if (attributes.axes.length > 0) {\n if (scales.length !== attributes.axes.length) {\n throw new Error(\'Resize requires "scales" input size to be of axes rank when axes attributes is specified\');\n }\n if (sizes.length !== attributes.axes.length) {\n throw new Error(\n \'Resize requires "sizes" input size to be of rank axes rank when axes attributes is specified\'\n );\n }\n }\n if (typeof scales !== "undefined" && typeof sizes !== "undefined" && scales.length > 0 && sizes.length > rank) {\n throw new Error("Resize 
requires only of scales or sizes to be specified");\n }\n };\n getOriginalCoordinateFromResizedCoordinate = (coordinateTransferMode, dType) => `fn getOriginalCoordinateFromResizedCoordinate(xResized: u32, xScale: f32, lengthResized: u32,\n lengthOriginal: u32, roiStart: f32, roiEnd: f32) -> ${dType} { ` + (() => {\n switch (coordinateTransferMode) {\n case "asymmetric":\n return `return ${dType}(xResized) / ${dType}(xScale);`;\n case "pytorch_half_pixel":\n return `if (lengthResized > 1) {\n return (${dType}(xResized) + 0.5) / ${dType}(xScale) - 0.5;\n } else {\n return 0.0;\n }`;\n case "tf_half_pixel_for_nn":\n return `return (${dType}(xResized) + 0.5) / ${dType}(xScale);`;\n case "align_corners":\n return `if (lengthResized == 1) {\n return 0.0;\n } else {\n // The whole part and the fractional part are calculated separately due to inaccuracy of floating\n // point division. As an example, f32(21) / f32(7) may evaluate to 2.99... instead of 3, causing an\n // offset-by-one error later in floor().\n let whole = ${dType}(xResized * (lengthOriginal - 1) / (lengthResized - 1));\n let fract =\n ${dType}(xResized * (lengthOriginal - 1) % (lengthResized - 1)) / ${dType}(lengthResized - 1);\n return whole + fract;\n }`;\n case "tf_crop_and_resize":\n return `if (lengthResized > 1) {\n return ${dType}(roiStart) * ${dType}(lengthOriginal - 1) +\n (${dType}(xResized) * ${dType}(roiEnd - roiStart) * ${dType}(lengthOriginal - 1)) /\n ${dType}(lengthResized - 1);\n } else {\n return 0.5 * ${dType}(roiStart + roiEnd) * ${dType}(lengthOriginal - 1);\n }`;\n case "half_pixel_symmetric":\n return `const outputWidth = ${dType}xScale * ${dType}(lengthResized);\n const adjustment = ${dType}(lengthResized) / outputWidth;\n const center = ${dType}(lengthOriginal) / 2;\n const offset = center * (1 - adjustment);\n return offset + ((${dType}(xResized) + 0.5) / ${dType}(xScale)) - 0.5;`;\n case "half_pixel":\n return `return ((${dType}(xResized) + 0.5) / ${dType}(xScale)) - 0.5;`;\n 
default:\n throw new Error(`Coordinate transform mode ${coordinateTransferMode} is not supported`);\n }\n })() + "}";\n getNearestPixelFromOriginal = (nearestMode, opsetVersion, dType) => `fn getNearestPixelFromOriginal(xOriginal: ${dType}, isDownSample: bool) -> ${dType} {` + (() => {\n switch (nearestMode) {\n case "round_prefer_ceil":\n return "if (fract(xOriginal) == 0.5) { return ceil(xOriginal); } else { return round(xOriginal); }";\n case "floor":\n return "return floor(xOriginal);";\n case "ceil":\n return "return ceil(xOriginal);";\n case "round_prefer_floor":\n return "if (fract(xOriginal) == 0.5) { return floor(xOriginal); } else { return round(xOriginal); }";\n case "simple":\n default:\n if (opsetVersion < 11) {\n return "if (isDownSample) { return ceil(xOriginal); } else { return xOriginal; }";\n }\n throw new Error(`Nearest mode ${nearestMode} is not supported`);\n }\n })() + "}";\n updateRoI = (roi, axes, rank) => {\n const roiTmp = new Array(rank).fill(0).concat(new Array(rank).fill(1));\n const roiLocal = roi.length === 0 ? 
roiTmp : roi.slice();\n if (axes.length > 0) {\n axes.forEach((v, i) => {\n roiTmp[v] = roiLocal[i];\n roiTmp[i + rank] = roiLocal[axes.length + i];\n });\n return roiTmp;\n }\n return roiLocal;\n };\n initOutputShape = (inputShape, scales, sizes, axes) => {\n let outputShape = [];\n if (sizes.length > 0) {\n if (axes.length > 0) {\n inputShape.forEach((v) => outputShape.push(v));\n if (Math.max(...axes) > inputShape.length) {\n throw new Error("axes is out of bound");\n }\n axes.forEach((v, i) => outputShape[v] = sizes[i]);\n } else {\n sizes.forEach((v) => outputShape.push(v));\n }\n } else {\n if (scales.length === 0) {\n throw new Error("Resize requires either scales or sizes.");\n } else {\n outputShape = inputShape.map((value, index) => Math.round(value * scales[index]));\n }\n }\n return outputShape;\n };\n adjustOutputShape = (inputShape, scales, attributes) => {\n const scaleInPolicy = (() => {\n switch (attributes.keepAspectRatioPolicy) {\n case "not_larger":\n return attributes.axes.length > 0 ? Math.min(...attributes.axes.map((i) => scales[i]), Number.MAX_VALUE) : Math.min(...scales, Number.MAX_VALUE);\n case "not_smaller":\n return attributes.axes.length > 0 ? 
Math.max(...attributes.axes.map((i) => scales[i]), Number.MIN_VALUE) : Math.max(...scales, Number.MIN_VALUE);\n default:\n throw new Error(`Keep aspect ratio policy ${attributes.keepAspectRatioPolicy} is not supported`);\n }\n })();\n scales.fill(1, 0, scales.length);\n const adjustedOutputShape = inputShape.slice();\n if (attributes.axes.length > 0) {\n attributes.axes.forEach((v) => scales[v] = scaleInPolicy);\n attributes.axes.forEach((v) => adjustedOutputShape[v] = Math.round(inputShape[v] * scales[v]));\n } else {\n scales.fill(scaleInPolicy, 0, scales.length);\n adjustedOutputShape.forEach((v, i) => adjustedOutputShape[i] = Math.round(v * scales[i]));\n }\n return adjustedOutputShape;\n };\n calculateOriginalIndicesFromOutputIndices = (output, inputShape, outputShape, scalesLength, roiLength) => `\n fn calculateOriginalIndicesFromOutputIndices(output_indices: ${output.type.indices}) -> array<${output.type.value}, ${outputShape.length}> {\n var original_indices: array<${output.type.value}, ${outputShape.length}>;\n for (var i:u32 = 0; i < ${outputShape.length}; i++) {\n var output_index = ${output.indicesGet("output_indices", "i")};\n var scale = ${getElementAt("uniforms.scales", "i", scalesLength)};\n var roi_low = ${getElementAt("uniforms.roi", "i", roiLength)};\n var roi_hi = ${getElementAt("uniforms.roi", `i + ${inputShape.length}`, roiLength)};\n if (scale == 1.0) {\n original_indices[i] = ${output.type.value}(output_index);\n } else {\n var input_shape_i = ${getElementAt("uniforms.input_shape", "i", inputShape.length)};\n var output_shape_i = ${getElementAt("uniforms.output_shape", "i", outputShape.length)};\n original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i,\n input_shape_i, roi_low, roi_hi);\n }\n }\n return original_indices;\n }`;\n calculateInputIndicesFromOutputIndices = (input, output, inputShape, outputShape, scalesLength, roiLength, useExtrapolation) => `\n fn 
calculateInputIndicesFromOutputIndices(output_indices: ${output.type.indices}) -> ${input.type.indices} {\n var input_indices: ${input.type.indices};\n for (var i:u32 = 0; i < ${outputShape.length}; i++) {\n var output_index = ${output.indicesGet("output_indices", "i")};\n var input_index: u32;\n var scale = ${getElementAt("uniforms.scales", "i", scalesLength)};\n if (scale == 1.0) {\n input_index = output_index;\n } else {\n var roi_low = ${getElementAt("uniforms.roi", "i", roiLength)};\n var roi_hi = ${getElementAt("uniforms.roi", `i + ${inputShape.length}`, roiLength)};\n var input_shape_i = ${getElementAt("uniforms.input_shape", "i", inputShape.length)};\n var output_shape_i = ${getElementAt("uniforms.output_shape", "i", outputShape.length)};\n var original_idx = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i,\n input_shape_i, roi_low, roi_hi);\n if (!${useExtrapolation} || (original_idx >= 0 && original_idx < ${output.type.value}(input_shape_i))) {\n if (original_idx < 0) {\n input_index = 0;\n } else if (original_idx > ${output.type.value}(input_shape_i - 1)) {\n input_index = input_shape_i - 1;\n } else {\n input_index = u32(getNearestPixelFromOriginal(original_idx, scale < 1));\n }\n } else {\n input_index = u32(original_idx);\n }\n }\n ${input.indicesSet("input_indices", "i", " input_index")}\n }\n return input_indices;\n }`;\n checkInputIndices = (input, inputShape) => `\n fn checkInputIndices(input_indices: ${input.type.indices}) -> bool {\n for (var i:u32 = 0; i < ${inputShape.length}; i++) {\n var input_index = ${input.indicesGet("input_indices", "i")};\n if (input_index < 0 || input_index >= ${getElementAt("uniforms.input_shape", "i", inputShape.length)}) {\n return false;\n }\n }\n return true;\n }`;\n setChannelAndBatchIndices = (input, channelIdx, batchIdx, spacialDims) => input.rank > spacialDims ? 
`\n ${input.indicesSet("input_indices", channelIdx, "channel")};\n ${input.indicesSet("input_indices", batchIdx, "batch")};\n` : "";\n bilinearInterpolation = (input, output, inputShape, useExtrapolation, extrapolationValue) => {\n const isNchw = true;\n const [batchIdx, heightIdx, widthIdx, channelIdx] = inputShape.length === 2 ? [-1, 0, 1, -1] : isNchw ? [0, 2, 3, 1] : [0, 1, 2, 3];\n const dType = input.type.value;\n return `\n fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${dType} {\n var input_indices: ${input.type.indices};\n ${input.indicesSet("input_indices", heightIdx, `max(0, min(row, ${inputShape[heightIdx]} - 1))`)};\n ${input.indicesSet("input_indices", widthIdx, `max(0, min(col, ${inputShape[widthIdx]} - 1))`)};\n ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 2)}\n return ${input.getByIndices("input_indices")};\n }\n\n fn bilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} {\n var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices);\n var row:${dType} = originalIndices[${heightIdx}];\n var col:${dType} = originalIndices[${widthIdx}];\n ${useExtrapolation ? `if (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > (${inputShape[widthIdx]} - 1)) {\n return ${extrapolationValue};\n }` : ""};\n row = max(0, min(row, ${inputShape[heightIdx]} - 1));\n col = max(0, min(col, ${inputShape[widthIdx]} - 1));\n var row1: u32 = u32(row);\n var col1: u32 = u32(col);\n var row2: u32 = u32(row + 1);\n var col2: u32 = u32(col + 1);\n var channel: u32 = ${inputShape.length > 2 ? `u32(originalIndices[${channelIdx}])` : "0"};\n var batch: u32 = ${inputShape.length > 2 ? 
`u32(originalIndices[${batchIdx}])` : "0"};\n var x11: ${dType} = getInputValue(batch, channel, row1, col1);\n var x12: ${dType} = getInputValue(batch, channel, row1, col2);\n var x21: ${dType} = getInputValue(batch, channel, row2, col1);\n var x22: ${dType} = getInputValue(batch, channel, row2, col2);\n var dx1: ${dType} = abs(row - ${dType}(row1));\n var dx2: ${dType} = abs(${dType}(row2) - row);\n var dy1: ${dType} = abs(col - ${dType}(col1));\n var dy2: ${dType} = abs(${dType}(col2) - col);\n if (row1 == row2) {\n dx1 = 0.5;\n dx2 = 0.5;\n }\n if (col1 == col2) {\n dy1 = 0.5;\n dy2 = 0.5;\n }\n return (x11 * dx2 * dy2 + x12 * dx2 * dy1 + x21 * dx1 * dy2 + x22 * dx1 * dy1);\n }`;\n };\n bicubicInterpolation = (input, output, inputShape, outputShape, scales, roi, cubicCoeffA, useExtrapolation, extrapolationValue, excludeOutside) => {\n const is2D = inputShape.length === 2;\n const isNchw = true;\n const [heightIdx, widthIdx] = is2D ? [0, 1] : isNchw ? [2, 3] : [1, 2];\n const dType = input.type.value;\n const createCubicInterpolationFunction = (idx) => {\n const direction = idx === heightIdx ? 
"row" : "col";\n return `\n fn ${direction}CubicInterpolation(input_indices: ${input.type.indices}, output_indices: ${output.type.indices}) -> ${dType} {\n var output_index = ${output.indicesGet("output_indices", idx)};\n var originalIdx: ${dType} = getOriginalCoordinateFromResizedCoordinate(output_index, ${scales[idx]},\n ${outputShape[idx]}, ${inputShape[idx]}, ${roi[idx]}, ${roi[idx]} + ${inputShape.length});\n var fractOriginalIdx: ${dType} = originalIdx - floor(originalIdx);\n var coefs = getCubicInterpolationCoefs(fractOriginalIdx);\n\n if (${useExtrapolation} && (originalIdx < 0 || originalIdx > (${inputShape[idx]} - 1))) {\n return ${extrapolationValue};\n }\n var data: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);\n for (var i: i32 = -1; i < 3; i++) {\n var ${direction}: ${dType} = originalIdx + ${dType}(i);\n if (${direction} < 0 || ${direction} >= ${inputShape[idx]}) {\n ${(() => {\n if (excludeOutside) {\n return `coefs[i + 1] = 0.0;\n continue;`;\n } else if (useExtrapolation) {\n return `return ${extrapolationValue};`;\n } else {\n return `${direction} = max(0, min(${direction}, ${inputShape[idx]} - 1));`;\n }\n })()};\n }\n var input_indices_copy: ${input.type.indices} = input_indices;\n ${input.indicesSet("input_indices_copy", idx, `u32(${direction})`)};\n data[i + 1] = ${idx === heightIdx ? 
input.getByIndices("input_indices_copy") : "rowCubicInterpolation(input_indices_copy, output_indices)"};\n }\n return cubicInterpolation1D(data, coefs);\n }`;\n };\n return `\n ${createCubicInterpolationFunction(heightIdx)};\n ${createCubicInterpolationFunction(widthIdx)};\n fn getCubicInterpolationCoefs(s: ${dType}) -> array<${dType}, 4> {\n var absS = abs(s);\n var coeffs: array<${dType}, 4> = array<${dType}, 4>(0.0, 0.0, 0.0, 0.0);\n var oneMinusAbsS: ${dType} = 1.0 - absS;\n var twoMinusAbsS: ${dType} = 2.0 - absS;\n var onePlusAbsS: ${dType} = 1.0 + absS;\n coeffs[0] = ((${cubicCoeffA} * onePlusAbsS - 5 * ${cubicCoeffA}) * onePlusAbsS + 8 * ${cubicCoeffA}) * onePlusAbsS - 4 * ${cubicCoeffA};\n coeffs[1] = ((${cubicCoeffA} + 2) * absS - (${cubicCoeffA} + 3)) * absS * absS + 1;\n coeffs[2] = ((${cubicCoeffA} + 2) * oneMinusAbsS - (${cubicCoeffA} + 3)) * oneMinusAbsS * oneMinusAbsS + 1;\n coeffs[3] = ((${cubicCoeffA} * twoMinusAbsS - 5 * ${cubicCoeffA}) * twoMinusAbsS + 8 * ${cubicCoeffA}) * twoMinusAbsS - 4 * ${cubicCoeffA};\n return coeffs;\n }\n\n fn cubicInterpolation1D(x: array<${dType}, 4>, coefs: array<${dType}, 4>) -> ${dType} {\n var coefsSum: ${dType} = coefs[0] + coefs[1] + coefs[2] + coefs[3];\n return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum;\n }\n\n fn bicubicInterpolation(output_indices: ${output.type.indices}) -> ${dType} {\n var input_indices: ${input.type.indices} = output_indices;\n return colCubicInterpolation(input_indices, output_indices);\n }\n `;\n };\n trilinearInterpolation = (input, output, inputShape, useExtrapolation, extrapolationValue) => {\n const isNchw = true;\n const [batchIdx, depthIdx, heightIdx, widthIdx, channelIdx] = inputShape.length === 3 ? [-1, 0, 1, 2, -1] : isNchw ? 
[0, 2, 3, 4, 1] : [0, 1, 2, 3, 4];\n const dType = input.type.value;\n return `\n fn getInputValue(batch: u32, channel: u32, depth:u32, height: u32, width: u32) -> ${dType} {\n var input_indices: ${input.type.indices};\n ${input.indicesSet("input_indices", depthIdx, `max(0, min(depth, ${inputShape[depthIdx]} - 1))`)};\n ${input.indicesSet("input_indices", heightIdx, `max(0, min(height, ${inputShape[heightIdx]} - 1))`)};\n ${input.indicesSet("input_indices", widthIdx, `max(0, min(width, ${inputShape[widthIdx]} - 1))`)};\n ${setChannelAndBatchIndices(input, channelIdx, batchIdx, 3)}\n return ${input.getByIndices("input_indices")};\n }\n\n fn trilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} {\n var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices);\n var depth:${dType} = originalIndices[${depthIdx}];\n var height:${dType} = originalIndices[${heightIdx}];\n var width:${dType} = originalIndices[${widthIdx}];\n ${useExtrapolation ? `if (depth < 0 || depth > (${inputShape[depthIdx]} - 1) || height < 0 || height > (${inputShape[heightIdx]} - 1) || width < 0 || (width > ${inputShape[widthIdx]} - 1)) {\n return ${extrapolationValue};\n }` : ""};\n\n depth = max(0, min(depth, ${inputShape[depthIdx]} - 1));\n height = max(0, min(height, ${inputShape[heightIdx]} - 1));\n width = max(0, min(width, ${inputShape[widthIdx]} - 1));\n var depth1: u32 = u32(depth);\n var height1: u32 = u32(height);\n var width1: u32 = u32(width);\n var depth2: u32 = u32(depth + 1);\n var height2: u32 = u32(height + 1);\n var width2: u32 = u32(width + 1);\n var channel: u32 = ${inputShape.length > 3 ? `u32(originalIndices[${channelIdx}])` : "0"};\n var batch: u32 = ${inputShape.length > 3 ? 
`u32(originalIndices[${batchIdx}])` : "0"};\n\n var x111: ${dType} = getInputValue(batch, channel, depth1, height1, width1);\n var x112: ${dType} = getInputValue(batch, channel, depth1, height1, width2);\n var x121: ${dType} = getInputValue(batch, channel, depth1, height2, width1);\n var x122: ${dType} = getInputValue(batch, channel, depth1, height2, width2);\n var x211: ${dType} = getInputValue(batch, channel, depth2, height1, width1);\n var x212: ${dType} = getInputValue(batch, channel, depth2, height1, width2);\n var x221: ${dType} = getInputValue(batch, channel, depth2, height2, width1);\n var x222: ${dType} = getInputValue(batch, channel, depth2, height2, width2);\n var dx1: ${dType} = abs(depth - ${dType}(depth1));\n var dx2: ${dType} = abs(${dType}(depth2) - depth);\n var dy1: ${dType} = abs(height - ${dType}(height1));\n var dy2: ${dType} = abs(${dType}(height2) - height);\n var dz1: ${dType} = abs(width - ${dType}(width1));\n var dz2: ${dType} = abs(${dType}(width2) - width);\n if (depth1 == depth2) {\n dx1 = 0.5;\n dx2 = 0.5;\n }\n if (height1 == height2) {\n dy1 = 0.5;\n dy2 = 0.5;\n }\n if (width1 == width2) {\n dz1 = 0.5;\n dz2 = 0.5;\n }\n return (x111 * dx2 * dy2 * dz2 + x112 * dx2 * dy2 * dz1 + x121 * dx2 * dy1 *dz2 + x122 * dx2 * dy1 * dz1 +\n x211 * dx1 * dy2 * dz2 + x212 * dx1 * dy2 * dz1 + x221 * dx1 * dy1 *dz2 + x222 * dx1 * dy1 * dz1);\n }`;\n };\n createResizeProgramInfo = (inputTensor, attributes, opsetVersion, scalesInput, sizes, roiInput) => {\n const inputShape = inputTensor.dims;\n const roi = updateRoI(roiInput, attributes.axes, inputShape.length);\n let outputShape = initOutputShape(inputShape, scalesInput, sizes, attributes.axes);\n let scales = scalesInput.slice();\n if (scalesInput.length === 0) {\n scales = inputShape.map((value, index) => value === 0 ? 
1 : outputShape[index] / value);\n if (attributes.keepAspectRatioPolicy !== "stretch") {\n outputShape = adjustOutputShape(inputShape, scales, attributes);\n }\n }\n const output = outputVariable("output", inputTensor.dataType, outputShape.length);\n const input = inputVariable("input", inputTensor.dataType, inputShape.length);\n const outputSize = ShapeUtil.size(outputShape);\n const noScale = inputShape.length === outputShape.length && inputShape.every((d, i) => d === outputShape[i]);\n const useExtrapolation = attributes.coordinateTransformMode === "tf_crop_and_resize";\n const extrapolationValue = attributes.extrapolationValue;\n const dataType = input.type.value;\n const getShaderSource = (shaderHelper) => `\n ${noScale ? "" : `\n ${getOriginalCoordinateFromResizedCoordinate(attributes.coordinateTransformMode, dataType)};\n ${(() => {\n switch (attributes.mode) {\n case "nearest":\n return `\n ${checkInputIndices(input, inputShape)};\n ${getNearestPixelFromOriginal(attributes.nearestMode, opsetVersion, dataType)};\n ${calculateInputIndicesFromOutputIndices(\n input,\n output,\n inputShape,\n outputShape,\n scales.length,\n roi.length,\n useExtrapolation\n )};\n `;\n case "linear":\n return `\n ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales.length, roi.length)};\n ${(() => {\n if (inputShape.length === 2 || inputShape.length === 4) {\n return `${bilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`;\n } else if (inputShape.length === 3 || inputShape.length === 5) {\n return `${trilinearInterpolation(input, output, inputShape, useExtrapolation, extrapolationValue)}`;\n } else {\n throw Error("Linear mode only supports input dims 2, 3, 4 and 5 are supported in linear mode.");\n }\n })()};\n `;\n case "cubic":\n return `\n ${(() => {\n if (inputShape.length === 2 || inputShape.length === 4) {\n return `${bicubicInterpolation(\n input,\n output,\n inputShape,\n outputShape,\n scales,\n roi,\n 
attributes.cubicCoeffA,\n useExtrapolation,\n attributes.extrapolationValue,\n attributes.excludeOutside\n )}`;\n } else {\n throw Error("Cubic mode only supports input dims 2 and 4 are supported in linear mode.");\n }\n })()};\n `;\n default:\n throw Error("Invalid resize mode");\n }\n })()};\n `}\n ${shaderHelper.registerUniform("output_size", "u32").registerUniform("scales", "f32", scales.length).registerUniform("roi", "f32", roi.length).declareVariables(input, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n ${noScale ? "output[global_idx] = input[global_idx];" : `\n let output_indices = ${output.offsetToIndices("global_idx")};\n var input_indices: ${input.type.indices};\n ${(() => {\n switch (attributes.mode) {\n case "nearest":\n return `input_indices = calculateInputIndicesFromOutputIndices(output_indices);\n if (checkInputIndices(input_indices)) {\n output[global_idx] = ${input.getByIndices("input_indices")};\n } else {\n output[global_idx] = ${attributes.extrapolationValue};\n }`;\n case "linear":\n return `output[global_idx] = ${inputShape.length === 2 || inputShape.length === 4 ? "bilinearInterpolation" : "trilinearInterpolation"}(output_indices);`;\n case "cubic":\n return "output[global_idx] = bicubicInterpolation(output_indices);";\n default:\n throw Error(`Unsupported resize mode: ${attributes.mode}`);\n }\n })()};\n`}\n }`;\n return {\n name: "Resize",\n shaderCache: {\n hint: `${attributes.cacheKey}|${opsetVersion}|${scales.length > 0 ? scales : ""}|${sizes.length > 0 ? sizes : ""}|${roi.length > 0 ? 
/**
 * Reads the opset version stored as the first 32-bit unsigned integer of the
 * kernel's custom data buffer.
 *
 * The previous implementation passed the buffer straight to
 * `new Uint32Array(buffer, byteOffset, 1)`. When the buffer is itself a typed-array
 * view (it exposes `byteOffset`), that call takes the "copy from typed array"
 * constructor path, ignores the offset/length arguments and yields the first *byte*
 * rather than the first 32-bit word. Reading through a DataView over the underlying
 * ArrayBuffer fixes that and also tolerates offsets that are not 4-byte aligned.
 *
 * @param context compute context exposing `customDataBuffer` (a typed-array view or
 *     a plain ArrayBuffer).
 * @returns the opset version as a number.
 */
getOpsetVersionFromCustomDataBuffer = (context) => {
  const customData = context.customDataBuffer;
  // NOTE(review): little-endian is assumed because the data is presumably written
  // from WebAssembly memory — confirm against the producer of this buffer.
  const littleEndian = true;
  if (ArrayBuffer.isView(customData)) {
    return new DataView(customData.buffer, customData.byteOffset, customData.byteLength).getUint32(0, littleEndian);
  }
  return new DataView(customData).getUint32(0, littleEndian);
};
"simple" : attributes.nearestMode;\n return createAttributeWithCacheKey({\n antialias,\n axes,\n coordinateTransformMode,\n cubicCoeffA,\n excludeOutside,\n extrapolationValue,\n keepAspectRatioPolicy,\n mode,\n nearestMode\n });\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/rotary-embedding.ts\n var validateInputs22, createRotaryEmbeddingProgramInfo, rotaryEmbedding;\n var init_rotary_embedding = __esm({\n "web/lib/wasm/jsep/webgpu/ops/rotary-embedding.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_attribute_with_cache_key();\n init_common();\n validateInputs22 = (inputs, attributes) => {\n const [input, positionIds, cosCache, sinCache] = inputs;\n const { numHeads, rotaryEmbeddingDim } = attributes;\n if (input.dims.length !== 3 && input.dims.length !== 4) {\n throw new Error(`Input \'x\' is expected to have 3 or 4 dimensions, got ${input.dims.length}`);\n }\n if (!ShapeUtil.areEqual(positionIds.dims, []) && !ShapeUtil.areEqual(positionIds.dims, [1]) && positionIds.dims.length !== 2) {\n throw new Error(`Input \'position_ids\' is expected to have 0, 1, or 2 dimensions, got ${positionIds.dims.length}`);\n }\n if (cosCache.dims.length !== 2) {\n throw new Error(`Input \'cos_cache\' is expected to have 2 dimensions, got ${cosCache.dims.length}`);\n }\n if (sinCache.dims.length !== 2) {\n throw new Error(`Input \'sin_cache\' is expected to have 2 dimensions, got ${sinCache.dims.length}`);\n }\n if (!ShapeUtil.areEqual(cosCache.dims, sinCache.dims)) {\n throw new Error("Inputs \'cos_cache\' and \'sin_cache\' are expected to have the same shape");\n }\n if (rotaryEmbeddingDim > 0 && numHeads === 0) {\n throw new Error("num_heads must be provided if rotary_embedding_dim is specified");\n }\n const batchSize = input.dims[0];\n const sequenceLength = input.dims[input.dims.length - 2];\n const maxSequenceLength = cosCache.dims[0];\n const hiddenSize = ShapeUtil.sizeFromDimension(input.dims, 1) / sequenceLength;\n const headSize = 
/**
 * Builds the program description (shader source generator, cache key, run data) for
 * the RotaryEmbedding kernel.
 *
 * @param inputs tensors in the order [input, position_ids, cos_cache, sin_cache].
 * @param attributes object providing `interleaved`, `numHeads`, `rotaryEmbeddingDim`
 *     and `scale`.
 * @returns an object with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 */
createRotaryEmbeddingProgramInfo = (inputs, attributes) => {
  const { interleaved, numHeads, rotaryEmbeddingDim, scale } = attributes;
  const [xTensor, positionIdsTensor, cosTensor, sinTensor] = inputs;
  const xDims = xTensor.dims;
  const xRank = xDims.length;

  const batchSize = xDims[0];
  const batchStride = ShapeUtil.sizeFromDimension(xDims, 1);
  const sequenceLength = xDims[xRank - 2];
  const hiddenSize = batchStride / sequenceLength;
  const halfRotaryEmbeddingDim = cosTensor.dims[1];
  // With no explicit rotary dim the head size is derived from the cos cache width.
  const headSize = rotaryEmbeddingDim === 0 ? halfRotaryEmbeddingDim * 2 : hiddenSize / numHeads;

  const globalShape = [batchSize, sequenceLength, hiddenSize / headSize, headSize - halfRotaryEmbeddingDim];
  const globalStrides = ShapeUtil.computeStrides(globalShape);

  const programUniforms = [
    { type: 1 /* float */, data: scale },
    { type: 12 /* uint32 */, data: globalShape },
    { type: 12 /* uint32 */, data: globalStrides }
  ];
  // strides for addressing the input/output tensor, in permutated order to align with the unfolded global index,
  // i.e. BSNH
  if (xRank === 3) {
    programUniforms.push({ type: 12 /* uint32 */, data: [batchStride, hiddenSize, headSize, 1] });
  }
  if (xRank === 4) {
    programUniforms.push({ type: 12 /* uint32 */, data: [batchStride, headSize, sequenceLength * headSize, 1] });
  }
  programUniforms.push(
    ...createTensorShapeVariables(xDims, positionIdsTensor.dims, cosTensor.dims, sinTensor.dims, xDims)
  );

  const getShaderSource = (shaderHelper) => {
    const input = inputVariable("input", xTensor.dataType, xRank);
    const positionIds = inputVariable("position_ids", positionIdsTensor.dataType, positionIdsTensor.dims.length);
    const cosCache = inputVariable("cos_cache", cosTensor.dataType, cosTensor.dims.length);
    const sinCache = inputVariable("sin_cache", sinTensor.dataType, sinTensor.dims.length);
    const output = outputVariable("output", xTensor.dataType, xRank);
    shaderHelper.registerUniforms([
      { name: "scale", type: "f32" },
      { name: "global_shape", type: "u32", length: globalShape.length },
      { name: "global_strides", type: "u32", length: globalStrides.length },
      { name: "input_output_strides", type: "u32", length: globalStrides.length }
    ]);
    return `
        ${shaderHelper.declareVariables(input, positionIds, cosCache, sinCache, output)}

        ${shaderHelper.mainStart(WORKGROUP_SIZE)}
          let half_rotary_emb_dim = uniforms.${cosCache.name}_shape[1];
          let bsnh = global_idx / uniforms.global_strides % uniforms.global_shape;
          let size = uniforms.global_shape[0] * uniforms.global_strides[0];
          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("size")}

          if (bsnh[3] < half_rotary_emb_dim) {
            let position_ids_idx =
                ${positionIds.broadcastedIndicesToOffset("bsnh.xy", outputVariable("", positionIds.type.tensor, 2))};
            let position_id =
                u32(${positionIds.getByOffset("position_ids_idx")}) + select(0, bsnh[1], position_ids_idx == 0);
            let i = dot(bsnh, uniforms.input_output_strides) + select(0, bsnh[3], ${interleaved});
            let j = i + select(half_rotary_emb_dim, 1, ${interleaved});
            let re = ${input.getByOffset("i")} * ${cosCache.get("position_id", "bsnh[3]")} -
                ${input.getByOffset("j")} * ${sinCache.get("position_id", "bsnh[3]")};
            ${output.setByOffset("i", "re")}
            let im = ${input.getByOffset("i")} * ${sinCache.get("position_id", "bsnh[3]")} +
                ${input.getByOffset("j")} * ${cosCache.get("position_id", "bsnh[3]")};
            ${output.setByOffset("j", "im")}
          } else {
            let k = dot(bsnh, uniforms.input_output_strides) + half_rotary_emb_dim;
            ${output.setByOffset("k", input.getByOffset("k"))}
          }
        }`;
  };

  const getRunData = () => ({
    outputs: [{ dims: xTensor.dims, dataType: xTensor.dataType }],
    dispatchGroup: { x: Math.ceil(ShapeUtil.size(globalShape) / WORKGROUP_SIZE) },
    programUniforms
  });

  return {
    name: "RotaryEmbedding",
    shaderCache: {
      hint: createAttributeWithCacheKey({
        interleaved
      }).cacheKey,
      inputDependencies: ["rank", "rank", "rank", "rank"]
    },
    getShaderSource,
    getRunData
  };
};
/**
 * Builds the program description for the SkipLayerNormalization kernel.
 *
 * The generated shader adds `x + skip (+ bias)`, normalizes each row of
 * `hidden_size` elements and scales by `gamma` (plus `beta` when present and the
 * op is not "simplified"). In simplified mode the mean is not subtracted.
 *
 * The shader cache hint now includes `simplified`: the emitted shader text differs
 * between the two modes, so without it a simplified and a regular instance with the
 * same input types could resolve to the same cached shader.
 *
 * @param inputs tensors in the order [x, skip, gamma, beta?, bias?].
 * @param attributes object providing `epsilon` and `simplified`.
 * @param outputCount number of outputs requested by the caller.
 * @param isTraining when false, mean / inverse-std-dev outputs are not written.
 * @returns an object with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 */
createSkipLayerNormProgramInfo = (inputs, attributes, outputCount, isTraining) => {
  const simplified = attributes.simplified;
  const inputShape = inputs[0].dims;
  const inputSize = ShapeUtil.size(inputShape);
  const outputShape = inputShape;
  const outputSize = inputSize;
  const hiddenSize = inputShape.slice(-1)[0];
  const meanInvStdDevDim = isTraining ? inputShape.slice(0, -1).concat(1) : [];
  const hasBetaInput = !simplified && inputs.length > 3;
  const hasBiasInput = inputs.length > 4;
  const hasMeanOutput = isTraining && outputCount > 1;
  const hasInvStdDevOutput = isTraining && outputCount > 2;
  const hasInputSkipBiasSumOutput = outputCount > 3;
  const components = getMaxComponents(hiddenSize);
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: components },
    { type: 12 /* uint32 */, data: hiddenSize },
    { type: 1 /* float */, data: attributes.epsilon }
  ];
  const getShaderSource = (shaderHelper) => {
    const uniformsArray = [
      { name: "output_size", type: "u32" },
      { name: "components", type: "u32" },
      { name: "hidden_size", type: "u32" },
      { name: "epsilon", type: "f32" }
    ];
    const variables = [
      inputVariable("x", inputs[0].dataType, inputs[0].dims, components),
      inputVariable("skip", inputs[1].dataType, inputs[1].dims, components),
      inputVariable("gamma", inputs[2].dataType, inputs[2].dims, components)
    ];
    if (hasBetaInput) {
      variables.push(inputVariable("beta", inputs[3].dataType, inputs[3].dims, components));
    }
    if (hasBiasInput) {
      variables.push(inputVariable("bias", inputs[4].dataType, inputs[4].dims, components));
    }
    variables.push(outputVariable("output", inputs[0].dataType, outputShape, components));
    if (hasMeanOutput) {
      variables.push(outputVariable("mean_output", 1 /* float */, meanInvStdDevDim));
    }
    if (hasInvStdDevOutput) {
      variables.push(outputVariable("inv_std_output", 1 /* float */, meanInvStdDevDim));
    }
    if (hasInputSkipBiasSumOutput) {
      variables.push(outputVariable("input_skip_bias_sum", inputs[0].dataType, outputShape, components));
    }
    const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
    // One invocation handles one row: first pass accumulates sum / square sum in f32
    // while storing the un-normalized value, second pass normalizes in place.
    return `

      ${shaderHelper.registerUniforms(uniformsArray).declareVariables(...variables)}

      ${shaderHelper.mainStart()}
        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size / uniforms.hidden_size")}
        let hidden_size_vectorized: u32 = uniforms.hidden_size / uniforms.components;
        let offset = global_idx * hidden_size_vectorized;
        var sum = ${fillVector("f32", components)};
        var squareSum = ${fillVector("f32", components)};
        for (var i: u32 = 0; i < hidden_size_vectorized; i++) {
          let skip_value = skip[offset + i];
          let bias_value = ${hasBiasInput ? "bias[i]" : dataType + "(0.0)"};
          let input_value = x[offset + i];
          let value = input_value + skip_value + bias_value;
          ${hasInputSkipBiasSumOutput ? "input_skip_bias_sum[offset + i] = value;" : ""}
          output[offset + i] = value;
          let f32_value = ${castToF32(dataType, components, "value")};
          sum += f32_value;
          squareSum += f32_value * f32_value;
        }
        let mean = ${sumVector("sum", components)} / f32(uniforms.hidden_size);
        let inv_std_dev = inverseSqrt(${sumVector("squareSum", components)} / f32(uniforms.hidden_size) ${simplified ? "" : "- mean * mean"} + uniforms.epsilon);
        ${hasMeanOutput ? "mean_output[global_idx] = mean;" : ""}
        ${hasInvStdDevOutput ? "inv_std_output[global_idx] = inv_std_dev;" : ""}
        for (var i: u32 = 0; i < hidden_size_vectorized; i++) {
          output[offset + i] = (output[offset + i] ${simplified ? "" : `- ${dataType}(mean)`}) * ${dataType}(inv_std_dev) * gamma[i] ${hasBetaInput ? "+ beta[i]" : ""};
        }
      }`;
  };
  const outputs = [{ dims: outputShape, dataType: inputs[0].dataType }];
  if (outputCount > 1) {
    outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ });
  }
  if (outputCount > 2) {
    outputs.push({ dims: meanInvStdDevDim, dataType: 1 /* float */ });
  }
  if (outputCount > 3) {
    outputs.push({ dims: inputShape, dataType: inputs[0].dataType });
  }
  return {
    name: "SkipLayerNormalization",
    shaderCache: {
      // `simplified` changes the shader text, so it has to be part of the cache key.
      hint: `${components};${hasMeanOutput};${hasInvStdDevOutput};${hasInputSkipBiasSumOutput};${Boolean(simplified)}`,
      inputDependencies: inputs.map((_input, _index) => "type")
    },
    getShaderSource,
    getRunData: () => ({ outputs, dispatchGroup: { x: Math.ceil(outputSize / hiddenSize / 64) }, programUniforms })
  };
};
/**
 * Validates the inputs and attributes of a Slice call.
 *
 * Checks that at least one input is present, that `axes` / `starts` / `ends` have
 * consistent lengths, and that every input after the first has data type 6 (int32)
 * or 7 (int64).
 *
 * The data-type error now reports the real input index. Previously it reported the
 * position inside `inputs.slice(1)`, i.e. one less than the offending input.
 *
 * @param inputs tensors passed to the op; `inputs[0]` is the data tensor.
 * @param attributes object providing `axes`, `starts` and `ends` arrays.
 * @throws Error when any of the checks fails.
 */
validateInputs24 = (inputs, attributes) => {
  if (!inputs || inputs.length < 1) {
    throw new Error("too few inputs");
  }
  const { axes, starts, ends } = attributes;
  if (axes.length !== 0) {
    if (axes.length !== starts.length || axes.length !== ends.length) {
      throw new Error("axes, starts and ends must have the same length");
    }
  } else if (starts.length !== ends.length) {
    throw new Error("starts and ends must have the same length");
  }
  for (let idx = 1; idx < inputs.length; idx++) {
    const dataType = inputs[idx].dataType;
    if (dataType !== 6 /* int32 */ && dataType !== 7 /* int64 */) {
      throw new Error(`Input ${idx} must be an array of int32 or int64`);
    }
  }
};
/**
 * Builds the program description for the Slice kernel.
 *
 * Starts / ends come from `attributes`; steps are read from input 4 when present
 * and default to 1 for every axis. Negative steps are rewritten as a positive step
 * plus a sign, with start/end swapped accordingly, before being handed to the shader
 * as uniforms.
 *
 * The zero-step check now actually throws. The previous code built an arrow function
 * that contained the `throw` but never invoked it, so a step of 0 went through
 * unnoticed.
 *
 * @param inputs tensors passed to the op; `inputs[0]` is the data tensor.
 * @param attributes object providing `starts`, `ends` and `axes` arrays.
 * @returns an object with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 * @throws Error when a step is 0 or when starts / ends / axes lengths disagree.
 */
createSliceProgramInfo = (inputs, attributes) => {
  const inputShape = inputs[0].dims;
  const inputSize = ShapeUtil.size(inputShape);
  const axes = attributes.axes.length > 0 ? ShapeUtil.normalizeAxes(attributes.axes, inputShape.length) : [...Array(inputShape.length).keys()];
  let steps = readInput(inputs, 4);
  if (steps.includes(0)) {
    throw new Error("step cannot be 0");
  }
  if (steps.length === 0) {
    steps = Array(axes.length).fill(1);
  }
  const starts = attributes.starts.map((start, i) => fixStartEndValues(start, i, inputShape, axes, steps));
  const ends = attributes.ends.map((end, i) => fixStartEndValues(end, i, inputShape, axes, steps));
  if (axes.length !== starts.length || axes.length !== ends.length) {
    throw new Error("start, ends and axes should have the same number of elements");
  }
  if (axes.length !== inputShape.length) {
    // Pad the per-axis arrays so they cover every dimension; untouched axes are
    // sliced in full with step 1.
    for (let i = 0; i < inputShape.length; ++i) {
      if (!axes.includes(i)) {
        starts.splice(i, 0, 0);
        ends.splice(i, 0, inputShape[i]);
        steps.splice(i, 0, 1);
      }
    }
  }
  const signs = steps.map((step) => Math.sign(step));
  steps.forEach((step, i, array) => {
    if (step < 0) {
      const numSteps = (ends[i] - starts[i]) / step;
      const newEnd = starts[i];
      const newStart = newEnd + numSteps * steps[i];
      starts[i] = newStart;
      ends[i] = newEnd;
      array[i] = -step;
    }
  });
  const outputShape = inputShape.slice(0);
  axes.forEach((axis) => {
    outputShape[axis] = Math.ceil((ends[axis] - starts[axis]) / steps[axis]);
  });
  const outputTensorInfo = { dims: outputShape, dataType: inputs[0].dataType };
  const output = outputVariable("output", inputs[0].dataType, outputShape.length);
  const input = inputVariable("input", inputs[0].dataType, inputs[0].dims.length);
  const outputSize = ShapeUtil.size(outputShape);
  const uniforms = [
    { name: "outputSize", type: "u32" },
    { name: "starts", type: "u32", length: starts.length },
    { name: "signs", type: "i32", length: signs.length },
    { name: "steps", type: "u32", length: steps.length }
  ];
  const programUniforms = [
    { type: 12 /* uint32 */, data: outputSize },
    { type: 12 /* uint32 */, data: starts },
    { type: 6 /* int32 */, data: signs },
    { type: 12 /* uint32 */, data: steps },
    ...createTensorShapeVariables(inputs[0].dims, outputShape)
  ];
  const getShaderSource = (shaderHelper) => `
      ${shaderHelper.registerUniforms(uniforms).declareVariables(input, output)}
        ${calculateInputIndicesImpl(input, output, inputShape)}
        ${shaderHelper.mainStart()}
          ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.outputSize")}
          let output_indices = ${output.offsetToIndices("global_idx")};
          let input_indices = calculateInputIndices(output_indices);
          ${output.setByOffset("global_idx", input.getByIndices("input_indices"))}
      }`;
  return {
    name: "Slice",
    shaderCache: { hint: `${signs.length}_${starts.length}_${steps.length}`, inputDependencies: ["rank"] },
    getShaderSource,
    getRunData: () => ({
      outputs: [outputTensorInfo],
      dispatchGroup: { x: Math.ceil(
        inputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    })
  };
};
outputSize = ShapeUtil.size(shape);\n const WG = 64;\n let axis = attributes.axis;\n if (axis < 0) {\n axis = shape.length + axis;\n }\n if (axis < shape.length - 1) {\n throw new Error("softmax only supports last axis for now.");\n }\n const cols = shape[axis];\n const rows = outputSize / cols;\n const components = getMaxComponents(cols);\n const packedCols = cols / components;\n const maxVector = (name, components2) => {\n if (components2 === 4) {\n return `max(max(${name}.x, ${name}.y), max(${name}.z, ${name}.w))`;\n } else if (components2 === 2) {\n return `max(${name}.x, ${name}.y)`;\n } else if (components2 === 3) {\n return `max(max(${name}.x, ${name}.y), ${name}.z)`;\n }\n return name;\n };\n const x = inputVariable("x", input.dataType, input.dims, components);\n const output = outputVariable("result", input.dataType, input.dims, components);\n const valueType = x.type.value;\n const threadMaxDecl = tensorTypeToWsglStorageType(input.dataType) === "f32" ? `var threadMax = ${valueType}(-3.402823e+38f);` : `var threadMax = ${valueType}(-65504.0h);`;\n const getShaderSource = (shaderHelper) => `\n var rowMaxShared : ${valueType};\n var rowSumShared : ${valueType};\n var threadShared : array<${valueType}, ${WG}>;\n\n fn getValue(row: i32, col: i32, row_stride: i32) -> ${valueType} {\n let index = row * row_stride + col;\n return x[index];\n }\n\n fn setValue(row: i32, col: i32, row_stride: i32, value: ${valueType}) {\n let index = row * row_stride + col;\n result[index] = value;\n }\n ${shaderHelper.registerUniform("packedCols", "i32").declareVariables(x, output)}\n ${shaderHelper.mainStart()}\n let gindex = i32(global_idx);\n let lindex = i32(local_idx);\n const wg = ${WG};\n let row = gindex / wg;\n let cols = uniforms.packedCols;\n let row_stride : i32 = uniforms.packedCols;\n\n // find the rows max\n ${threadMaxDecl}\n for (var col = lindex; col < cols; col += wg) {\n let value = getValue(row, col, row_stride);\n threadMax = max(threadMax, value);\n }\n if 
/**
 * Converts raw Softmax attributes into a cache-keyed attribute object that carries
 * only the `axis` field.
 *
 * @param attributes object providing `axis`.
 * @returns the result of `createAttributeWithCacheKey({ axis })`.
 */
parseSoftmaxAttributes = (attributes) => {
  const { axis } = attributes;
  return createAttributeWithCacheKey({ axis });
};
/**
 * Builds the program description for the Split kernel.
 *
 * For each requested output it derives the output shape (input shape with the split
 * axis replaced by the corresponding split size) and the running end offset along
 * the split axis, which the shader uses to decide which output an element belongs to.
 *
 * The output shape is now written at the *normalized* axis. The previous code indexed
 * with the raw `attributes.axis`; for a negative axis that set a `"-1"`-style property
 * on the array instead of a dimension, so every output kept the full input shape.
 *
 * @param inputs tensors passed to the op; `inputs[0]` is the data tensor.
 * @param attributes object providing `axis`, `numOutputs`, `splitSizes` and `cacheKey`.
 * @returns an object with `name`, `shaderCache`, `getShaderSource` and `getRunData`.
 */
createSplitProgramInfo = (inputs, attributes) => {
  const inputShape = inputs[0].dims;
  const inputSize = ShapeUtil.size(inputShape);
  const dataType = inputs[0].dataType;
  const axis = ShapeUtil.normalizeAxis(attributes.axis, inputShape.length);
  const outputs = new Array(attributes.numOutputs);
  const input = inputVariable("input", dataType, inputShape.length);
  const sizeInSplitAxis = new Array(attributes.numOutputs);
  const outputsTensorInfo = [];
  const outputShapes = [];
  let previousSum = 0;
  const programUniforms = [{ type: 12 /* uint32 */, data: inputSize }];
  for (let i = 0; i < attributes.numOutputs; i++) {
    previousSum += attributes.splitSizes[i];
    // Cumulative end offset of output i along the split axis.
    sizeInSplitAxis[i] = previousSum;
    const outputShape = inputShape.slice();
    outputShape[axis] = attributes.splitSizes[i];
    outputShapes.push(outputShape);
    outputs[i] = outputVariable(`output${i}`, dataType, outputShape.length);
    outputsTensorInfo.push({ dims: outputShapes[i], dataType: inputs[0].dataType });
  }
  programUniforms.push(
    { type: 12 /* uint32 */, data: sizeInSplitAxis },
    ...createTensorShapeVariables(inputShape, ...outputShapes)
  );
  const getShaderSource = (shaderHelper) => `
  ${shaderHelper.registerUniform("input_size", "u32").registerUniform("size_in_split_axis", "u32", sizeInSplitAxis.length).declareVariables(input, ...outputs)}
  ${calculateOutputIndexImpl(sizeInSplitAxis.length)}
  ${writeBufferDataImpl(outputs)}

  ${shaderHelper.mainStart()}
    ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.input_size")}

    var indices = ${input.offsetToIndices("global_idx")};
    var index = ${input.indicesGet("indices", axis)};
    let output_number = calculateOutputIndex(index);
    if (output_number != 0) {
      index -= ${getElementAt("uniforms.size_in_split_axis", "output_number - 1u", sizeInSplitAxis.length)};
      ${input.indicesSet("indices", axis, "index")};
    }
    writeBufferData(output_number, indices, global_idx);
  }`;
  return {
    name: "Split",
    shaderCache: { hint: attributes.cacheKey, inputDependencies: ["rank"] },
    getShaderSource,
    getRunData: () => ({
      outputs: outputsTensorInfo,
      dispatchGroup: { x: Math.ceil(
        inputSize / 64
        /* workgroup size */
      ) },
      programUniforms
    })
  };
};
(let i = 0; i < inputShape.length; ++i) {\n outputShape.push(inputShape[i] * repeats[i]);\n }\n return outputShape;\n };\n createTileProgramInfo = (inputs) => {\n const inputShape = inputs[0].dims;\n const repeats = getRepeats(inputs[1]);\n const outputShape = getOutputShape2(inputShape, repeats);\n const outputSize = ShapeUtil.size(outputShape);\n const dataType = inputs[0].dataType;\n const input = inputVariable("input", dataType, inputShape.length);\n const output = outputVariable("output", dataType, outputShape.length);\n const getShaderSource = (shaderHelper) => `\n const inputShape = ${input.indices(...inputShape)};\n ${shaderHelper.registerUniform("output_size", "u32").declareVariables(input, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size")}\n let output_indices = ${output.offsetToIndices("global_idx")};\n var input_indices: ${input.type.indices};\n for (var i = 0; i < ${inputShape.length}; i++) {\n let input_dim_i = ${input.indicesGet("uniforms.input_shape", "i")};\n let input_dim_value = ${output.indicesGet("output_indices", "i")} % input_dim_i;\n\n ${input.indicesSet("input_indices", "i", "input_dim_value")}\n }\n ${output.setByOffset("global_idx", input.getByIndices("input_indices"))}\n }`;\n return {\n name: "Tile",\n shaderCache: { hint: `${repeats}`, inputDependencies: ["rank"] },\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: inputs[0].dataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64\n /* workgroup size */\n ) },\n programUniforms: [{ type: 12 /* uint32 */, data: outputSize }, ...createTensorShapeVariables(inputs[0].dims, outputShape)]\n }),\n getShaderSource\n };\n };\n tile = (context) => {\n validateInputs27(context.inputs);\n context.compute(createTileProgramInfo(context.inputs), { inputs: [0] });\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/ops/where.ts\n var createWhereOpProgramShader, createWhereOpProgramInfo, where;\n var init_where = 
__esm({\n "web/lib/wasm/jsep/webgpu/ops/where.ts"() {\n "use strict";\n init_wasm_common();\n init_util();\n init_common();\n createWhereOpProgramShader = (shaderHelper, inputs, dimsOutput, isBroadcast, typeOutput) => {\n const output = outputVariable("output_data", typeOutput, dimsOutput.length, 4);\n const a = inputVariable("a_data", inputs[1].dataType, inputs[1].dims.length, 4);\n const b = inputVariable("b_data", inputs[2].dataType, inputs[2].dims.length, 4);\n const c = inputVariable("c_data", inputs[0].dataType, inputs[0].dims.length, 4);\n let assignment;\n const expression = (a2, b2, c2) => `select(${b2}, ${a2}, ${c2})`;\n if (!isBroadcast) {\n assignment = output.setByOffset(\n "global_idx",\n expression(a.getByOffset("global_idx"), b.getByOffset("global_idx"), c.getByOffset("global_idx"))\n );\n } else {\n const singleAssignment = (resStr, x, typeCast = "") => {\n const expressionA = `a_data[index_a${x}][component_a${x}]`;\n const expressionB = `b_data[index_b${x}][component_b${x}]`;\n const expressionC = `bool(c_data[index_c${x}] & (0xffu << (component_c${x} * 8)))`;\n return `\n let output_indices${x} = ${output.offsetToIndices(`global_idx * 4u + ${x}u`)};\n let offset_a${x} = ${a.broadcastedIndicesToOffset(`output_indices${x}`, output)};\n let offset_b${x} = ${b.broadcastedIndicesToOffset(`output_indices${x}`, output)};\n let offset_c${x} = ${c.broadcastedIndicesToOffset(`output_indices${x}`, output)};\n let index_a${x} = offset_a${x} / 4u;\n let index_b${x} = offset_b${x} / 4u;\n let index_c${x} = offset_c${x} / 4u;\n let component_a${x} = offset_a${x} % 4u;\n let component_b${x} = offset_b${x} % 4u;\n let component_c${x} = offset_c${x} % 4u;\n ${resStr}[${x}] = ${typeCast}(${expression(expressionA, expressionB, expressionC)});\n `;\n };\n if (typeOutput === 9 /* bool */) {\n assignment = `\n var data = vec4(0);\n ${singleAssignment("data", 0, "u32")}\n ${singleAssignment("data", 1, "u32")}\n ${singleAssignment("data", 2, "u32")}\n 
${singleAssignment("data", 3, "u32")}\n output_data[global_idx] = dot(vec4(0x1, 0x100, 0x10000, 0x1000000), vec4(data));`;\n } else {\n assignment = `\n ${singleAssignment("output_data[global_idx]", 0)}\n ${singleAssignment("output_data[global_idx]", 1)}\n ${singleAssignment("output_data[global_idx]", 2)}\n ${singleAssignment("output_data[global_idx]", 3)}\n `;\n }\n }\n return `\n ${shaderHelper.registerUniform("vec_size", "u32").declareVariables(c, a, b, output)}\n ${shaderHelper.mainStart()}\n ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")}\n ${assignment}\n }`;\n };\n createWhereOpProgramInfo = (inputs) => {\n const dimsA = inputs[1].dims;\n const dimsB = inputs[2].dims;\n const dimsC = inputs[0].dims;\n const outputDataType = inputs[1].dataType;\n const isBroadcast = !(ShapeUtil.areEqual(dimsA, dimsB) && ShapeUtil.areEqual(dimsB, dimsC));\n let outputShape = dimsA;\n let outputSize = ShapeUtil.size(dimsA);\n if (isBroadcast) {\n const calculatedShape = BroadcastUtil.calcShape(BroadcastUtil.calcShape(dimsA, dimsB, false), dimsC, false);\n if (!calculatedShape) {\n throw new Error("Can\'t perform where op on the given tensors");\n }\n outputShape = calculatedShape;\n outputSize = ShapeUtil.size(outputShape);\n }\n const vecSize = Math.ceil(outputSize / 4);\n return {\n name: "Where",\n shaderCache: { inputDependencies: ["rank", "rank", "rank"] },\n getShaderSource: (shaderHelper) => createWhereOpProgramShader(shaderHelper, inputs, outputShape, isBroadcast, outputDataType),\n getRunData: () => ({\n outputs: [{ dims: outputShape, dataType: outputDataType }],\n dispatchGroup: { x: Math.ceil(\n outputSize / 64 / 4\n /* vec size */\n ) },\n programUniforms: [{ type: 12 /* uint32 */, data: vecSize }, ...createTensorShapeVariables(dimsC, dimsA, dimsB, outputShape)]\n })\n };\n };\n where = (context) => {\n context.compute(createWhereOpProgramInfo(context.inputs));\n };\n }\n });\n\n // web/lib/wasm/jsep/webgpu/op-resolve-rules.ts\n var 
WEBGPU_OP_RESOLVE_RULES;\n var init_op_resolve_rules = __esm({\n "web/lib/wasm/jsep/webgpu/op-resolve-rules.ts"() {\n "use strict";\n init_argminmax();\n init_attention();\n init_batch_norm();\n init_bias_add();\n init_bias_split_gelu();\n init_binary_op();\n init_concat();\n init_conv();\n init_conv_transpose();\n init_cumsum();\n init_depth_to_space();\n init_einsum();\n init_expand();\n init_fast_gelu();\n init_gather();\n init_gather_elements();\n init_gemm();\n init_instance_norm();\n init_layer_norm();\n init_matmul();\n init_matmulnbits();\n init_multihead_attentiion();\n init_pad();\n init_pool();\n init_range();\n init_reduce();\n init_resize();\n init_rotary_embedding();\n init_skip_layer_norm();\n init_slice();\n init_softmax();\n init_split();\n init_tile();\n init_transpose();\n init_unary_op();\n init_where();\n WEBGPU_OP_RESOLVE_RULES = /* @__PURE__ */ new Map([\n ["Abs", [abs]],\n ["Acos", [acos]],\n ["Acosh", [acosh]],\n ["Add", [add]],\n ["ArgMax", [argMax, parseArgMinMaxAttributes]],\n ["ArgMin", [argMin, parseArgMinMaxAttributes]],\n ["Asin", [asin]],\n ["Asinh", [asinh]],\n ["Atan", [atan]],\n ["Atanh", [atanh]],\n ["Attention", [attention]],\n // TODO: support new attributes for AveragePool-10\n ["AveragePool", [averagePool, parseAveragePoolAttributes]],\n ["BatchNormalization", [batchNorm]],\n ["BiasAdd", [biasAdd]],\n ["BiasSplitGelu", [biasSplitGelu]],\n ["Cast", [cast, parseCastAttributes]],\n ["Ceil", [ceil]],\n ["Clip", [clip]],\n ["Concat", [concat, parseConcatAttributes]],\n ["Conv", [conv, parseConvAttributes]],\n ["ConvTranspose", [convTranspose, parseConvTransposeAttributes]],\n ["Cos", [cos]],\n ["Cosh", [cosh]],\n ["CumSum", [cumsum, parseCumSumAttributes]],\n ["DepthToSpace", [depthToSpace, parseDepthToSpaceAttributes]],\n ["Div", [div]],\n ["Einsum", [einsum, parseEinsumAttributes]],\n ["Elu", [elu, parseAlphaAttributes]],\n ["Equal", [equal]],\n ["Erf", [erf]],\n ["Exp", [exp]],\n ["Expand", [expand]],\n ["FastGelu", 
[fastGelu2]],\n ["Floor", [floor]],\n ["FusedConv", [conv, parseConvAttributes]],\n ["Gather", [gather, parseGatherAttributes]],\n ["GatherElements", [gatherElements, parseGatherElementsAttributes]],\n ["Gelu", [gelu]],\n ["Gemm", [gemm, parseGemmAttributes]],\n ["GlobalAveragePool", [globalAveragePool, parseGlobalAveragePoolAttributes]],\n ["GlobalMaxPool", [globalMaxPool, parseGlobalMaxPoolAttributes]],\n ["Greater", [greater]],\n ["GreaterOrEqual", [greaterOrEqual]],\n ["HardSigmoid", [hardSigmoid, parseHardSigmoidAttributes]],\n ["InstanceNormalization", [instanceNorm]],\n ["LayerNormalization", [layerNorm]],\n ["LeakyRelu", [leakyRelu, parseAlphaAttributes]],\n ["Less", [less]],\n ["LessOrEqual", [lessOrEqual]],\n ["Log", [log]],\n ["MatMul", [matMul]],\n ["MatMulNBits", [matMulNBits, parseMatMulNBitsAttributes]],\n // TODO: support new attributes for MaxPool-8 and MaxPool-10\n ["MaxPool", [maxPool, parseMaxPoolAttributes]],\n ["Mul", [mul]],\n ["MultiHeadAttention", [multiHeadAttention, parseMultiHeadAttentionAttributes]],\n ["Neg", [neg]],\n ["Not", [not]],\n ["Pad", [pad]],\n ["Pow", [pow]],\n ["Range", [range]],\n ["Reciprocal", [reciprocal]],\n ["ReduceMin", [reduceMin]],\n ["ReduceMean", [reduceMean]],\n ["ReduceMax", [reduceMax]],\n ["ReduceSum", [reduceSum]],\n ["ReduceProd", [reduceProd]],\n ["ReduceL1", [reduceL1]],\n ["ReduceL2", [reduceL2]],\n ["ReduceLogSum", [reduceLogSum]],\n ["ReduceLogSumExp", [reduceLogSumExp]],\n ["ReduceSumSquare", [reduceSumSquare]],\n ["Relu", [relu]],\n ["Resize", [resize, parseResizeAttributes]],\n ["RotaryEmbedding", [rotaryEmbedding]],\n ["Sigmoid", [sigmoid]],\n ["Sin", [sin]],\n ["Sinh", [sinh]],\n ["Slice", [slice, parseSliceAttributes]],\n ["SkipLayerNormalization", [skipLayerNorm]],\n ["Split", [split, parseSplitAttributes]],\n ["Sqrt", [sqrt]],\n ["Softmax", [softmax, parseSoftmaxAttributes]],\n ["Sub", [sub]],\n ["Tan", [tan]],\n ["Tanh", [tanh]],\n ["ThresholdedRelu", [thresholdedRelu, 
parseAlphaAttributes]],\n ["Tile", [tile]],\n ["Transpose", [transpose, parseTransposeAttributes]],\n ["Where", [where]]\n ]);\n }\n });\n\n // web/lib/wasm/jsep/webgpu/program-manager.ts\n var ProgramManager;\n var init_program_manager = __esm({\n "web/lib/wasm/jsep/webgpu/program-manager.ts"() {\n "use strict";\n init_esm();\n init_log();\n init_common();\n ProgramManager = class {\n constructor(backend) {\n this.backend = backend;\n this.repo = /* @__PURE__ */ new Map();\n this.attributesBound = false;\n }\n getArtifact(key) {\n return this.repo.get(key);\n }\n setArtifact(key, artifact) {\n this.repo.set(key, artifact);\n }\n run(buildArtifact, inputs, outputs, dispatchGroup, uniformBufferBinding) {\n TRACE_FUNC_BEGIN(buildArtifact.programInfo.name);\n const device = this.backend.device;\n const computePassEncoder = this.backend.getComputePassEncoder();\n this.backend.writeTimestamp(this.backend.pendingDispatchNumber * 2);\n const entries = [];\n for (const input of inputs) {\n entries.push({ binding: entries.length, resource: { buffer: input.buffer } });\n }\n for (const output of outputs) {\n entries.push({ binding: entries.length, resource: { buffer: output.buffer } });\n }\n if (uniformBufferBinding) {\n entries.push({ binding: entries.length, resource: uniformBufferBinding });\n }\n const bindGroup = device.createBindGroup(\n { layout: buildArtifact.computePipeline.getBindGroupLayout(0), entries, label: buildArtifact.programInfo.name }\n );\n if (this.backend.sessionStatus === "capturing") {\n const commandInfo = {\n kernelId: this.backend.currentKernelId,\n computePipeline: buildArtifact.computePipeline,\n bindGroup,\n dispatchGroup\n };\n const sessionCommandList = this.backend.capturedCommandList.get(this.backend.currentSessionId);\n sessionCommandList.push(commandInfo);\n }\n computePassEncoder.setPipeline(buildArtifact.computePipeline);\n computePassEncoder.setBindGroup(0, bindGroup);\n computePassEncoder.dispatchWorkgroups(...dispatchGroup);\n 
this.backend.writeTimestamp(this.backend.pendingDispatchNumber * 2 + 1);\n this.backend.pendingDispatchNumber++;\n if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber || this.backend.queryType === "at-passes") {\n this.backend.endComputePass();\n }\n if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber) {\n this.backend.flush();\n }\n TRACE_FUNC_END(buildArtifact.programInfo.name);\n }\n dispose() {\n }\n build(programInfo, normalizedDispatchGroupSize) {\n TRACE_FUNC_BEGIN(programInfo.name);\n const device = this.backend.device;\n const extensions = [];\n if (device.features.has("shader-f16")) {\n extensions.push("enable f16;");\n }\n const shaderHelper = createShaderHelper(normalizedDispatchGroupSize, this.backend.device.limits);\n const userCode = programInfo.getShaderSource(shaderHelper);\n const code = `${extensions.join("\\n")}\n${shaderHelper.additionalImplementations}\n${userCode}`;\n const shaderModule = device.createShaderModule({ code, label: programInfo.name });\n LOG_DEBUG("verbose", () => `[WebGPU] ${programInfo.name} shader code: ${code}`);\n const computePipeline = device.createComputePipeline(\n { compute: { module: shaderModule, entryPoint: "main" }, layout: "auto", label: programInfo.name }\n );\n TRACE_FUNC_END(programInfo.name);\n return { programInfo, computePipeline, uniformVariablesInfo: shaderHelper.variablesInfo };\n }\n normalizeDispatchGroupSize(dispatchGroup) {\n const x = typeof dispatchGroup === "number" ? dispatchGroup : dispatchGroup.x;\n const y = typeof dispatchGroup === "number" ? 1 : dispatchGroup.y || 1;\n const z = typeof dispatchGroup === "number" ? 
1 : dispatchGroup.z || 1;\n const limitPerDimension = this.backend.device.limits.maxComputeWorkgroupsPerDimension;\n if (x <= limitPerDimension && y <= limitPerDimension && z <= limitPerDimension) {\n return [x, y, z];\n }\n const size = x * y * z;\n let dispatchAverage = Math.ceil(Math.sqrt(size));\n if (dispatchAverage > limitPerDimension) {\n dispatchAverage = Math.ceil(Math.cbrt(size));\n if (dispatchAverage > limitPerDimension) {\n throw new Error("Total dispatch size exceeds WebGPU maximum.");\n }\n return [dispatchAverage, dispatchAverage, dispatchAverage];\n } else {\n return [dispatchAverage, dispatchAverage, 1];\n }\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/backend-webgpu.ts\n var getProgramInputTensorInfoDependencyKey, getProgramInfoUniqueKey, AdapterInfoImpl, WebGpuBackend;\n var init_backend_webgpu = __esm({\n "web/lib/wasm/jsep/backend-webgpu.ts"() {\n "use strict";\n init_esm();\n init_wasm_common();\n init_log();\n init_tensor_view();\n init_gpu_data_manager();\n init_op_resolve_rules();\n init_program_manager();\n getProgramInputTensorInfoDependencyKey = (inputTensors, inputDependencies) => {\n if (inputDependencies.length !== inputTensors.length) {\n throw new Error(`inputDependencies length ${inputDependencies.length} is not equal to inputTensors length ${inputTensors.length}.`);\n }\n const inputInfos = [];\n for (let i = 0; i < inputTensors.length; ++i) {\n const type = inputTensors[i].dataType;\n switch (inputDependencies[i]) {\n case "none": {\n inputInfos.push("");\n break;\n }\n case "type": {\n inputInfos.push(`${type}`);\n break;\n }\n case "rank": {\n const rank = inputTensors[i].dims.length;\n inputInfos.push(`${type};${rank}`);\n break;\n }\n case "dims": {\n const dims = inputTensors[i].dims.join(",");\n inputInfos.push(`${type};${dims}`);\n break;\n }\n default:\n throw new Error(`unsupported input dependency: ${inputDependencies[i]}`);\n }\n }\n return inputInfos.join("|");\n };\n getProgramInfoUniqueKey = (programInfo, 
inputTensors, is1DimensionDispatch) => {\n let key = programInfo.name;\n if (programInfo.shaderCache?.hint) {\n key += "[" + programInfo.shaderCache.hint + "]";\n }\n key += ":" + is1DimensionDispatch + `:${getProgramInputTensorInfoDependencyKey(\n inputTensors,\n programInfo.shaderCache?.inputDependencies ?? new Array(inputTensors.length).fill("dims")\n )}`;\n return key;\n };\n AdapterInfoImpl = class {\n constructor(adapterInfo) {\n if (adapterInfo) {\n this.architecture = adapterInfo.architecture;\n this.vendor = adapterInfo.vendor;\n }\n }\n isArchitecture(architecture) {\n return this.architecture === architecture;\n }\n isVendor(vendor) {\n return this.vendor === vendor;\n }\n };\n WebGpuBackend = class {\n constructor() {\n /**\n * representing the session ID of which is currently being run.\n * `null` means no session is being run.\n * only valid when session.run is executed.\n */\n this.currentSessionId = null;\n /**\n * representing the kernel ID of which is currently being computed (CPU code perspective).\n * `null` means no kernel is being computed.\n * only one kernel can be computed at a moment.\n */\n this.currentKernelId = null;\n this.commandEncoder = null;\n this.computePassEncoder = null;\n this.maxDispatchNumber = 16;\n this.pendingDispatchNumber = 0;\n // info of kernels pending submission for a single batch\n this.pendingKernels = [];\n // queryReadBuffer -> pendingKernels mapping for all the batches\n this.pendingQueries = /* @__PURE__ */ new Map();\n this.sessionStatus = "default";\n /**\n * a SessionID -> CommandInfo[] mapping. 
It\'s used to record all GPU commands for corresponding session.\n */\n this.capturedCommandList = /* @__PURE__ */ new Map();\n /**\n * a SessionID -> PendingKernelInfo[] mapping for profiling.\n */\n this.capturedPendingKernels = /* @__PURE__ */ new Map();\n /**\n * a SessionID -> a Map of (InputOutputIndex -> [ID, GPUBuffer]) mapping.\n */\n this.sessionExternalDataMapping = /* @__PURE__ */ new Map();\n }\n /**\n * get the custom data of the current kernel\n */\n get currentKernelCustomData() {\n if (this.currentKernelId === null) {\n throw new Error("currentKernelCustomData(): currentKernelId is null. (should not happen)");\n }\n let data = this.kernelCustomData.get(this.currentKernelId);\n if (!data) {\n data = {};\n this.kernelCustomData.set(this.currentKernelId, data);\n }\n return data;\n }\n async initialize(env3, adapter) {\n this.env = env3;\n const requiredFeatures = [];\n const deviceDescriptor = {\n requiredLimits: {\n maxComputeWorkgroupStorageSize: adapter.limits.maxComputeWorkgroupStorageSize,\n maxComputeWorkgroupsPerDimension: adapter.limits.maxComputeWorkgroupsPerDimension,\n maxStorageBufferBindingSize: adapter.limits.maxStorageBufferBindingSize,\n maxBufferSize: adapter.limits.maxBufferSize,\n maxComputeInvocationsPerWorkgroup: adapter.limits.maxComputeInvocationsPerWorkgroup,\n maxComputeWorkgroupSizeX: adapter.limits.maxComputeWorkgroupSizeX,\n maxComputeWorkgroupSizeY: adapter.limits.maxComputeWorkgroupSizeY,\n maxComputeWorkgroupSizeZ: adapter.limits.maxComputeWorkgroupSizeZ\n },\n requiredFeatures\n };\n if (adapter.features.has("chromium-experimental-timestamp-query-inside-passes")) {\n requiredFeatures.push("chromium-experimental-timestamp-query-inside-passes");\n } else if (adapter.features.has("timestamp-query")) {\n requiredFeatures.push("timestamp-query");\n }\n if (adapter.features.has("shader-f16")) {\n requiredFeatures.push("shader-f16");\n }\n this.device = await adapter.requestDevice(deviceDescriptor);\n this.adapterInfo = new 
AdapterInfoImpl(await adapter.requestAdapterInfo());\n this.gpuDataManager = createGpuDataManager(this);\n this.programManager = new ProgramManager(this);\n this.kernels = /* @__PURE__ */ new Map();\n this.kernelPersistentData = /* @__PURE__ */ new Map();\n this.kernelCustomData = /* @__PURE__ */ new Map();\n configureLogger(env3.logLevel, !!env3.debug);\n this.device.onuncapturederror = (ev) => {\n if (ev.error instanceof GPUValidationError) {\n console.error(`An uncaught WebGPU validation error was raised: ${ev.error.message}`);\n }\n };\n Object.defineProperty(\n this.env.webgpu,\n "device",\n { value: this.device, writable: false, enumerable: true, configurable: false }\n );\n Object.defineProperty(\n this.env.webgpu,\n "adapter",\n { value: adapter, writable: false, enumerable: true, configurable: false }\n );\n this.setQueryType();\n }\n dispose() {\n if (typeof this.querySet !== "undefined") {\n this.querySet.destroy();\n }\n this.gpuDataManager.dispose();\n }\n getCommandEncoder() {\n if (!this.commandEncoder) {\n this.commandEncoder = this.device.createCommandEncoder();\n }\n return this.commandEncoder;\n }\n getComputePassEncoder() {\n if (!this.computePassEncoder) {\n const commandEncoder = this.getCommandEncoder();\n const computePassDescriptor = {};\n if (this.queryType === "at-passes") {\n computePassDescriptor.timestampWrites = {\n querySet: this.querySet,\n beginningOfPassWriteIndex: this.pendingDispatchNumber * 2,\n endOfPassWriteIndex: this.pendingDispatchNumber * 2 + 1\n };\n }\n this.computePassEncoder = commandEncoder.beginComputePass(computePassDescriptor);\n }\n return this.computePassEncoder;\n }\n endComputePass() {\n if (this.computePassEncoder) {\n this.computePassEncoder.end();\n this.computePassEncoder = null;\n }\n }\n flush() {\n if (!this.commandEncoder) {\n return;\n }\n TRACE_FUNC_BEGIN();\n this.endComputePass();\n let queryReadBuffer;\n if (this.queryType !== "none") {\n this.commandEncoder.resolveQuerySet(\n this.querySet,\n 
0,\n this.pendingDispatchNumber * 2,\n this.queryResolveBuffer,\n 0\n );\n queryReadBuffer = this.device.createBuffer(\n // eslint-disable-next-line no-bitwise\n { size: this.pendingDispatchNumber * 2 * 8, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST }\n );\n this.pendingQueries.set(queryReadBuffer, this.pendingKernels);\n this.pendingKernels = [];\n this.commandEncoder.copyBufferToBuffer(\n this.queryResolveBuffer,\n 0,\n queryReadBuffer,\n 0,\n this.pendingDispatchNumber * 2 * 8\n );\n }\n this.device.queue.submit([this.commandEncoder.finish()]);\n this.gpuDataManager.refreshPendingBuffers();\n this.commandEncoder = null;\n this.pendingDispatchNumber = 0;\n if (this.queryType !== "none") {\n void queryReadBuffer.mapAsync(GPUMapMode.READ).then(() => {\n const mappedData = new BigUint64Array(queryReadBuffer.getMappedRange());\n const pendingKernels = this.pendingQueries.get(queryReadBuffer);\n for (let i = 0; i < mappedData.length / 2; i++) {\n const pendingKernelInfo = pendingKernels[i];\n const kernelId = pendingKernelInfo.kernelId;\n const kernelInfo = this.kernels.get(kernelId);\n const kernelType = kernelInfo.kernelType;\n const kernelName = kernelInfo.kernelName;\n const programName = pendingKernelInfo.programName;\n const inputTensorViews = pendingKernelInfo.inputTensorViews;\n const outputTensorViews = pendingKernelInfo.outputTensorViews;\n const startTimeU64 = mappedData[i * 2];\n const endTimeU64 = mappedData[i * 2 + 1];\n if (typeof this.queryTimeBase === "undefined") {\n this.queryTimeBase = startTimeU64;\n }\n const startTime = Number(startTimeU64 - this.queryTimeBase);\n const endTime = Number(endTimeU64 - this.queryTimeBase);\n if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) {\n throw new RangeError("incorrect timestamp range");\n }\n if (this.env.webgpu.profiling?.ondata) {\n this.env.webgpu.profiling.ondata({\n version: 1,\n inputsMetadata: inputTensorViews.map(\n (value) => ({ dims: value.dims, dataType: 
tensorDataTypeEnumToString(value.dataType) })\n ),\n outputsMetadata: outputTensorViews.map(\n (value) => ({ dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType) })\n ),\n kernelId,\n kernelType,\n kernelName,\n programName,\n startTime,\n endTime\n });\n } else {\n let inputShapes = "";\n inputTensorViews.forEach((value, i2) => {\n inputShapes += `input[${i2}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;\n });\n let outputShapes = "";\n outputTensorViews.forEach((value, i2) => {\n outputShapes += `output[${i2}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;\n });\n console.log(`[profiling] kernel "${kernelId}|${kernelType}|${kernelName}|${programName}" ${inputShapes}${outputShapes}execution time: ${endTime - startTime} ns`);\n }\n TRACE("GPU", `${programName}::${startTimeU64}::${endTimeU64}`);\n }\n queryReadBuffer.unmap();\n this.pendingQueries.delete(queryReadBuffer);\n });\n }\n TRACE_FUNC_END();\n }\n /**\n * run a WebGPU program.\n * @param program a ProgramInfo instance\n * @param inputTensorViews a TensorView array. each element represents a value already exists in GPU.\n * @param outputIndices an indices array. 
each element can be either -1 (temporary data), -2 (persistent data) or an\n * index to the kernel\'s output.\n * @param createKernelOutput a callback function that create a value to kernel\'s output with the given index\n * @param createIntermediateOutput a callback function that create a value as a intermediate value, either temporary\n * or persistent (owned by the current kernel)\n * @returns a TensorView array representing the result.\n */\n run(program, inputTensorViews, outputIndices, createKernelOutput, createIntermediateOutput, outputCount) {\n TRACE_FUNC_BEGIN(program.name);\n const inputDatas = [];\n for (let i = 0; i < inputTensorViews.length; ++i) {\n const data = inputTensorViews[i].data;\n if (data === 0) {\n continue;\n }\n const gpuData = this.gpuDataManager.get(data);\n if (!gpuData) {\n throw new Error(`no GPU data for input: ${data}`);\n }\n inputDatas.push(gpuData);\n }\n const { outputs, dispatchGroup, programUniforms } = program.getRunData(inputTensorViews);\n const validatedOutputIndices = outputIndices.length === 0 ? outputs.map((_, i) => i) : outputIndices;\n if (validatedOutputIndices.length !== outputs.length) {\n throw new Error(`Output size ${validatedOutputIndices.length} must be equal to ${outputs.length}.`);\n }\n const outputTensorViews = [];\n const outputDatas = [];\n for (let i = 0; i < outputs.length; ++i) {\n if (!Number.isInteger(validatedOutputIndices[i]) || validatedOutputIndices[i] < -3 || validatedOutputIndices[i] >= outputCount) {\n throw new Error(`Invalid output index: ${validatedOutputIndices[i]}`);\n }\n if (validatedOutputIndices[i] === -3) {\n continue;\n }\n const isTemporary = validatedOutputIndices[i] === -1;\n const isPersistent = validatedOutputIndices[i] === -2;\n const tensorView = isTemporary || isPersistent ? 
createIntermediateOutput(outputs[i].dataType, outputs[i].dims) : createKernelOutput(validatedOutputIndices[i], outputs[i].dataType, outputs[i].dims);\n outputTensorViews.push(tensorView);\n if (tensorView.data === 0) {\n continue;\n }\n const gpuData = this.gpuDataManager.get(tensorView.data);\n if (!gpuData) {\n throw new Error(`no GPU data for output: ${tensorView.data}`);\n }\n if (isTemporary) {\n this.temporaryData.push(gpuData);\n }\n if (isPersistent) {\n let persistentData = this.kernelPersistentData.get(this.currentKernelId);\n if (!persistentData) {\n persistentData = [];\n this.kernelPersistentData.set(this.currentKernelId, persistentData);\n }\n persistentData.push(gpuData);\n }\n outputDatas.push(gpuData);\n }\n if (inputDatas.length !== inputTensorViews.length || outputDatas.length !== outputTensorViews.length) {\n if (outputDatas.length === 0) {\n TRACE_FUNC_END(program.name);\n return outputTensorViews;\n }\n throw new Error(\n `Program ${program.name} has zero-sized tensor(s) in inputs or outputs. This is not supported now.`\n );\n }\n let uniformBufferBinding;\n if (programUniforms) {\n let currentOffset = 0;\n const offsets = [];\n programUniforms.forEach((v) => {\n const data = typeof v.data === "number" ? [v.data] : v.data;\n if (data.length === 0) {\n return;\n }\n const sizeOfElement = v.type === 10 /* float16 */ ? 2 : 4;\n let sizeOfVecOrMat;\n let baseAlignment;\n if (v.type === 10 /* float16 */) {\n baseAlignment = data.length > 4 ? 16 : data.length > 2 ? 8 : data.length * sizeOfElement;\n sizeOfVecOrMat = data.length > 4 ? 16 : sizeOfElement * data.length;\n } else {\n baseAlignment = data.length <= 2 ? data.length * sizeOfElement : 16;\n sizeOfVecOrMat = 16;\n }\n currentOffset = Math.ceil(currentOffset / baseAlignment) * baseAlignment;\n offsets.push(currentOffset);\n const elementPerVecOrMat = v.type === 10 /* float16 */ ? 8 : 4;\n currentOffset += data.length > 4 ? 
Math.ceil(data.length / elementPerVecOrMat) * sizeOfVecOrMat : data.length * sizeOfElement;\n });\n const maxAlignmentOfField = 16;\n currentOffset = Math.ceil(currentOffset / maxAlignmentOfField) * maxAlignmentOfField;\n const arrayBuffer = new ArrayBuffer(currentOffset);\n programUniforms.forEach((v, i) => {\n const offset = offsets[i];\n const data = typeof v.data === "number" ? [v.data] : v.data;\n if (v.type === 6 /* int32 */) {\n new Int32Array(arrayBuffer, offset, data.length).set(data);\n } else if (v.type === 12 /* uint32 */) {\n new Uint32Array(arrayBuffer, offset, data.length).set(data);\n } else if (v.type === 10 /* float16 */) {\n new Uint16Array(arrayBuffer, offset, data.length).set(data);\n } else if (v.type === 1 /* float */) {\n new Float32Array(arrayBuffer, offset, data.length).set(data);\n } else {\n throw new Error(`Unsupported uniform type: ${tensorDataTypeEnumToString(v.type)}`);\n }\n });\n const uniformBufferData = (\n // eslint-disable-next-line no-bitwise\n this.gpuDataManager.create(currentOffset, GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM)\n );\n this.device.queue.writeBuffer(uniformBufferData.buffer, 0, arrayBuffer, 0, currentOffset);\n this.gpuDataManager.release(uniformBufferData.id);\n uniformBufferBinding = { offset: 0, size: currentOffset, buffer: uniformBufferData.buffer };\n }\n const normalizedDispatchGroup = this.programManager.normalizeDispatchGroupSize(dispatchGroup);\n const is1DimensionDispatch = normalizedDispatchGroup[1] === 1 && normalizedDispatchGroup[2] === 1;\n const key = getProgramInfoUniqueKey(program, inputTensorViews, is1DimensionDispatch);\n let artifact = this.programManager.getArtifact(key);\n if (!artifact) {\n artifact = this.programManager.build(program, normalizedDispatchGroup);\n this.programManager.setArtifact(key, artifact);\n LOG_DEBUG("info", () => `[artifact] key: ${key}, programName: ${program.name}`);\n }\n if (programUniforms && artifact.uniformVariablesInfo) {\n if (programUniforms.length 
!== artifact.uniformVariablesInfo.length) {\n throw new Error(`Uniform variables count mismatch: expect ${artifact.uniformVariablesInfo.length}, got ${programUniforms.length} in program "${artifact.programInfo.name}".`);\n }\n for (let i = 0; i < programUniforms.length; i++) {\n const uniform = programUniforms[i];\n const actualType = uniform.type;\n const actualLength = typeof uniform.data === "number" ? 1 : uniform.data.length;\n const [type, length] = artifact.uniformVariablesInfo[i];\n if (actualType !== type || actualLength !== length) {\n throw new Error(`Uniform variable ${i} mismatch: expect type ${type} with size ${length}, got type ${actualType} with size ${actualLength} in program "${artifact.programInfo.name}".`);\n }\n }\n }\n LOG_DEBUG(\n "info",\n () => `[ProgramManager] run "${program.name}" (key=${key}) with ${normalizedDispatchGroup[0]}x${normalizedDispatchGroup[1]}x${normalizedDispatchGroup[2]}`\n );\n if (this.queryType !== "none" || this.sessionStatus === "capturing") {\n const pendingKernelInfo = {\n kernelId: this.currentKernelId,\n programName: artifact.programInfo.name,\n inputTensorViews,\n outputTensorViews\n };\n this.pendingKernels.push(pendingKernelInfo);\n if (this.sessionStatus === "capturing") {\n const sessionPendingKernels = this.capturedPendingKernels.get(this.currentSessionId);\n sessionPendingKernels.push(pendingKernelInfo);\n }\n }\n this.programManager.run(artifact, inputDatas, outputDatas, normalizedDispatchGroup, uniformBufferBinding);\n TRACE_FUNC_END(program.name);\n return outputTensorViews;\n }\n upload(gpuDataId, data) {\n this.gpuDataManager.upload(gpuDataId, data);\n }\n memcpy(src, dst) {\n this.gpuDataManager.memcpy(src, dst);\n }\n async download(gpuDataId, getTargetBuffer) {\n await this.gpuDataManager.download(gpuDataId, getTargetBuffer);\n }\n alloc(size) {\n return this.gpuDataManager.create(size).id;\n }\n free(ptr) {\n return this.gpuDataManager.release(ptr);\n }\n createKernel(kernelType, kernelId, 
attribute, kernelName) {\n const op = WEBGPU_OP_RESOLVE_RULES.get(kernelType);\n if (!op) {\n throw new Error(`kernel not implemented: ${kernelType}`);\n }\n const kernelInfo = {\n kernelType,\n kernelName,\n kernelEntry: op[0],\n attributes: [op[1], attribute]\n };\n this.kernels.set(kernelId, kernelInfo);\n }\n releaseKernel(kernelId) {\n const persistentData = this.kernelPersistentData.get(kernelId);\n if (persistentData) {\n for (const data of persistentData) {\n this.gpuDataManager.release(data.id);\n }\n this.kernelPersistentData.delete(kernelId);\n }\n this.kernelCustomData.delete(kernelId);\n this.kernels.delete(kernelId);\n }\n computeKernel(kernelId, context, errors) {\n const kernel = this.kernels.get(kernelId);\n if (!kernel) {\n throw new Error(`kernel not created: ${kernelId}`);\n }\n const kernelType = kernel.kernelType;\n const kernelName = kernel.kernelName;\n const kernelEntry = kernel.kernelEntry;\n const attributes = kernel.attributes;\n if (this.currentKernelId !== null) {\n throw new Error(`kernel "[${kernelType}] ${kernelName}" is not allowed to be called recursively`);\n }\n this.currentKernelId = kernelId;\n if (attributes[0]) {\n attributes[1] = attributes[0](attributes[1]);\n attributes[0] = void 0;\n }\n LOG_DEBUG("info", () => `[WebGPU] Start to run kernel "[${kernelType}] ${kernelName}"...`);\n const useErrorScope = this.env.debug;\n this.temporaryData = [];\n try {\n if (useErrorScope) {\n this.device.pushErrorScope("validation");\n }\n kernelEntry(context, attributes[1]);\n return 0;\n } catch (e) {\n errors.push(Promise.resolve(`[WebGPU] Kernel "[${kernelType}] ${kernelName}" failed. ${e}`));\n return 1;\n } finally {\n if (useErrorScope) {\n errors.push(this.device.popErrorScope().then(\n (err) => err ? 
`GPU validation error for kernel "[${kernelType}] ${kernelName}": ${err.message}` : null\n ));\n }\n for (const data of this.temporaryData) {\n this.gpuDataManager.release(data.id);\n }\n this.temporaryData = [];\n this.currentKernelId = null;\n }\n }\n // #region external buffer\n registerBuffer(sessionId, index, buffer, size) {\n let sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId);\n if (!sessionInputOutputMapping) {\n sessionInputOutputMapping = /* @__PURE__ */ new Map();\n this.sessionExternalDataMapping.set(sessionId, sessionInputOutputMapping);\n }\n const previousBuffer = sessionInputOutputMapping.get(index);\n const id = this.gpuDataManager.registerExternalBuffer(buffer, size, previousBuffer?.[1]);\n sessionInputOutputMapping.set(index, [id, buffer]);\n return id;\n }\n unregisterBuffers(sessionId) {\n const sessionInputOutputMapping = this.sessionExternalDataMapping.get(sessionId);\n if (sessionInputOutputMapping) {\n sessionInputOutputMapping.forEach((bufferInfo) => this.gpuDataManager.unregisterExternalBuffer(bufferInfo[1]));\n this.sessionExternalDataMapping.delete(sessionId);\n }\n }\n getBuffer(gpuDataId) {\n const gpuData = this.gpuDataManager.get(gpuDataId);\n if (!gpuData) {\n throw new Error(`no GPU data for buffer: ${gpuDataId}`);\n }\n return gpuData.buffer;\n }\n createDownloader(gpuBuffer, size, type) {\n return async () => {\n const data = await downloadGpuData(this, gpuBuffer, size);\n return createView(data.buffer, type);\n };\n }\n // #endregion\n writeTimestamp(index) {\n if (this.queryType !== "inside-passes") {\n return;\n }\n this.computePassEncoder.writeTimestamp(this.querySet, index);\n }\n setQueryType() {\n this.queryType = "none";\n if (this.env.webgpu.profiling?.mode === "default" || (typeof this.env.trace === "undefined" ? 
this.env.wasm.trace : this.env.trace)) {\n if (this.device.features.has("chromium-experimental-timestamp-query-inside-passes")) {\n this.queryType = "inside-passes";\n } else if (this.device.features.has("timestamp-query")) {\n this.queryType = "at-passes";\n }\n if (this.queryType !== "none" && typeof this.querySet === "undefined") {\n this.querySet = this.device.createQuerySet({\n type: "timestamp",\n count: this.maxDispatchNumber * 2\n });\n this.queryResolveBuffer = this.device.createBuffer(\n // eslint-disable-next-line no-bitwise\n { size: this.maxDispatchNumber * 2 * 8, usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE }\n );\n }\n }\n }\n captureBegin() {\n LOG_DEBUG("info", "captureBegin");\n if (!this.capturedCommandList.get(this.currentSessionId)) {\n this.capturedCommandList.set(this.currentSessionId, []);\n }\n if (!this.capturedPendingKernels.get(this.currentSessionId)) {\n this.capturedPendingKernels.set(this.currentSessionId, []);\n }\n this.flush();\n this.sessionStatus = "capturing";\n }\n captureEnd() {\n LOG_DEBUG("info", "captureEnd");\n this.flush();\n this.sessionStatus = "default";\n }\n replay() {\n LOG_DEBUG("info", "replay");\n this.sessionStatus = "replaying";\n const sessionCommandList = this.capturedCommandList.get(this.currentSessionId);\n const sessionPendingKernels = this.capturedPendingKernels.get(this.currentSessionId);\n const length = sessionCommandList.length;\n this.pendingKernels = [];\n for (let i = 0; i < length; i++) {\n const computePassEncoder = this.getComputePassEncoder();\n const command = sessionCommandList[i];\n this.writeTimestamp(this.pendingDispatchNumber * 2);\n computePassEncoder.setPipeline(command.computePipeline);\n computePassEncoder.setBindGroup(0, command.bindGroup);\n computePassEncoder.dispatchWorkgroups(...command.dispatchGroup);\n this.writeTimestamp(this.pendingDispatchNumber * 2 + 1);\n this.pendingDispatchNumber++;\n if (this.queryType !== "none") {\n 
this.pendingKernels.push(sessionPendingKernels[i]);\n }\n if (this.pendingDispatchNumber >= this.maxDispatchNumber || this.queryType === "at-passes") {\n this.endComputePass();\n }\n if (this.pendingDispatchNumber >= this.maxDispatchNumber) {\n this.flush();\n }\n }\n this.flush();\n this.sessionStatus = "default";\n }\n onReleaseSession(sessionId) {\n this.unregisterBuffers(sessionId);\n if (this.capturedCommandList.has(sessionId)) {\n this.capturedCommandList.delete(sessionId);\n }\n if (this.capturedPendingKernels.has(sessionId)) {\n this.capturedPendingKernels.delete(sessionId);\n }\n this.gpuDataManager.onReleaseSession(sessionId);\n }\n onRunStart(sessionId) {\n this.currentSessionId = sessionId;\n this.setQueryType();\n }\n };\n }\n });\n\n // web/lib/wasm/jsep/init.ts\n var init_exports = {};\n __export(init_exports, {\n init: () => init\n });\n var TensorViewImpl, ComputeContextImpl, init;\n var init_init = __esm({\n "web/lib/wasm/jsep/init.ts"() {\n "use strict";\n init_wasm_common();\n init_backend_webgpu();\n init_log();\n init_util();\n TensorViewImpl = class _TensorViewImpl {\n constructor(module, dataType, data, dims) {\n this.module = module;\n this.dataType = dataType;\n this.data = data;\n this.dims = dims;\n }\n getFloat32Array() {\n if (this.dataType !== 1 /* float */) {\n throw new Error("Invalid data type");\n }\n const elementCount = ShapeUtil.size(this.dims);\n return elementCount === 0 ? new Float32Array() : new Float32Array(this.module.HEAP8.buffer, this.data, elementCount);\n }\n getBigInt64Array() {\n if (this.dataType !== 7 /* int64 */) {\n throw new Error("Invalid data type");\n }\n const elementCount = ShapeUtil.size(this.dims);\n return elementCount === 0 ? new BigInt64Array() : new BigInt64Array(this.module.HEAP8.buffer, this.data, elementCount);\n }\n getInt32Array() {\n if (this.dataType !== 6 /* int32 */) {\n throw new Error("Invalid data type");\n }\n const elementCount = ShapeUtil.size(this.dims);\n return elementCount === 0 ? 
new Int32Array() : new Int32Array(this.module.HEAP8.buffer, this.data, elementCount);\n }\n reshape(newDims) {\n if (ShapeUtil.size(newDims) !== ShapeUtil.size(this.dims)) {\n throw new Error("Invalid new shape");\n }\n return new _TensorViewImpl(this.module, this.dataType, this.data, newDims);\n }\n };\n ComputeContextImpl = class {\n constructor(module, backend, contextDataOffset) {\n this.module = module;\n this.backend = backend;\n this.customDataOffset = 0;\n this.customDataSize = 0;\n this.adapterInfo = backend.adapterInfo;\n const heapU32 = module.HEAPU32;\n let dataIndex = contextDataOffset >>> 2;\n this.opKernelContext = heapU32[dataIndex++];\n const inputCount = heapU32[dataIndex++];\n this.outputCount = heapU32[dataIndex++];\n this.customDataOffset = heapU32[dataIndex++];\n this.customDataSize = heapU32[dataIndex++];\n const inputs = [];\n for (let i = 0; i < inputCount; i++) {\n const dataType = heapU32[dataIndex++];\n const data = heapU32[dataIndex++];\n const dim = heapU32[dataIndex++];\n const dims = [];\n for (let d = 0; d < dim; d++) {\n dims.push(heapU32[dataIndex++]);\n }\n inputs.push(new TensorViewImpl(module, dataType, data, dims));\n }\n this.inputs = inputs;\n }\n get kernelCustomData() {\n return this.backend.currentKernelCustomData;\n }\n get customDataBuffer() {\n return this.module.HEAPU8.subarray(this.customDataOffset, this.customDataOffset + this.customDataSize);\n }\n getMaxComputeWorkgroupSizes() {\n return [\n this.backend.device.limits.maxComputeWorkgroupSizeX,\n this.backend.device.limits.maxComputeWorkgroupSizeY,\n this.backend.device.limits.maxComputeWorkgroupSizeZ\n ];\n }\n getMaxComputeWorkgroupStoragesize() {\n return this.backend.device.limits.maxComputeWorkgroupStorageSize;\n }\n compute(program, inputsOutputsMapping) {\n const mappedInputs = inputsOutputsMapping?.inputs?.map((i) => typeof i === "number" ? this.inputs[i] : i) ?? this.inputs;\n const outputIndices = inputsOutputsMapping?.outputs ?? 
[];\n const createKernelOutput = (index, dataType, dims) => new TensorViewImpl(this.module, dataType, this.output(index, dims), dims);\n const createTemporaryOutput = (dataType, dims) => {\n const elementSize = getTensorElementSize(dataType);\n if (!elementSize) {\n throw new Error(`Unsupported data type: ${dataType}`);\n }\n const bufferSize = elementSize * ShapeUtil.size(dims);\n const gpuDataId = bufferSize > 0 ? this.backend.gpuDataManager.create(bufferSize).id : 0;\n return new TensorViewImpl(this.module, dataType, gpuDataId, dims);\n };\n return this.backend.run(\n program,\n mappedInputs,\n outputIndices,\n createKernelOutput,\n createTemporaryOutput,\n this.outputCount\n );\n }\n output(index, dims) {\n const stack = this.module.stackSave();\n try {\n const data = this.module.stackAlloc(\n (1 + dims.length) * 4\n /* sizeof(size_t) */\n );\n let offset = data >> 2;\n this.module.HEAPU32[offset++] = dims.length;\n for (let i = 0; i < dims.length; i++) {\n this.module.HEAPU32[offset++] = dims[i];\n }\n return this.module._JsepOutput(this.opKernelContext, index, data);\n } catch (e) {\n throw new Error(\n `Failed to generate kernel\'s output[${index}] with dims [${dims}]. If you are running with pre-allocated output, please make sure the output type/dims are correct. Error: ${e}`\n );\n } finally {\n this.module.stackRestore(stack);\n }\n }\n };\n init = async (name, module, env3, gpuAdapter) => {\n const jsepInit = module.jsepInit;\n if (!jsepInit) {\n throw new Error("Failed to initialize JSEP. 
The WebAssembly module is not built with JSEP support.");\n }\n if (name === "webgpu") {\n const backend = new WebGpuBackend();\n await backend.initialize(env3, gpuAdapter);\n jsepInit("webgpu", [\n // backend\n backend,\n // jsepAlloc()\n (size) => backend.alloc(size),\n // jsepFree()\n (ptr) => backend.free(ptr),\n // jsepCopy(src, dst, size, isSourceGpu)\n (src, dst, size, isSourceGpu = false) => {\n if (isSourceGpu) {\n LOG_DEBUG("verbose", () => `[WebGPU] jsepCopyGpuToGpu: src=${src}, dst=${dst}, size=${size}`);\n backend.memcpy(src, dst);\n } else {\n LOG_DEBUG("verbose", () => `[WebGPU] jsepCopyCpuToGpu: dataOffset=${src}, gpuDataId=${dst}, size=${size}`);\n const data = module.HEAPU8.subarray(src >>> 0, (src >>> 0) + size);\n backend.upload(dst, data);\n }\n },\n // jsepCopyAsync(src, dst, size)\n async (gpuDataId, dataOffset, size) => {\n LOG_DEBUG(\n "verbose",\n () => `[WebGPU] jsepCopyGpuToCpu: gpuDataId=${gpuDataId}, dataOffset=${dataOffset}, size=${size}`\n );\n await backend.download(\n gpuDataId,\n () => module.HEAPU8.subarray(dataOffset >>> 0, (dataOffset >>> 0) + size)\n );\n },\n // jsepCreateKernel\n (kernelType, kernelId, attribute) => backend.createKernel(\n kernelType,\n kernelId,\n attribute,\n module.UTF8ToString(module._JsepGetNodeName(kernelId))\n ),\n // jsepReleaseKernel\n (kernel) => backend.releaseKernel(kernel),\n // jsepRun\n (kernel, contextDataOffset, sessionHandle, errors) => {\n LOG_DEBUG(\n "verbose",\n () => `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${contextDataOffset}`\n );\n const context = new ComputeContextImpl(module, backend, contextDataOffset);\n return backend.computeKernel(kernel, context, errors);\n },\n // jsepCaptureBegin\n () => backend.captureBegin(),\n // jsepCaptureEnd\n () => backend.captureEnd(),\n // jsepReplay\n () => backend.replay()\n ]);\n } else {\n jsepInit("webnn");\n }\n };\n }\n });\n\n // nodejs-ignore:node:path\n var join = void 0;\n\n // 
web/lib/wasm/wasm-factory.ts\n var ortWasmFactory;\n if (false) {\n ortWasmFactory = null;\n } else {\n ortWasmFactory = false ? null : require_ort_wasm_simd_jsep();\n }\n var ortWasmFactoryThreaded = true ? false ? null : require_ort_wasm_simd_threaded_jsep() : ortWasmFactory;\n var wasm;\n var initialized = false;\n var initializing = false;\n var aborted = false;\n var isMultiThreadSupported = (numThreads) => {\n if (numThreads === 1) {\n return false;\n }\n if (typeof SharedArrayBuffer === "undefined") {\n if (typeof self !== "undefined" && !self.crossOriginIsolated) {\n console.warn(\n "env.wasm.numThreads is set to " + numThreads + ", but this will not work unless you enable crossOriginIsolated mode. See https://web.dev/cross-origin-isolation-guide/ for more info."\n );\n }\n return false;\n }\n if (typeof process !== "undefined" && process.versions && process.versions.node) {\n console.warn(\n "env.wasm.numThreads is set to " + numThreads + ", however, currently onnxruntime-web does not support multi-threads in Node.js. 
Please consider using onnxruntime-node for performance critical scenarios."\n );\n }\n try {\n if (typeof MessageChannel !== "undefined") {\n new MessageChannel().port1.postMessage(new SharedArrayBuffer(1));\n }\n return WebAssembly.validate(new Uint8Array([\n 0,\n 97,\n 115,\n 109,\n 1,\n 0,\n 0,\n 0,\n 1,\n 4,\n 1,\n 96,\n 0,\n 0,\n 3,\n 2,\n 1,\n 0,\n 5,\n 4,\n 1,\n 3,\n 1,\n 1,\n 10,\n 11,\n 1,\n 9,\n 0,\n 65,\n 0,\n 254,\n 16,\n 2,\n 0,\n 26,\n 11\n ]));\n } catch (e) {\n return false;\n }\n };\n var isSimdSupported = () => {\n try {\n return WebAssembly.validate(new Uint8Array([\n 0,\n 97,\n 115,\n 109,\n 1,\n 0,\n 0,\n 0,\n 1,\n 4,\n 1,\n 96,\n 0,\n 0,\n 3,\n 2,\n 1,\n 0,\n 10,\n 30,\n 1,\n 28,\n 0,\n 65,\n 0,\n 253,\n 15,\n 253,\n 12,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 0,\n 253,\n 186,\n 1,\n 26,\n 11\n ]));\n } catch (e) {\n return false;\n }\n };\n var getWasmFileName = (useSimd, useThreads) => {\n if (useSimd) {\n if (false) {\n return "ort-training-wasm-simd.wasm";\n }\n return useThreads ? "ort-wasm-simd-threaded.wasm" : "ort-wasm-simd.wasm";\n } else {\n return useThreads ? "ort-wasm-threaded.wasm" : "ort-wasm.wasm";\n }\n };\n var initializeWebAssembly = async (flags) => {\n if (initialized) {\n return Promise.resolve();\n }\n if (initializing) {\n throw new Error("multiple calls to \'initializeWebAssembly()\' detected.");\n }\n if (aborted) {\n throw new Error("previous call to \'initializeWebAssembly()\' failed.");\n }\n initializing = true;\n const timeout = flags.initTimeout;\n const numThreads = flags.numThreads;\n const simd = flags.simd;\n const useThreads = isMultiThreadSupported(numThreads);\n const useSimd = simd && isSimdSupported();\n const wasmPaths = flags.wasmPaths;\n const wasmPrefixOverride = typeof wasmPaths === "string" ? wasmPaths : void 0;\n const wasmFileName = getWasmFileName(useSimd, useThreads);\n const wasmPathOverride = typeof wasmPaths === "object" ? 
wasmPaths[wasmFileName] : void 0;\n let isTimeout = false;\n const tasks = [];\n if (timeout > 0) {\n tasks.push(new Promise((resolve) => {\n setTimeout(() => {\n isTimeout = true;\n resolve();\n }, timeout);\n }));\n }\n tasks.push(new Promise((resolve, reject) => {\n const factory = useThreads ? ortWasmFactoryThreaded : ortWasmFactory;\n const config = {\n locateFile: (fileName, scriptDirectory) => {\n if (useThreads && fileName.endsWith(".worker.js") && typeof Blob !== "undefined") {\n return URL.createObjectURL(new Blob(\n [\n // This require() function is handled by esbuild plugin to load file content as string.\n // eslint-disable-next-line @typescript-eslint/no-require-imports\n require_ort_wasm_threaded_worker()\n ],\n { type: "text/javascript" }\n ));\n }\n if (fileName.endsWith(".wasm")) {\n if (wasmPathOverride) {\n return wasmPathOverride;\n }\n const prefix = wasmPrefixOverride ?? scriptDirectory;\n if (true) {\n if (wasmFileName === "ort-wasm-simd.wasm") {\n return prefix + "ort-wasm-simd.jsep.wasm";\n } else if (wasmFileName === "ort-wasm-simd-threaded.wasm") {\n return prefix + "ort-wasm-simd-threaded.jsep.wasm";\n }\n }\n return prefix + wasmFileName;\n }\n return scriptDirectory + fileName;\n }\n };\n if (useThreads) {\n config.numThreads = numThreads;\n if (typeof Blob === "undefined") {\n config.mainScriptUrlOrBlob = join(__dirname, "ort-wasm-threaded.js");\n } else {\n const scriptSourceCode = `var ortWasmThreaded=${factory.toString()};`;\n config.mainScriptUrlOrBlob = new Blob([scriptSourceCode], { type: "text/javascript" });\n }\n }\n factory(config).then(\n // wasm module initialized successfully\n (module) => {\n initializing = false;\n initialized = true;\n wasm = module;\n resolve();\n },\n // wasm module failed to initialize\n (what) => {\n initializing = false;\n aborted = true;\n reject(what);\n }\n );\n }));\n await Promise.race(tasks);\n if (isTimeout) {\n throw new Error(`WebAssembly backend initializing failed due to timeout: 
${timeout}ms`);\n }\n };\n var getInstance = () => {\n if (initialized && wasm) {\n return wasm;\n }\n throw new Error("WebAssembly is not initialized yet.");\n };\n\n // web/lib/wasm/wasm-utils.ts\n var allocWasmString = (data, allocs) => {\n const wasm2 = getInstance();\n const dataLength = wasm2.lengthBytesUTF8(data) + 1;\n const dataOffset = wasm2._malloc(dataLength);\n wasm2.stringToUTF8(data, dataOffset, dataLength);\n allocs.push(dataOffset);\n return dataOffset;\n };\n var iterateExtraOptions = (options, prefix, seen, handler) => {\n if (typeof options == "object" && options !== null) {\n if (seen.has(options)) {\n throw new Error("Circular reference in options");\n } else {\n seen.add(options);\n }\n }\n Object.entries(options).forEach(([key, value]) => {\n const name = prefix ? prefix + key : key;\n if (typeof value === "object") {\n iterateExtraOptions(value, name + ".", seen, handler);\n } else if (typeof value === "string" || typeof value === "number") {\n handler(name, value.toString());\n } else if (typeof value === "boolean") {\n handler(name, value ? "1" : "0");\n } else {\n throw new Error(`Can\'t handle extra config type: ${typeof value}`);\n }\n });\n };\n var checkLastError = (message) => {\n const wasm2 = getInstance();\n const stack = wasm2.stackSave();\n try {\n const paramsOffset = wasm2.stackAlloc(8);\n wasm2._OrtGetLastError(paramsOffset, paramsOffset + 4);\n const errorCode = wasm2.HEAP32[paramsOffset / 4];\n const errorMessagePointer = wasm2.HEAPU32[paramsOffset / 4 + 1];\n const errorMessage = errorMessagePointer ? 
wasm2.UTF8ToString(errorMessagePointer) : "";\n throw new Error(`${message} ERROR_CODE: ${errorCode}, ERROR_MESSAGE: ${errorMessage}`);\n } finally {\n wasm2.stackRestore(stack);\n }\n };\n\n // web/lib/wasm/run-options.ts\n var setRunOptions = (options) => {\n const wasm2 = getInstance();\n let runOptionsHandle = 0;\n const allocs = [];\n const runOptions = options || {};\n try {\n if (options?.logSeverityLevel === void 0) {\n runOptions.logSeverityLevel = 2;\n } else if (typeof options.logSeverityLevel !== "number" || !Number.isInteger(options.logSeverityLevel) || options.logSeverityLevel < 0 || options.logSeverityLevel > 4) {\n throw new Error(`log serverity level is not valid: ${options.logSeverityLevel}`);\n }\n if (options?.logVerbosityLevel === void 0) {\n runOptions.logVerbosityLevel = 0;\n } else if (typeof options.logVerbosityLevel !== "number" || !Number.isInteger(options.logVerbosityLevel)) {\n throw new Error(`log verbosity level is not valid: ${options.logVerbosityLevel}`);\n }\n if (options?.terminate === void 0) {\n runOptions.terminate = false;\n }\n let tagDataOffset = 0;\n if (options?.tag !== void 0) {\n tagDataOffset = allocWasmString(options.tag, allocs);\n }\n runOptionsHandle = wasm2._OrtCreateRunOptions(\n runOptions.logSeverityLevel,\n runOptions.logVerbosityLevel,\n !!runOptions.terminate,\n tagDataOffset\n );\n if (runOptionsHandle === 0) {\n checkLastError("Can\'t create run options.");\n }\n if (options?.extra !== void 0) {\n iterateExtraOptions(options.extra, "", /* @__PURE__ */ new WeakSet(), (key, value) => {\n const keyDataOffset = allocWasmString(key, allocs);\n const valueDataOffset = allocWasmString(value, allocs);\n if (wasm2._OrtAddRunConfigEntry(runOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(`Can\'t set a run config entry: ${key} - ${value}.`);\n }\n });\n }\n return [runOptionsHandle, allocs];\n } catch (e) {\n if (runOptionsHandle !== 0) {\n wasm2._OrtReleaseRunOptions(runOptionsHandle);\n }\n 
allocs.forEach((alloc) => wasm2._free(alloc));\n throw e;\n }\n };\n\n // web/lib/wasm/session-options.ts\n var getGraphOptimzationLevel = (graphOptimizationLevel) => {\n switch (graphOptimizationLevel) {\n case "disabled":\n return 0;\n case "basic":\n return 1;\n case "extended":\n return 2;\n case "all":\n return 99;\n default:\n throw new Error(`unsupported graph optimization level: ${graphOptimizationLevel}`);\n }\n };\n var getExecutionMode = (executionMode) => {\n switch (executionMode) {\n case "sequential":\n return 0;\n case "parallel":\n return 1;\n default:\n throw new Error(`unsupported execution mode: ${executionMode}`);\n }\n };\n var appendDefaultOptions = (options) => {\n if (!options.extra) {\n options.extra = {};\n }\n if (!options.extra.session) {\n options.extra.session = {};\n }\n const session = options.extra.session;\n if (!session.use_ort_model_bytes_directly) {\n session.use_ort_model_bytes_directly = "1";\n }\n if (options.executionProviders && options.executionProviders.some((ep) => (typeof ep === "string" ? ep : ep.name) === "webgpu")) {\n options.enableMemPattern = false;\n }\n };\n var setExecutionProviders = (sessionOptionsHandle, executionProviders, allocs) => {\n for (const ep of executionProviders) {\n let epName = typeof ep === "string" ? 
ep : ep.name;\n switch (epName) {\n case "webnn":\n epName = "WEBNN";\n if (typeof ep !== "string") {\n const webnnOptions = ep;\n if (webnnOptions?.deviceType) {\n const keyDataOffset = allocWasmString("deviceType", allocs);\n const valueDataOffset = allocWasmString(webnnOptions.deviceType, allocs);\n if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(`Can\'t set a session config entry: \'deviceType\' - ${webnnOptions.deviceType}.`);\n }\n }\n if (webnnOptions?.numThreads) {\n let numThreads = webnnOptions.numThreads;\n if (typeof numThreads != "number" || !Number.isInteger(numThreads) || numThreads < 0) {\n numThreads = 0;\n }\n const keyDataOffset = allocWasmString("numThreads", allocs);\n const valueDataOffset = allocWasmString(numThreads.toString(), allocs);\n if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(`Can\'t set a session config entry: \'numThreads\' - ${webnnOptions.numThreads}.`);\n }\n }\n if (webnnOptions?.powerPreference) {\n const keyDataOffset = allocWasmString("powerPreference", allocs);\n const valueDataOffset = allocWasmString(webnnOptions.powerPreference, allocs);\n if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(\n `Can\'t set a session config entry: \'powerPreference\' - ${webnnOptions.powerPreference}.`\n );\n }\n }\n }\n break;\n case "webgpu":\n epName = "JS";\n if (typeof ep !== "string") {\n const webgpuOptions = ep;\n if (webgpuOptions?.preferredLayout) {\n if (webgpuOptions.preferredLayout !== "NCHW" && webgpuOptions.preferredLayout !== "NHWC") {\n throw new Error(`preferredLayout must be either \'NCHW\' or \'NHWC\': ${webgpuOptions.preferredLayout}`);\n }\n const keyDataOffset = allocWasmString("preferredLayout", allocs);\n const valueDataOffset = allocWasmString(webgpuOptions.preferredLayout, allocs);\n if 
(getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(\n `Can\'t set a session config entry: \'preferredLayout\' - ${webgpuOptions.preferredLayout}.`\n );\n }\n }\n }\n break;\n case "wasm":\n case "cpu":\n continue;\n default:\n throw new Error(`not supported execution provider: ${epName}`);\n }\n const epNameDataOffset = allocWasmString(epName, allocs);\n if (getInstance()._OrtAppendExecutionProvider(sessionOptionsHandle, epNameDataOffset) !== 0) {\n checkLastError(`Can\'t append execution provider: ${epName}.`);\n }\n }\n };\n var setSessionOptions = (options) => {\n const wasm2 = getInstance();\n let sessionOptionsHandle = 0;\n const allocs = [];\n const sessionOptions = options || {};\n appendDefaultOptions(sessionOptions);\n try {\n const graphOptimizationLevel = getGraphOptimzationLevel(sessionOptions.graphOptimizationLevel ?? "all");\n const executionMode = getExecutionMode(sessionOptions.executionMode ?? "sequential");\n const logIdDataOffset = typeof sessionOptions.logId === "string" ? allocWasmString(sessionOptions.logId, allocs) : 0;\n const logSeverityLevel = sessionOptions.logSeverityLevel ?? 2;\n if (!Number.isInteger(logSeverityLevel) || logSeverityLevel < 0 || logSeverityLevel > 4) {\n throw new Error(`log serverity level is not valid: ${logSeverityLevel}`);\n }\n const logVerbosityLevel = sessionOptions.logVerbosityLevel ?? 0;\n if (!Number.isInteger(logVerbosityLevel) || logVerbosityLevel < 0 || logVerbosityLevel > 4) {\n throw new Error(`log verbosity level is not valid: ${logVerbosityLevel}`);\n }\n const optimizedModelFilePathOffset = typeof sessionOptions.optimizedModelFilePath === "string" ? 
allocWasmString(sessionOptions.optimizedModelFilePath, allocs) : 0;\n sessionOptionsHandle = wasm2._OrtCreateSessionOptions(\n graphOptimizationLevel,\n !!sessionOptions.enableCpuMemArena,\n !!sessionOptions.enableMemPattern,\n executionMode,\n !!sessionOptions.enableProfiling,\n 0,\n logIdDataOffset,\n logSeverityLevel,\n logVerbosityLevel,\n optimizedModelFilePathOffset\n );\n if (sessionOptionsHandle === 0) {\n checkLastError("Can\'t create session options.");\n }\n if (sessionOptions.executionProviders) {\n setExecutionProviders(sessionOptionsHandle, sessionOptions.executionProviders, allocs);\n }\n if (sessionOptions.enableGraphCapture !== void 0) {\n if (typeof sessionOptions.enableGraphCapture !== "boolean") {\n throw new Error(`enableGraphCapture must be a boolean value: ${sessionOptions.enableGraphCapture}`);\n }\n const keyDataOffset = allocWasmString("enableGraphCapture", allocs);\n const valueDataOffset = allocWasmString(sessionOptions.enableGraphCapture.toString(), allocs);\n if (wasm2._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(\n `Can\'t set a session config entry: \'enableGraphCapture\' - ${sessionOptions.enableGraphCapture}.`\n );\n }\n }\n if (sessionOptions.freeDimensionOverrides) {\n for (const [name, value] of Object.entries(sessionOptions.freeDimensionOverrides)) {\n if (typeof name !== "string") {\n throw new Error(`free dimension override name must be a string: ${name}`);\n }\n if (typeof value !== "number" || !Number.isInteger(value) || value < 0) {\n throw new Error(`free dimension override value must be a non-negative integer: ${value}`);\n }\n const nameOffset = allocWasmString(name, allocs);\n if (wasm2._OrtAddFreeDimensionOverride(sessionOptionsHandle, nameOffset, value) !== 0) {\n checkLastError(`Can\'t set a free dimension override: ${name} - ${value}.`);\n }\n }\n }\n if (sessionOptions.extra !== void 0) {\n iterateExtraOptions(sessionOptions.extra, "", /* @__PURE__ */ 
new WeakSet(), (key, value) => {\n const keyDataOffset = allocWasmString(key, allocs);\n const valueDataOffset = allocWasmString(value, allocs);\n if (wasm2._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) {\n checkLastError(`Can\'t set a session config entry: ${key} - ${value}.`);\n }\n });\n }\n return [sessionOptionsHandle, allocs];\n } catch (e) {\n if (sessionOptionsHandle !== 0) {\n wasm2._OrtReleaseSessionOptions(sessionOptionsHandle);\n }\n allocs.forEach((alloc) => wasm2._free(alloc));\n throw e;\n }\n };\n\n // web/lib/wasm/wasm-core-impl.ts\n init_wasm_common();\n\n // web/lib/wasm/wasm-utils-load-file.ts\n init_fs();\n\n // nodejs-ignore:node:fs/promises\n var readFile2 = void 0;\n\n // web/lib/wasm/wasm-utils-load-file.ts\n var loadFile = async (file) => {\n if (typeof file === "string") {\n if (typeof process !== "undefined" && process.versions && process.versions.node) {\n try {\n return new Uint8Array(await readFile2(file));\n } catch (e) {\n if (e.code === "ERR_FS_FILE_TOO_LARGE") {\n const stream = createReadStream(file);\n const chunks = [];\n for await (const chunk of stream) {\n chunks.push(chunk);\n }\n return new Uint8Array(Buffer.concat(chunks));\n }\n throw e;\n }\n } else {\n const response = await fetch(file);\n if (!response.ok) {\n throw new Error(`failed to load external data file: ${file}`);\n }\n const contentLengthHeader = response.headers.get("Content-Length");\n const fileSize = contentLengthHeader ? 
parseInt(contentLengthHeader, 10) : 0;\n if (fileSize < 1073741824) {\n return new Uint8Array(await response.arrayBuffer());\n } else {\n if (!response.body) {\n throw new Error(`failed to load external data file: ${file}, no response body.`);\n }\n const reader = response.body.getReader();\n let buffer;\n try {\n buffer = new ArrayBuffer(fileSize);\n } catch (e) {\n if (e instanceof RangeError) {\n const pages = Math.ceil(fileSize / 65536);\n buffer = new WebAssembly.Memory({ initial: pages, maximum: pages }).buffer;\n } else {\n throw e;\n }\n }\n let offset = 0;\n while (true) {\n const { done, value } = await reader.read();\n if (done) {\n break;\n }\n const chunkSize = value.byteLength;\n const chunk = new Uint8Array(buffer, offset, chunkSize);\n chunk.set(value);\n offset += chunkSize;\n }\n return new Uint8Array(buffer, 0, fileSize);\n }\n }\n } else if (file instanceof Blob) {\n return new Uint8Array(await file.arrayBuffer());\n } else if (file instanceof Uint8Array) {\n return file;\n } else {\n return new Uint8Array(file);\n }\n };\n\n // web/lib/wasm/wasm-core-impl.ts\n var initOrt = (numThreads, loggingLevel) => {\n const errorCode = getInstance()._OrtInit(numThreads, loggingLevel);\n if (errorCode !== 0) {\n checkLastError("Can\'t initialize onnxruntime.");\n }\n };\n var initRuntime = async (env3) => {\n initOrt(env3.wasm.numThreads, logLevelStringToEnum(env3.logLevel));\n };\n var initEp = async (env3, epName) => {\n if (true) {\n const initJsep = (init_init(), __toCommonJS(init_exports)).init;\n if (epName === "webgpu") {\n if (typeof navigator === "undefined" || !navigator.gpu) {\n throw new Error("WebGPU is not supported in current environment");\n }\n let adapter = env3.webgpu.adapter;\n if (!adapter) {\n const powerPreference = env3.webgpu.powerPreference;\n if (powerPreference !== void 0 && powerPreference !== "low-power" && powerPreference !== "high-performance") {\n throw new Error(`Invalid powerPreference setting: "${powerPreference}"`);\n 
}\n const forceFallbackAdapter = env3.webgpu.forceFallbackAdapter;\n if (forceFallbackAdapter !== void 0 && typeof forceFallbackAdapter !== "boolean") {\n throw new Error(`Invalid forceFallbackAdapter setting: "${forceFallbackAdapter}"`);\n }\n adapter = await navigator.gpu.requestAdapter({ powerPreference, forceFallbackAdapter });\n if (!adapter) {\n throw new Error(\n \'Failed to get GPU adapter. You may need to enable flag "--enable-unsafe-webgpu" if you are using Chrome.\'\n );\n }\n } else {\n if (typeof adapter.limits !== "object" || typeof adapter.features !== "object" || typeof adapter.requestDevice !== "function") {\n throw new Error("Invalid GPU adapter set in `env.webgpu.adapter`. It must be a GPUAdapter object.");\n }\n }\n if (!env3.wasm.simd) {\n throw new Error(\n "Not supported for WebGPU=ON and SIMD=OFF. Please set `env.wasm.simd` to true when using `webgpu` EP"\n );\n }\n await initJsep("webgpu", getInstance(), env3, adapter);\n }\n if (epName === "webnn") {\n if (typeof navigator === "undefined" || !navigator.ml) {\n throw new Error("WebNN is not supported in current environment");\n }\n await initJsep("webnn", getInstance(), env3);\n }\n }\n };\n var activeSessions = /* @__PURE__ */ new Map();\n var getSessionInputOutputCount = (sessionHandle) => {\n const wasm2 = getInstance();\n const stack = wasm2.stackSave();\n try {\n const dataOffset = wasm2.stackAlloc(8);\n const errorCode = wasm2._OrtGetInputOutputCount(sessionHandle, dataOffset, dataOffset + 4);\n if (errorCode !== 0) {\n checkLastError("Can\'t get session input/output count.");\n }\n return [wasm2.HEAP32[dataOffset / 4], wasm2.HEAP32[dataOffset / 4 + 1]];\n } finally {\n wasm2.stackRestore(stack);\n }\n };\n var copyFromExternalBuffer = (model) => {\n const wasm2 = getInstance();\n const modelDataOffset = wasm2._malloc(model.byteLength);\n if (modelDataOffset === 0) {\n throw new Error(`Can\'t create a session. 
failed to allocate a buffer of size ${model.byteLength}.`);\n }\n wasm2.HEAPU8.set(model, modelDataOffset);\n return [modelDataOffset, model.byteLength];\n };\n var createSession = async (modelData, options) => {\n let modelDataOffset, modelDataLength;\n const wasm2 = getInstance();\n if (Array.isArray(modelData)) {\n [modelDataOffset, modelDataLength] = modelData;\n } else if (modelData.buffer === wasm2.HEAPU8.buffer) {\n [modelDataOffset, modelDataLength] = [modelData.byteOffset, modelData.byteLength];\n } else {\n [modelDataOffset, modelDataLength] = copyFromExternalBuffer(modelData);\n }\n let sessionHandle = 0;\n let sessionOptionsHandle = 0;\n let ioBindingHandle = 0;\n let allocs = [];\n const inputNamesUTF8Encoded = [];\n const outputNamesUTF8Encoded = [];\n try {\n [sessionOptionsHandle, allocs] = setSessionOptions(options);\n if (options?.externalData && wasm2.mountExternalData) {\n const loadingPromises = [];\n for (const file of options.externalData) {\n const path = typeof file === "string" ? file : file.path;\n loadingPromises.push(loadFile(typeof file === "string" ? 
file : file.data).then((data) => {\n wasm2.mountExternalData(path, data);\n }));\n }\n await Promise.all(loadingPromises);\n }\n sessionHandle = await wasm2._OrtCreateSession(modelDataOffset, modelDataLength, sessionOptionsHandle);\n if (sessionHandle === 0) {\n checkLastError("Can\'t create a session.");\n }\n const [inputCount, outputCount] = getSessionInputOutputCount(sessionHandle);\n const enableGraphCapture = !!options?.enableGraphCapture;\n const inputNames = [];\n const outputNames = [];\n const outputPreferredLocations = [];\n for (let i = 0; i < inputCount; i++) {\n const name = wasm2._OrtGetInputName(sessionHandle, i);\n if (name === 0) {\n checkLastError("Can\'t get an input name.");\n }\n inputNamesUTF8Encoded.push(name);\n inputNames.push(wasm2.UTF8ToString(name));\n }\n for (let i = 0; i < outputCount; i++) {\n const name = wasm2._OrtGetOutputName(sessionHandle, i);\n if (name === 0) {\n checkLastError("Can\'t get an output name.");\n }\n outputNamesUTF8Encoded.push(name);\n const nameString = wasm2.UTF8ToString(name);\n outputNames.push(nameString);\n if (true) {\n if (enableGraphCapture && options?.preferredOutputLocation === void 0) {\n outputPreferredLocations.push("gpu-buffer");\n continue;\n }\n const location = typeof options?.preferredOutputLocation === "string" ? options.preferredOutputLocation : options?.preferredOutputLocation?.[nameString] ?? "cpu";\n if (location !== "cpu" && location !== "cpu-pinned" && location !== "gpu-buffer") {\n throw new Error(`Not supported preferred output location: ${location}.`);\n }\n if (enableGraphCapture && location !== "gpu-buffer") {\n throw new Error(`Not supported preferred output location: ${location}. 
Only \'gpu-buffer\' location is supported when enableGraphCapture is true.`);\n }\n outputPreferredLocations.push(location);\n }\n }\n let bindingState = null;\n if (outputPreferredLocations.some((l) => l === "gpu-buffer")) {\n ioBindingHandle = wasm2._OrtCreateBinding(sessionHandle);\n if (ioBindingHandle === 0) {\n checkLastError("Can\'t create IO binding.");\n }\n bindingState = {\n handle: ioBindingHandle,\n outputPreferredLocations,\n outputPreferredLocationsEncoded: outputPreferredLocations.map((l) => dataLocationStringToEnum(l))\n };\n }\n activeSessions.set(\n sessionHandle,\n [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, bindingState, enableGraphCapture, false]\n );\n return [sessionHandle, inputNames, outputNames];\n } catch (e) {\n inputNamesUTF8Encoded.forEach((buf) => wasm2._OrtFree(buf));\n outputNamesUTF8Encoded.forEach((buf) => wasm2._OrtFree(buf));\n if (ioBindingHandle !== 0) {\n wasm2._OrtReleaseBinding(ioBindingHandle);\n }\n if (sessionHandle !== 0) {\n wasm2._OrtReleaseSession(sessionHandle);\n }\n throw e;\n } finally {\n wasm2._free(modelDataOffset);\n if (sessionOptionsHandle !== 0) {\n wasm2._OrtReleaseSessionOptions(sessionOptionsHandle);\n }\n allocs.forEach((alloc) => wasm2._free(alloc));\n wasm2.unmountExternalData?.();\n }\n };\n var releaseSession = (sessionId) => {\n const wasm2 = getInstance();\n const session = activeSessions.get(sessionId);\n if (!session) {\n throw new Error(`cannot release session. 
invalid session id: ${sessionId}`);\n }\n const [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState, enableGraphCapture] = session;\n if (ioBindingState) {\n if (enableGraphCapture) {\n wasm2._OrtClearBoundOutputs(ioBindingState.handle);\n }\n wasm2._OrtReleaseBinding(ioBindingState.handle);\n }\n wasm2.jsepOnReleaseSession?.(sessionId);\n inputNamesUTF8Encoded.forEach((buf) => wasm2._OrtFree(buf));\n outputNamesUTF8Encoded.forEach((buf) => wasm2._OrtFree(buf));\n wasm2._OrtReleaseSession(sessionHandle);\n activeSessions.delete(sessionId);\n };\n var prepareInputOutputTensor = (tensor, tensorHandles, allocs, sessionId, index, enableGraphCapture = false) => {\n if (!tensor) {\n tensorHandles.push(0);\n return;\n }\n const wasm2 = getInstance();\n const dataType = tensor[0];\n const dims = tensor[1];\n const location = tensor[3];\n let rawData;\n let dataByteLength;\n if (dataType === "string" && location === "gpu-buffer") {\n throw new Error("String tensor is not supported on GPU.");\n }\n if (enableGraphCapture && location !== "gpu-buffer") {\n throw new Error(\n `External buffer must be provided for input/output index ${index} when enableGraphCapture is true.`\n );\n }\n if (location === "gpu-buffer") {\n const gpuBuffer = tensor[2].gpuBuffer;\n const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType));\n dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;\n const registerBuffer = wasm2.jsepRegisterBuffer;\n if (!registerBuffer) {\n throw new Error(\'Tensor location "gpu-buffer" is not supported without using WebGPU.\');\n }\n rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength);\n } else {\n const data = tensor[2];\n if (Array.isArray(data)) {\n dataByteLength = 4 * data.length;\n rawData = wasm2._malloc(dataByteLength);\n allocs.push(rawData);\n let dataIndex = rawData / 4;\n for (let i = 0; i < data.length; i++) {\n if (typeof data[i] !== "string") {\n throw new 
TypeError(`tensor data at index ${i} is not a string`);\n }\n wasm2.HEAPU32[dataIndex++] = allocWasmString(data[i], allocs);\n }\n } else {\n dataByteLength = data.byteLength;\n rawData = wasm2._malloc(dataByteLength);\n allocs.push(rawData);\n wasm2.HEAPU8.set(new Uint8Array(data.buffer, data.byteOffset, dataByteLength), rawData);\n }\n }\n const stack = wasm2.stackSave();\n const dimsOffset = wasm2.stackAlloc(4 * dims.length);\n try {\n let dimIndex = dimsOffset / 4;\n dims.forEach((d) => wasm2.HEAP32[dimIndex++] = d);\n const tensor2 = wasm2._OrtCreateTensor(\n tensorDataTypeStringToEnum(dataType),\n rawData,\n dataByteLength,\n dimsOffset,\n dims.length,\n dataLocationStringToEnum(location)\n );\n if (tensor2 === 0) {\n checkLastError(`Can\'t create tensor for input/output. session=${sessionId}, index=${index}.`);\n }\n tensorHandles.push(tensor2);\n } finally {\n wasm2.stackRestore(stack);\n }\n };\n var run = async (sessionId, inputIndices, inputTensors, outputIndices, outputTensors, options) => {\n const wasm2 = getInstance();\n const session = activeSessions.get(sessionId);\n if (!session) {\n throw new Error(`cannot run inference. 
invalid session id: ${sessionId}`);\n }\n const sessionHandle = session[0];\n const inputNamesUTF8Encoded = session[1];\n const outputNamesUTF8Encoded = session[2];\n const ioBindingState = session[3];\n const enableGraphCapture = session[4];\n const inputOutputBound = session[5];\n const inputCount = inputIndices.length;\n const outputCount = outputIndices.length;\n let runOptionsHandle = 0;\n let runOptionsAllocs = [];\n const inputTensorHandles = [];\n const outputTensorHandles = [];\n const inputOutputAllocs = [];\n const beforeRunStack = wasm2.stackSave();\n const inputValuesOffset = wasm2.stackAlloc(inputCount * 4);\n const inputNamesOffset = wasm2.stackAlloc(inputCount * 4);\n const outputValuesOffset = wasm2.stackAlloc(outputCount * 4);\n const outputNamesOffset = wasm2.stackAlloc(outputCount * 4);\n try {\n [runOptionsHandle, runOptionsAllocs] = setRunOptions(options);\n for (let i = 0; i < inputCount; i++) {\n prepareInputOutputTensor(\n inputTensors[i],\n inputTensorHandles,\n inputOutputAllocs,\n sessionId,\n inputIndices[i],\n enableGraphCapture\n );\n }\n for (let i = 0; i < outputCount; i++) {\n prepareInputOutputTensor(\n outputTensors[i],\n outputTensorHandles,\n inputOutputAllocs,\n sessionId,\n inputCount + outputIndices[i],\n enableGraphCapture\n );\n }\n let inputValuesIndex = inputValuesOffset / 4;\n let inputNamesIndex = inputNamesOffset / 4;\n let outputValuesIndex = outputValuesOffset / 4;\n let outputNamesIndex = outputNamesOffset / 4;\n for (let i = 0; i < inputCount; i++) {\n wasm2.HEAPU32[inputValuesIndex++] = inputTensorHandles[i];\n wasm2.HEAPU32[inputNamesIndex++] = inputNamesUTF8Encoded[inputIndices[i]];\n }\n for (let i = 0; i < outputCount; i++) {\n wasm2.HEAPU32[outputValuesIndex++] = outputTensorHandles[i];\n wasm2.HEAPU32[outputNamesIndex++] = outputNamesUTF8Encoded[outputIndices[i]];\n }\n if (ioBindingState && !inputOutputBound) {\n const { handle, outputPreferredLocations, outputPreferredLocationsEncoded } = 
ioBindingState;\n if (inputNamesUTF8Encoded.length !== inputCount) {\n throw new Error(`input count from feeds (${inputCount}) is expected to be always equal to model\'s input count (${inputNamesUTF8Encoded.length}).`);\n }\n for (let i = 0; i < inputCount; i++) {\n const index = inputIndices[i];\n const errorCode2 = await wasm2._OrtBindInput(handle, inputNamesUTF8Encoded[index], inputTensorHandles[i]);\n if (errorCode2 !== 0) {\n checkLastError(`Can\'t bind input[${i}] for session=${sessionId}.`);\n }\n }\n for (let i = 0; i < outputCount; i++) {\n const index = outputIndices[i];\n const location = outputTensors[i]?.[3];\n if (location) {\n const errorCode2 = wasm2._OrtBindOutput(handle, outputNamesUTF8Encoded[index], outputTensorHandles[i], 0);\n if (errorCode2 !== 0) {\n checkLastError(`Can\'t bind pre-allocated output[${i}] for session=${sessionId}.`);\n }\n } else {\n const errorCode2 = wasm2._OrtBindOutput(handle, outputNamesUTF8Encoded[index], 0, outputPreferredLocationsEncoded[index]);\n if (errorCode2 !== 0) {\n checkLastError(`Can\'t bind output[${i}] to ${outputPreferredLocations[i]} for session=${sessionId}.`);\n }\n }\n }\n activeSessions.set(\n sessionId,\n [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState, enableGraphCapture, true]\n );\n }\n wasm2.jsepOnRunStart?.(sessionHandle);\n let errorCode;\n if (ioBindingState) {\n errorCode = await wasm2._OrtRunWithBinding(\n sessionHandle,\n ioBindingState.handle,\n outputCount,\n outputValuesOffset,\n runOptionsHandle\n );\n } else {\n errorCode = await wasm2._OrtRun(\n sessionHandle,\n inputNamesOffset,\n inputValuesOffset,\n inputCount,\n outputNamesOffset,\n outputCount,\n outputValuesOffset,\n runOptionsHandle\n );\n }\n if (errorCode !== 0) {\n checkLastError("failed to call OrtRun().");\n }\n const output = [];\n for (let i = 0; i < outputCount; i++) {\n const tensor = wasm2.HEAPU32[outputValuesOffset / 4 + i];\n if (tensor === outputTensorHandles[i]) {\n 
output.push(outputTensors[i]);\n continue;\n }\n const beforeGetTensorDataStack = wasm2.stackSave();\n const tensorDataOffset = wasm2.stackAlloc(4 * 4);\n let keepOutputTensor = false;\n let type, dataOffset = 0;\n try {\n const errorCode2 = wasm2._OrtGetTensorData(\n tensor,\n tensorDataOffset,\n tensorDataOffset + 4,\n tensorDataOffset + 8,\n tensorDataOffset + 12\n );\n if (errorCode2 !== 0) {\n checkLastError(`Can\'t access output tensor data on index ${i}.`);\n }\n let tensorDataIndex = tensorDataOffset / 4;\n const dataType = wasm2.HEAPU32[tensorDataIndex++];\n dataOffset = wasm2.HEAPU32[tensorDataIndex++];\n const dimsOffset = wasm2.HEAPU32[tensorDataIndex++];\n const dimsLength = wasm2.HEAPU32[tensorDataIndex++];\n const dims = [];\n for (let i2 = 0; i2 < dimsLength; i2++) {\n dims.push(wasm2.HEAPU32[dimsOffset / 4 + i2]);\n }\n wasm2._OrtFree(dimsOffset);\n const size = dims.reduce((a, b) => a * b, 1);\n type = tensorDataTypeEnumToString(dataType);\n const preferredLocation = ioBindingState?.outputPreferredLocations[outputIndices[i]];\n if (type === "string") {\n if (preferredLocation === "gpu-buffer") {\n throw new Error("String tensor is not supported on GPU.");\n }\n const stringData = [];\n let dataIndex = dataOffset / 4;\n for (let i2 = 0; i2 < size; i2++) {\n const offset = wasm2.HEAPU32[dataIndex++];\n const maxBytesToRead = i2 === size - 1 ? 
void 0 : wasm2.HEAPU32[dataIndex] - offset;\n stringData.push(wasm2.UTF8ToString(offset, maxBytesToRead));\n }\n output.push([type, dims, stringData, "cpu"]);\n } else {\n if (preferredLocation === "gpu-buffer" && size > 0) {\n const getBuffer = wasm2.jsepGetBuffer;\n if (!getBuffer) {\n throw new Error(\'preferredLocation "gpu-buffer" is not supported without using WebGPU.\');\n }\n const gpuBuffer = getBuffer(dataOffset);\n const elementSize = getTensorElementSize(dataType);\n if (elementSize === void 0 || !isGpuBufferSupportedType(type)) {\n throw new Error(`Unsupported data type: ${type}`);\n }\n keepOutputTensor = true;\n output.push([\n type,\n dims,\n {\n gpuBuffer,\n download: wasm2.jsepCreateDownloader(gpuBuffer, size * elementSize, type),\n dispose: () => {\n wasm2._OrtReleaseTensor(tensor);\n }\n },\n "gpu-buffer"\n ]);\n } else {\n const typedArrayConstructor = tensorTypeToTypedArrayConstructor(type);\n const data = new typedArrayConstructor(size);\n new Uint8Array(data.buffer, data.byteOffset, data.byteLength).set(wasm2.HEAPU8.subarray(dataOffset, dataOffset + data.byteLength));\n output.push([type, dims, data, "cpu"]);\n }\n }\n } finally {\n wasm2.stackRestore(beforeGetTensorDataStack);\n if (type === "string" && dataOffset) {\n wasm2._free(dataOffset);\n }\n if (!keepOutputTensor) {\n wasm2._OrtReleaseTensor(tensor);\n }\n }\n }\n if (ioBindingState && !enableGraphCapture) {\n wasm2._OrtClearBoundOutputs(ioBindingState.handle);\n activeSessions.set(\n sessionId,\n [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, ioBindingState, enableGraphCapture, false]\n );\n }\n return output;\n } finally {\n wasm2.stackRestore(beforeRunStack);\n inputTensorHandles.forEach((v) => wasm2._OrtReleaseTensor(v));\n outputTensorHandles.forEach((v) => wasm2._OrtReleaseTensor(v));\n inputOutputAllocs.forEach((p) => wasm2._free(p));\n if (runOptionsHandle !== 0) {\n wasm2._OrtReleaseRunOptions(runOptionsHandle);\n }\n runOptionsAllocs.forEach((p) => 
wasm2._free(p));\n }\n };\n var endProfiling = (sessionId) => {\n const wasm2 = getInstance();\n const session = activeSessions.get(sessionId);\n if (!session) {\n throw new Error("invalid session id");\n }\n const sessionHandle = session[0];\n const profileFileName = wasm2._OrtEndProfiling(sessionHandle);\n if (profileFileName === 0) {\n checkLastError("Can\'t get an profile file name.");\n }\n wasm2._OrtFree(profileFileName);\n };\n var extractTransferableBuffers = (tensors) => {\n const buffers = [];\n for (const tensor of tensors) {\n const data = tensor[2];\n if (!Array.isArray(data) && "buffer" in data) {\n buffers.push(data.buffer);\n }\n }\n return buffers;\n };\n\n // web/lib/wasm/proxy-worker/main.ts\n self.onmessage = (ev) => {\n const { type, in: message } = ev.data;\n try {\n switch (type) {\n case "init-wasm":\n initializeWebAssembly(message.wasm).then(\n () => {\n initRuntime(message).then(\n () => {\n postMessage({ type });\n },\n (err) => {\n postMessage({ type, err });\n }\n );\n },\n (err) => {\n postMessage({ type, err });\n }\n );\n break;\n case "init-ep": {\n const { epName, env: env3 } = message;\n initEp(env3, epName).then(\n () => {\n postMessage({ type });\n },\n (err) => {\n postMessage({ type, err });\n }\n );\n break;\n }\n case "copy-from": {\n const { buffer } = message;\n const bufferData = copyFromExternalBuffer(buffer);\n postMessage({ type, out: bufferData });\n break;\n }\n case "create": {\n const { model, options } = message;\n createSession(model, options).then(\n (sessionMetadata) => {\n postMessage({ type, out: sessionMetadata });\n },\n (err) => {\n postMessage({ type, err });\n }\n );\n break;\n }\n case "release":\n releaseSession(message);\n postMessage({ type });\n break;\n case "run": {\n const { sessionId, inputIndices, inputs, outputIndices, options } = message;\n run(sessionId, inputIndices, inputs, outputIndices, new Array(outputIndices.length).fill(null), options).then(\n (outputs) => {\n if (outputs.some((o) => 
o[3] !== "cpu")) {\n postMessage({ type, err: "Proxy does not support non-cpu tensor location." });\n } else {\n postMessage(\n { type, out: outputs },\n extractTransferableBuffers([...inputs, ...outputs])\n );\n }\n },\n (err) => {\n postMessage({ type, err });\n }\n );\n break;\n }\n case "end-profiling":\n endProfiling(message);\n postMessage({ type });\n break;\n default:\n }\n } catch (err) {\n postMessage({ type, err });\n }\n };\n})();\n/**\n * @license\n * Copyright 2021 Google LLC. All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n/**\n * @license\n * Copyright 2020 Google LLC. All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n/**\n * @license\n * Copyright 2019 Google LLC. 
All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the "License");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an "AS IS" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n * =============================================================================\n */\n//# sourceMappingURL=data:application/json;base64,\n'; + } +}); + +// web/lib/wasm/proxy-wrapper.ts +var isProxy, proxyWorker, initializing2, initialized2, aborted2, initWasmCallbacks, queuedCallbacks, enqueueCallbacks, ensureWorker, onProxyWorkerMessage, scriptSrc, initializeWebAssemblyAndOrtRuntime, initializeOrtEp, copyFromExternalBuffer2, createSession2, releaseSession2, run2, endProfiling2; +var init_proxy_wrapper = __esm({ + "web/lib/wasm/proxy-wrapper.ts"() { + "use strict"; + init_esm(); + init_wasm_core_impl(); + init_wasm_factory(); + isProxy = () => !!env2.wasm.proxy && typeof document !== "undefined"; + initializing2 = false; + initialized2 = false; + aborted2 = false; + queuedCallbacks = /* @__PURE__ */ new Map(); + enqueueCallbacks = (type, callbacks) => { + const queue = queuedCallbacks.get(type); + if (queue) { + queue.push(callbacks); + } else { + queuedCallbacks.set(type, [callbacks]); + } + }; + ensureWorker = () => { + if (initializing2 || !initialized2 || aborted2 || !proxyWorker) { + throw new Error("worker not ready"); + } + }; + onProxyWorkerMessage = (ev) => { + switch (ev.data.type) { + case "init-wasm": + initializing2 = false; + if (ev.data.err) { + aborted2 = true; + initWasmCallbacks[1](ev.data.err); + } else { + initialized2 = true; + initWasmCallbacks[0](); + } + break; + 
case "init-ep": + case "copy-from": + case "create": + case "release": + case "run": + case "end-profiling": { + const callbacks = queuedCallbacks.get(ev.data.type); + if (ev.data.err) { + callbacks.shift()[1](ev.data.err); + } else { + callbacks.shift()[0](ev.data.out); + } + break; + } + default: + } + }; + scriptSrc = typeof document !== "undefined" ? document?.currentScript?.src : void 0; + initializeWebAssemblyAndOrtRuntime = async () => { + if (initialized2) { + return; + } + if (initializing2) { + throw new Error("multiple calls to 'initWasm()' detected."); + } + if (aborted2) { + throw new Error("previous call to 'initWasm()' failed."); + } + initializing2 = true; + if (isProxy()) { + if (env2.wasm.wasmPaths === void 0) { + if (scriptSrc && scriptSrc.indexOf("blob:") !== 0) { + env2.wasm.wasmPaths = scriptSrc.substr(0, +scriptSrc.lastIndexOf("/") + 1); + } + } + return new Promise((resolve, reject) => { + proxyWorker?.terminate(); + const workerUrl = URL.createObjectURL(new Blob( + [ + // This require() function is handled by esbuild plugin to load file content as string. 
+ // eslint-disable-next-line @typescript-eslint/no-require-imports + require_main() + ], + { type: "text/javascript" } + )); + proxyWorker = new Worker(workerUrl, { name: "ort-wasm-proxy-worker" }); + proxyWorker.onerror = (ev) => reject(ev); + proxyWorker.onmessage = onProxyWorkerMessage; + URL.revokeObjectURL(workerUrl); + initWasmCallbacks = [resolve, reject]; + const message = { type: "init-wasm", in: env2 }; + proxyWorker.postMessage(message); + }); + } else { + try { + await initializeWebAssembly(env2.wasm); + await initRuntime(env2); + initialized2 = true; + } catch (e) { + aborted2 = true; + throw e; + } finally { + initializing2 = false; + } + } + }; + initializeOrtEp = async (epName) => { + if (isProxy()) { + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("init-ep", [resolve, reject]); + const message = { type: "init-ep", in: { epName, env: env2 } }; + proxyWorker.postMessage(message); + }); + } else { + await initEp(env2, epName); + } + }; + copyFromExternalBuffer2 = async (buffer) => { + if (isProxy()) { + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("copy-from", [resolve, reject]); + const message = { type: "copy-from", in: { buffer } }; + proxyWorker.postMessage(message, [buffer.buffer]); + }); + } else { + return copyFromExternalBuffer(buffer); + } + }; + createSession2 = async (model, options) => { + if (isProxy()) { + if (options?.preferredOutputLocation) { + throw new Error('session option "preferredOutputLocation" is not supported for proxy.'); + } + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("create", [resolve, reject]); + const message = { type: "create", in: { model, options: { ...options } } }; + const transferable = []; + if (model instanceof Uint8Array) { + transferable.push(model.buffer); + } + proxyWorker.postMessage(message, transferable); + }); + } else { + return createSession(model, options); + } + }; + releaseSession2 = async 
(sessionId) => { + if (isProxy()) { + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("release", [resolve, reject]); + const message = { type: "release", in: sessionId }; + proxyWorker.postMessage(message); + }); + } else { + releaseSession(sessionId); + } + }; + run2 = async (sessionId, inputIndices, inputs, outputIndices, outputs, options) => { + if (isProxy()) { + if (inputs.some((t) => t[3] !== "cpu")) { + throw new Error("input tensor on GPU is not supported for proxy."); + } + if (outputs.some((t) => t)) { + throw new Error("pre-allocated output tensor is not supported for proxy."); + } + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("run", [resolve, reject]); + const serializableInputs = inputs; + const message = { type: "run", in: { sessionId, inputIndices, inputs: serializableInputs, outputIndices, options } }; + proxyWorker.postMessage(message, extractTransferableBuffers(serializableInputs)); + }); + } else { + return run(sessionId, inputIndices, inputs, outputIndices, outputs, options); + } + }; + endProfiling2 = async (sessionId) => { + if (isProxy()) { + ensureWorker(); + return new Promise((resolve, reject) => { + enqueueCallbacks("end-profiling", [resolve, reject]); + const message = { type: "end-profiling", in: sessionId }; + proxyWorker.postMessage(message); + }); + } else { + endProfiling(sessionId); + } + }; + } +}); + +// web/lib/wasm/session-handler-inference.ts +var encodeTensorMetadata, decodeTensorMetadata, OnnxruntimeWebAssemblySessionHandler; +var init_session_handler_inference = __esm({ + "web/lib/wasm/session-handler-inference.ts"() { + "use strict"; + init_esm(); + init_proxy_wrapper(); + init_wasm_common(); + init_wasm_utils_load_file(); + encodeTensorMetadata = (tensor, getName) => { + switch (tensor.location) { + case "cpu": + return [tensor.type, tensor.dims, tensor.data, "cpu"]; + case "gpu-buffer": + return [tensor.type, tensor.dims, { gpuBuffer: tensor.gpuBuffer }, 
"gpu-buffer"]; + default: + throw new Error(`invalid data location: ${tensor.location} for ${getName()}`); + } + }; + decodeTensorMetadata = (tensor) => { + switch (tensor[3]) { + case "cpu": + return new Tensor2(tensor[0], tensor[2], tensor[1]); + case "gpu-buffer": { + const dataType = tensor[0]; + if (!isGpuBufferSupportedType(dataType)) { + throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`); + } + const { gpuBuffer, download, dispose } = tensor[2]; + return Tensor2.fromGpuBuffer(gpuBuffer, { dataType, dims: tensor[1], download, dispose }); + } + default: + throw new Error(`invalid data location: ${tensor[3]}`); + } + }; + OnnxruntimeWebAssemblySessionHandler = class { + async fetchModelAndCopyToWasmMemory(path) { + return copyFromExternalBuffer2(await loadFile(path)); + } + async loadModel(pathOrBuffer, options) { + TRACE_FUNC_BEGIN(); + let model; + if (typeof pathOrBuffer === "string") { + if (typeof process !== "undefined" && process.versions && process.versions.node) { + model = await loadFile(pathOrBuffer); + } else { + model = await this.fetchModelAndCopyToWasmMemory(pathOrBuffer); + } + } else { + model = pathOrBuffer; + } + [this.sessionId, this.inputNames, this.outputNames] = await createSession2(model, options); + TRACE_FUNC_END(); + } + async dispose() { + return releaseSession2(this.sessionId); + } + async run(feeds, fetches, options) { + TRACE_FUNC_BEGIN(); + const inputArray = []; + const inputIndices = []; + Object.entries(feeds).forEach((kvp) => { + const name = kvp[0]; + const tensor = kvp[1]; + const index = this.inputNames.indexOf(name); + if (index === -1) { + throw new Error(`invalid input '${name}'`); + } + inputArray.push(tensor); + inputIndices.push(index); + }); + const outputArray = []; + const outputIndices = []; + Object.entries(fetches).forEach((kvp) => { + const name = kvp[0]; + const tensor = kvp[1]; + const index = this.outputNames.indexOf(name); + if (index === -1) { + throw new Error(`invalid 
output '${name}'`); + } + outputArray.push(tensor); + outputIndices.push(index); + }); + const inputs = inputArray.map((t, i) => encodeTensorMetadata(t, () => `input "${this.inputNames[inputIndices[i]]}"`)); + const outputs = outputArray.map( + (t, i) => t ? encodeTensorMetadata(t, () => `output "${this.outputNames[outputIndices[i]]}"`) : null + ); + const results = await run2(this.sessionId, inputIndices, inputs, outputIndices, outputs, options); + const resultMap = {}; + for (let i = 0; i < results.length; i++) { + resultMap[this.outputNames[outputIndices[i]]] = outputArray[i] ?? decodeTensorMetadata(results[i]); + } + TRACE_FUNC_END(); + return resultMap; + } + startProfiling() { + } + endProfiling() { + void endProfiling2(this.sessionId); + } + }; + } +}); + +// web/lib/backend-wasm.ts +var initializeFlags, OnnxruntimeWebAssemblyBackend; +var init_backend_wasm = __esm({ + "web/lib/backend-wasm.ts"() { + "use strict"; + init_node_os(); + init_esm(); + init_proxy_wrapper(); + init_session_handler_inference(); + initializeFlags = () => { + if (typeof env2.wasm.initTimeout !== "number" || env2.wasm.initTimeout < 0) { + env2.wasm.initTimeout = 0; + } + if (typeof env2.wasm.simd !== "boolean") { + env2.wasm.simd = true; + } + if (typeof env2.wasm.proxy !== "boolean") { + env2.wasm.proxy = false; + } + if (typeof env2.wasm.trace !== "boolean") { + env2.wasm.trace = false; + } + if (typeof env2.wasm.numThreads !== "number" || !Number.isInteger(env2.wasm.numThreads) || env2.wasm.numThreads <= 0) { + if (typeof self !== "undefined" && !self.crossOriginIsolated || typeof process !== "undefined" && process.versions && process.versions.node) { + env2.wasm.numThreads = 1; + } + const numCpuLogicalCores = typeof navigator === "undefined" ? cpus().length : navigator.hardwareConcurrency; + env2.wasm.numThreads = Math.min(4, Math.ceil((numCpuLogicalCores || 1) / 2)); + } + }; + OnnxruntimeWebAssemblyBackend = class { + /** + * This function initializes the WebAssembly backend. 
+ * + * This function will be called only once for each backend name. It will be called the first time when + * `ort.InferenceSession.create()` is called with a registered backend name. + * + * @param backendName - the registered backend name. + */ + async init(backendName) { + initializeFlags(); + await initializeWebAssemblyAndOrtRuntime(); + await initializeOrtEp(backendName); + } + async createInferenceSessionHandler(pathOrBuffer, options) { + const handler = new OnnxruntimeWebAssemblySessionHandler(); + await handler.loadModel(pathOrBuffer, options); + return Promise.resolve(handler); + } + }; + } +}); + +// web/lib/backend-wasm-inference.ts +var backend_wasm_inference_exports = {}; +__export(backend_wasm_inference_exports, { + wasmBackend: () => wasmBackend +}); +var wasmBackend; +var init_backend_wasm_inference = __esm({ + "web/lib/backend-wasm-inference.ts"() { + "use strict"; + init_backend_wasm(); + wasmBackend = new OnnxruntimeWebAssemblyBackend(); + } +}); + +// web/lib/index.ts +init_esm(); +init_esm(); +init_esm(); + +// web/lib/version.ts +var version2 = "1.18.0"; + +// web/lib/index.ts +var lib_default = esm_exports; +if (false) { + const onnxjsBackend = null.onnxjsBackend; + registerBackend("webgl", onnxjsBackend, -10); +} +if (true) { + const wasmBackend2 = true ? (init_backend_wasm_inference(), __toCommonJS(backend_wasm_inference_exports)).wasmBackend : null.wasmBackend; + if (true) { + registerBackend("webgpu", wasmBackend2, 5); + registerBackend("webnn", wasmBackend2, 5); + } + registerBackend("cpu", wasmBackend2, 10); + registerBackend("wasm", wasmBackend2, 10); +} +Object.defineProperty(env2.versions, "web", { value: version2, enumerable: true }); +export { + InferenceSession2 as InferenceSession, + TRACE, + TRACE_FUNC_BEGIN, + TRACE_FUNC_END, + Tensor2 as Tensor, + TrainingSession2 as TrainingSession, + lib_default as default, + env2 as env, + registerBackend +}; +/** + * @license + * Copyright 2021 Google LLC. All Rights Reserved. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +/** + * @license + * Copyright 2020 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +/** + * @license + * Copyright 2019 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ +//# sourceMappingURL=data:application/json;base64,