diff --git "a/assets/index-HiTDJoem.js" "b/assets/index-HiTDJoem.js"
--- "a/assets/index-HiTDJoem.js"
+++ "b/assets/index-HiTDJoem.js"
@@ -2350,4 +2350,4 @@ ${a}`,o=r.createShaderModule({code:s,label:t.name});Qe("verbose",()=>`[WebGPU] $
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
- */const z_=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return ao},get TRACE(){return jn},get TRACE_FUNC_BEGIN(){return Ht},get TRACE_FUNC_END(){return Mt},get Tensor(){return ft},get TrainingSession(){return io},default:O_,get env(){return Fe},get registerBackend(){return Or}},Symbol.toStringTag,{value:"Module"})),R_=(t,e)=>{const r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=t.dims[3],r.height=t.dims[2];const n=r.getContext("2d");if(n!=null){let i,a;e?.tensorLayout!==void 0&&e.tensorLayout==="NHWC"?(i=t.dims[2],a=t.dims[3]):(i=t.dims[3],a=t.dims[2]);const s=e?.format!==void 0?e.format:"RGB",o=e?.norm;let u,l;o===void 0||o.mean===void 0?u=[255,255,255,255]:typeof o.mean=="number"?u=[o.mean,o.mean,o.mean,o.mean]:(u=[o.mean[0],o.mean[1],o.mean[2],0],o.mean[3]!==void 0&&(u[3]=o.mean[3])),o===void 0||o.bias===void 0?l=[0,0,0,0]:typeof o.bias=="number"?l=[o.bias,o.bias,o.bias,o.bias]:(l=[o.bias[0],o.bias[1],o.bias[2],0],o.bias[3]!==void 0&&(l[3]=o.bias[3]));const h=a*i;let f=0,m=h,c=h*2,y=-1;s==="RGBA"?(f=0,m=h,c=h*2,y=h*3):s==="RGB"?(f=0,m=h,c=h*2):s==="RBG"&&(f=0,c=h,m=h*2);for(let b=0;b{const r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d");let n;if(r!=null){let i,a,s;e?.tensorLayout!==void 0&&e.tensorLayout==="NHWC"?(i=t.dims[2],a=t.dims[1],s=t.dims[3]):(i=t.dims[3],a=t.dims[2],s=t.dims[1]);const o=e!==void 0&&e.format!==void 0?e.format:"RGB",u=e?.norm;let l,h;u===void 0||u.mean===void 0?l=[255,255,255,255]:typeof u.mean=="number"?l=[u.mean,u.mean,u.mean,u.mean]:(l=[u.mean[0],u.mean[1],u.mean[2],255],u.mean[3]!==void 0&&(l[3]=u.mean[3])),u===void 0||u.bias===void 0?h=[0,0,0,0]:typeof u.bias=="number"?h=[u.bias,u.bias,u.bias,u.bias]:(h=[u.bias[0],u.bias[1],u.bias[2],0],u.bias[3]!==void 0&&(h[3]=u.bias[3]));const f=a*i;if(e!==void 0&&(e.format!==void 0&&s===4&&e.format!=="RGBA"||s===3&&e.format!=="RGB"&&e.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");const m=4;let c=0,y=1,b=2,v=3,C=0,x=f,T=f*2,I=-1;o==="RGBA"?(C=0,x=f,T=f*2,I=f*3):o==="RGB"?(C=0,x=f,T=f*2):o==="RBG"&&(C=0,T=f,x=f*2),n=r.createImageData(i,a);for(let A=0;A{if(t===void 0)throw new Error("Image buffer must be defined");if(e.height===void 0||e.width===void 0)throw new Error("Image height and width must be defined");if(e.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");const{height:r,width:n}=e,i=e.norm??{mean:255,bias:0};let a,s;typeof i.mean=="number"?a=[i.mean,i.mean,i.mean,i.mean]:a=[i.mean[0],i.mean[1],i.mean[2],i.mean[3]??255],typeof i.bias=="number"?s=[i.bias,i.bias,i.bias,i.bias]:s=[i.bias[0],i.bias[1],i.bias[2],i.bias[3]??0];const o=e.format!==void 0?e.format:"RGBA",u=e.tensorFormat!==void 0&&e.tensorFormat!==void 0?e.tensorFormat:"RGB",l=r*n,h=u==="RGBA"?new Float32Array(l*4):new Float32Array(l*3);let f=4,m=0,c=1,y=2,b=3,v=0,C=l,x=l*2,T=-1;o==="RGB"&&(f=3,m=0,c=1,y=2,b=-1),u==="RGBA"?T=l*3:u==="RBG"?(v=0,x=l,C=l*2):u==="BGR"&&(x=0,C=l,v=l*2);for(let A=0;A{const r=typeof HTMLImageElement<"u"&&t instanceof HTMLImageElement,n=typeof ImageData<"u"&&t instanceof ImageData,i=typeof ImageBitmap<"u"&&t instanceof ImageBitmap,a=typeof t=="string";let s,o=e??{};const u=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},l=h=>h instanceof HTMLCanvasElement||h instanceof OffscreenCanvas?h.getContext("2d"):null;if(r){const h=u();h.width=t.width,h.height=t.height;const f=l(h);if(f!=null){let m=t.height,c=t.width;if(e!==void 0&&e.resizedHeight!==void 0&&e.resizedWidth!==void 0&&(m=e.resizedHeight,c=e.resizedWidth),e!==void 0){if(o=e,e.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");o.tensorFormat="RGBA",o.height=m,o.width=c}else o.tensorFormat="RGBA",o.height=m,o.width=c;f.drawImage(t,0,0),s=f.getImageData(0,0,c,m).data}else throw new Error("Can not access image data")}else if(n){let h,f;if(e!==void 0&&e.resizedWidth!==void 0&&e.resizedHeight!==void 0?(h=e.resizedHeight,f=e.resizedWidth):(h=t.height,f=t.width),e!==void 0&&(o=e),o.format="RGBA",o.height=h,o.width=f,e!==void 0){const m=u();m.width=f,m.height=h;const c=l(m);if(c!=null)c.putImageData(t,0,0),s=c.getImageData(0,0,f,h).data;else throw new Error("Can not access image data")}else s=t.data}else if(i){if(e===void 0)throw new Error("Please provide image config with format for Imagebitmap");const h=u();h.width=t.width,h.height=t.height;const f=l(h);if(f!=null){const m=t.height,c=t.width;return f.drawImage(t,0,0,c,m),s=f.getImageData(0,0,c,m).data,o.height=m,o.width=c,Ts(s,o)}else throw new Error("Can not access image data")}else{if(a)return new Promise((h,f)=>{const m=u(),c=l(m);if(!t||!c)return f();const y=new Image;y.crossOrigin="Anonymous",y.src=t,y.onload=()=>{m.width=y.width,m.height=y.height,c.drawImage(y,0,0,m.width,m.height);const b=c.getImageData(0,0,m.width,m.height);o.height=m.height,o.width=m.width,h(Ts(b.data,o))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(s!==void 0)return Ts(s,o);throw new Error("Input data provided is not supported - aborted tensor creation")},D_=(t,e)=>{const{width:r,height:n,download:i,dispose:a}=e,s=[1,n,r,4];return new Qt({location:"texture",type:"float32",texture:t,dims:s,download:i,dispose:a})},N_=(t,e)=>{const{dataType:r,dims:n,download:i,dispose:a}=e;return new Qt({location:"gpu-buffer",type:r??"float32",gpuBuffer:t,dims:n,download:i,dispose:a})},F_=(t,e,r)=>new Qt({location:"cpu-pinned",type:t,data:e,dims:r??[e.length]}),ln=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array]]),ii=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]);let wc=!1;const L_=()=>{if(!wc){wc=!0;const t=typeof BigInt64Array<"u"&&BigInt64Array.from,e=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof Float16Array<"u"&&Float16Array.from;t&&(ln.set("int64",BigInt64Array),ii.set(BigInt64Array,"int64")),e&&(ln.set("uint64",BigUint64Array),ii.set(BigUint64Array,"uint64")),r?(ln.set("float16",Float16Array),ii.set(Float16Array,"float16")):ln.set("float16",Uint16Array)}},W_=t=>{let e=1;for(let r=0;r{switch(t.location){case"cpu":return new Qt(t.type,t.data,e);case"cpu-pinned":return new Qt({location:"cpu-pinned",data:t.data,type:t.type,dims:e});case"texture":return new Qt({location:"texture",texture:t.texture,type:t.type,dims:e});case"gpu-buffer":return new Qt({location:"gpu-buffer",gpuBuffer:t.gpuBuffer,type:t.type,dims:e});default:throw new Error(`tensorReshape: tensor location ${t.location} is not supported`)}};let Qt=class{constructor(e,r,n){L_();let i,a;if(typeof e=="object"&&"location"in e)switch(this.dataLocation=e.location,i=e.type,a=e.dims,e.location){case"cpu-pinned":{const o=ln.get(i);if(!o)throw new TypeError(`unsupported type "${i}" to create tensor from pinned buffer`);if(!(e.data instanceof o))throw new TypeError(`buffer should be of type ${o.name}`);this.cpuData=e.data;break}case"texture":{if(i!=="float32")throw new TypeError(`unsupported type "${i}" to create tensor from texture`);this.gpuTextureData=e.texture,this.downloader=e.download,this.disposer=e.dispose;break}case"gpu-buffer":{if(i!=="float32"&&i!=="float16"&&i!=="int32"&&i!=="int64"&&i!=="uint32"&&i!=="uint8"&&i!=="bool")throw new TypeError(`unsupported type "${i}" to create tensor from gpu buffer`);this.gpuBufferData=e.gpuBuffer,this.downloader=e.download,this.disposer=e.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let o,u;if(typeof e=="string")if(i=e,u=n,e==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");o=r}else{const l=ln.get(e);if(l===void 0)throw new TypeError(`Unsupported tensor type: ${e}.`);if(Array.isArray(r)){if(e==="float16"&&l===Uint16Array)throw new TypeError("Creating a float16 tensor from number array is not supported. Please use Uint16Array as data.");e==="uint64"||e==="int64"?o=l.from(r,BigInt):o=l.from(r)}else if(r instanceof l)o=r;else throw new TypeError(`A ${i} tensor's data must be type of ${l}`)}else if(u=r,Array.isArray(e)){if(e.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");const l=typeof e[0];if(l==="string")i="string",o=e;else if(l==="boolean")i="bool",o=Uint8Array.from(e);else throw new TypeError(`Invalid element type of data array: ${l}.`)}else{const l=ii.get(e.constructor);if(l===void 0)throw new TypeError(`Unsupported type for tensor data: ${e.constructor}.`);i=l,o=e}if(u===void 0)u=[o.length];else if(!Array.isArray(u))throw new TypeError("A tensor's dims must be a number array");a=u,this.cpuData=o,this.dataLocation="cpu"}const s=W_(a);if(this.cpuData&&s!==this.cpuData.length)throw new Error(`Tensor's size(${s}) does not match data length(${this.cpuData.length}).`);this.type=i,this.dims=a,this.size=s}static async fromImage(e,r){return B_(e,r)}static fromTexture(e,r){return D_(e,r)}static fromGpuBuffer(e,r){return N_(e,r)}static fromPinnedBuffer(e,r,n){return F_(e,r,n)}toDataURL(e){return R_(this,e)}toImageData(e){return P_(this,e)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}async getData(e){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;const r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,e&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(e){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return U_(this,e)}};const V_=Qt,Hn=[];let Ks,Xn;Pr.IS_NODE_ENV?(Xn=Ye??qg,Hn.push("cpu"),Ks=["cpu"]):(Xn=z_,Pr.IS_WEBGPU_AVAILABLE&&Hn.push("webgpu"),Hn.push("wasm"),Ks=["wasm"]);const G_=Xn.InferenceSession;function H_(t){let e=Ks;if(t){if(!Hn.includes(t))throw new Error(`Unsupported device: "${t}". Should be one of: ${Hn.join(", ")}.`);e=[t]}return e}async function lm(t,e){return await G_.create(t,e)}function dm(t){return t instanceof Xn.Tensor}const Mr=Xn?.env;Mr?.wasm&&(Mr.wasm.wasmPaths="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/",Mr.wasm.proxy=!Pr.IS_WEBWORKER_ENV,(typeof crossOriginIsolated>"u"||!crossOriginIsolated)&&(Mr.wasm.numThreads=1),typeof navigator<"u"&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(Mr.wasm.simd=!1));function q_(){return Mr?.wasm?.proxy}bt.backends.onnx=Mr;const an=async(t,e,r)=>{const n=await lm(new Uint8Array(t),e);return async i=>{const a=Object.fromEntries(Object.entries(i).map(([o,u])=>[o,u.ort_tensor])),s=await n.run(a);return Array.isArray(r)?r.map(o=>new ce(s[o])):new ce(s[r])}};class _i{static session_options={};static get bilinear_interpolate_4d(){return this._bilinear_interpolate_4d||(this._bilinear_interpolate_4d=an([8,9,18,0,58,128,1,10,40,10,1,120,10,0,10,0,10,1,115,18,1,121,34,6,82,101,115,105,122,101,42,17,10,4,109,111,100,101,34,6,108,105,110,101,97,114,160,1,3,18,1,114,90,31,10,1,120,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,90,15,10,1,115,18,10,10,8,8,7,18,4,10,2,8,4,98,31,10,1,121,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,66,2,16,20],this.session_options,"y")),this._bilinear_interpolate_4d}static get bicubic_interpolate_4d(){return this._bicubic_interpolate_4d||(this._bicubic_interpolate_4d=an([8,9,18,0,58,127,10,39,10,1,120,10,0,10,0,10,1,115,18,1,121,34,6,82,101,115,105,122,101,42,16,10,4,109,111,100,101,34,5,99,117,98,105,99,160,1,3,18,1,114,90,31,10,1,120,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,90,15,10,1,115,18,10,10,8,8,7,18,4,10,2,8,4,98,31,10,1,121,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,66,2,16,20],this.session_options,"y")),this._bicubic_interpolate_4d}static get matmul(){return this._matmul||(this._matmul=an([8,9,18,0,58,55,10,17,10,1,97,10,1,98,18,1,99,34,6,77,97,116,77,117,108,18,1,114,90,9,10,1,97,18,4,10,2,8,1,90,9,10,1,98,18,4,10,2,8,1,98,9,10,1,99,18,4,10,2,8,1,66,2,16,20],this.session_options,"c")),this._matmul}static get stft(){return this._stft||(this._stft=an([8,7,18,0,58,148,1,10,38,10,1,115,10,1,106,10,1,119,10,1,108,18,1,111,34,4,83,84,70,84,42,15,10,8,111,110,101,115,105,100,101,100,24,1,160,1,2,18,1,115,90,26,10,1,115,18,21,10,19,8,1,18,15,10,3,18,1,98,10,3,18,1,115,10,3,18,1,99,90,11,10,1,106,18,6,10,4,8,7,18,0,90,16,10,1,119,18,11,10,9,8,1,18,5,10,3,18,1,119,90,11,10,1,108,18,6,10,4,8,7,18,0,98,31,10,1,111,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,102,10,3,18,1,100,10,3,18,1,99,66,2,16,17],this.session_options,"o")),this._stft}static get rfft(){return this._rfft||(this._rfft=an([8,9,18,0,58,97,10,33,10,1,120,10,0,10,1,97,18,1,121,34,3,68,70,84,42,15,10,8,111,110,101,115,105,100,101,100,24,1,160,1,2,18,1,100,90,21,10,1,120,18,16,10,14,8,1,18,10,10,3,18,1,115,10,3,18,1,99,90,11,10,1,97,18,6,10,4,8,7,18,0,98,21,10,1,121,18,16,10,14,8,1,18,10,10,3,18,1,115,10,3,18,1,99,66,2,16,20],this.session_options,"y")),this._rfft}static get top_k(){return this._top_k||(this._top_k=an([8,10,18,0,58,73,10,18,10,1,120,10,1,107,18,1,118,18,1,105,34,4,84,111,112,75,18,1,116,90,9,10,1,120,18,4,10,2,8,1,90,15,10,1,107,18,10,10,8,8,7,18,4,10,2,8,1,98,9,10,1,118,18,4,10,2,8,1,98,9,10,1,105,18,4,10,2,8,7,66,2,16,21],this.session_options,["v","i"])),this._top_k}}const bc=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});class ce{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return dm(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new V_(e[0],e[1],e[2]),new Proxy(this,{get:(r,n)=>{if(typeof n=="string"){let i=Number(n);if(Number.isInteger(i))return r._getitem(i)}return r[n]},set:(r,n,i)=>r[n]=i})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...r]=this.dims;if(r.length>0){const n=r.reduce((i,a)=>i*a);for(let i=0;i0){const i=n.reduce((a,s)=>a*s);return this._subarray(e,i,n)}else return new ce(this.type,[this.data[e]],n)}indexOf(e){const r=this.data;for(let n=0;nm)throw new Error(`Invalid slice: ${h}`);let c=[Math.max(f,0),Math.min(m,this.dims[l])];n.push(c),r.push(c[1]-c[0])}else throw new Error(`Invalid slice: ${h}`)}let i=n.map(([l,h])=>h-l),a=i.reduce((l,h)=>l*h);const s=this.data;let o=new s.constructor(a);const u=this.stride();for(let l=0;l=0;--f){const c=i[f];h+=(m%c+n[f][0])*u[f],m=Math.floor(m/c)}o[l]=s[h]}return new ce(this.type,o,r)}permute(...e){return K_(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,r=!1){return this.norm(1,e,r)}norm(e="fro",r=null,n=!1){if(e==="fro")e=2;else if(typeof e=="string")throw Error(`Unsupported norm: ${e}`);const i=this.data;if(r===null){let o=i.reduce((u,l)=>u+l**e,0)**(1/e);return new ce(this.type,[o],[])}r=Xt(r,this.dims.length);const a=this.dims.slice();a[r]=1;const s=new i.constructor(i.length/this.dims[r]);for(let o=0;o=0;--l){const m=this.dims[l];if(l!==r){const c=h%m;u+=c*f,f*=a[l]}h=Math.floor(h/m)}s[u]+=i[o]**e}if(e!==1)for(let o=0;o=0;--o){const h=this.dims[o];if(o!==r){const f=u%h;s+=f*l,l*=this.dims[o]}u=Math.floor(u/h)}i[a]/=n.data[s]}return this}normalize(e=2,r=1){return this.clone().normalize_(e,r)}stride(){return J_(this.dims)}squeeze(e=null){return new ce(this.type,this.data,$c(this.dims,e))}squeeze_(e=null){return this.dims=$c(this.dims,e),this}unsqueeze(e=null){return new ce(this.type,this.data,xc(this.dims,e))}unsqueeze_(e=null){return this.dims=xc(this.dims,e),this}flatten_(e=0,r=-1){r=(r+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),i=this.dims.slice(e,r+1),a=this.dims.slice(r+1);return this.dims=[...n,i.reduce((s,o)=>s*o,1),...a],this}flatten(e=0,r=-1){return this.clone().flatten_(e,r)}view(...e){let r=-1;for(let n=0;ns!==r?i*a:i,1);e[r]=this.data.length/n}return new ce(this.type,this.data,e)}neg_(){const e=this.data;for(let r=0;ra*s);if(r!==n)throw Error(`cannot reshape array of size ${r} into shape (${e})`);let i=t;for(let a=e.length-1;a>=0;a--)i=i.reduce((s,o)=>{let u=s[s.length-1];return u.lengthr!==1):typeof e=="number"?t[e]===1&&t.splice(e,1):Array.isArray(e)&&(t=t.filter((r,n)=>r!==1||!e.includes(n))),t}function xc(t,e){return e=Xt(e,t.length+1),t=t.slice(),t.splice(e,0,1),t}function Xt(t,e,r=null,n=!0){if(n&&(t<-e||t>=e))throw new Error(`IndexError: index ${t} is out of bounds for dimension${r===null?"":" "+r} with size ${e}`);return t<0&&(t=(t%e+e)%e),t}function Jt(t,e=0){e=Xt(e,t[0].dims.length);const r=t[0].dims.slice();r[e]=t.reduce((s,o)=>s+o.dims[e],0);const n=r.reduce((s,o)=>s*o,1),i=new t[0].data.constructor(n),a=t[0].type;if(e===0){let s=0;for(let o of t)i.set(o.data,s),s+=o.data.length}else{let s=0;for(let o=0;o=0;--f){const y=u.dims[f];let b=m%y;f===e&&(b+=s),h+=b*c,c*=r[f],m=Math.floor(m/y)}i[h]=u.data[l]}s+=u.dims[e]}}return new ce(a,i,r)}function Qn(t,e=0){return Jt(t.map(r=>r.unsqueeze(e)),e)}function X_(t,e=null,r=1,n=!1){if(e===null){const l=t.data.reduce((c,y)=>c+y,0)/t.data.length,h=Math.sqrt(t.data.reduce((c,y)=>c+(y-l)**2,0)/(t.data.length-r)),f=new ce(t.type,[l],[]);return[new ce(t.type,[h],[]),f]}e=Xt(e,t.dims.length);const i=xo(t,e,n),a=t.dims.slice();a[e]=1;const s=new t.data.constructor(t.data.length/t.dims[e]);for(let u=0;u=0;--h){const c=t.dims[h];if(h!==e){const y=f%c;l+=y*m,m*=a[h]}f=Math.floor(f/c)}s[l]+=(t.data[u]-i.data[l])**2}for(let u=0;us+o,0);return new ce(t.type,[a/t.data.length],[])}e=Xt(e,t.dims.length);const n=t.dims.slice();n[e]=1;const i=new t.data.constructor(t.data.length/t.dims[e]);for(let a=0;a=0;--o){const h=t.dims[o];if(o!==e){const f=u%h;s+=f*l,l*=n[o]}u=Math.floor(u/h)}i[s]+=t.data[a]}if(t.dims[e]!==1)for(let a=0;a0||o>0;)switch(u.push(s-1),l.push(o-1),a[s][o].item()){case 0:--s,--o;break;case 1:--s;break;case 2:--o;break;default:throw new Error(`Internal error in dynamic time warping. Unexpected trace[${s}, ${o}]. Please file a bug report.`)}return u.reverse(),l.reverse(),[u,l]}function J_(t){const e=new Array(t.length);for(let r=t.length-1,n=1;r>=0;--r)e[r]=n,n*=t[r];return e}function So(t,e,r,n){const i=t.reduce((a,s)=>a*s,1);return new ce(r,new n(i).fill(e),t)}function Z_(t,e){let r,n;return r="float32",n=Float32Array,So(t,e,r,n)}function ey(t,e){return Z_(t.dims,e)}function aa(t){return So(t,1n,"int64",BigInt64Array)}function ty(t){return aa(t.dims)}function ry(t){return So(t,0n,"int64",BigInt64Array)}function ny(t){return ry(t.dims)}var Ce=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",BooleanLiteral:"BooleanLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Set:"Set",If:"If",For:"For",In:"In",Is:"Is",NotIn:"NotIn",Else:"Else",EndIf:"EndIf",ElseIf:"ElseIf",EndFor:"EndFor",And:"And",Or:"Or",Not:"UnaryOperator"});Object.freeze({set:Ce.Set,for:Ce.For,in:Ce.In,is:Ce.Is,if:Ce.If,else:Ce.Else,endif:Ce.EndIf,elif:Ce.ElseIf,endfor:Ce.EndFor,and:Ce.And,or:Ce.Or,not:Ce.Not,"not in":Ce.NotIn,true:Ce.BooleanLiteral,false:Ce.BooleanLiteral});Ce.OpenStatement,Ce.CloseStatement,Ce.OpenExpression,Ce.CloseExpression,Ce.OpenParen,Ce.CloseParen,Ce.OpenCurlyBracket,Ce.CloseCurlyBracket,Ce.OpenSquareBracket,Ce.CloseSquareBracket,Ce.Comma,Ce.Dot,Ce.Colon,Ce.Pipe,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.AdditiveBinaryOperator,Ce.AdditiveBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.Equals;const hm=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],ks=new Map(hm),ay=new Map([...hm.map(([t,e])=>[e,t]),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function iy(t){t=t.toLowerCase();let e=ay.get(t);if(e===void 0)if(ks.has(t))e=t;else{const n=t.length===2?ks.keys():ks.values();throw new Error(`Language "${t}" is not supported. Must be one of: ${JSON.stringify(n)}`)}return e}const sy=(()=>{const t=[...Array.from({length:94},(i,a)=>a+33),...Array.from({length:12},(i,a)=>a+161),...Array.from({length:82},(i,a)=>a+174)],e=t.slice();let r=0;for(let i=0;i<256;++i)t.includes(i)||(t.push(i),e.push(256+r),r+=1);const n=e.map(i=>String.fromCharCode(i));return Object.fromEntries(t.map((i,a)=>[i,n[a]]))})();Zg(sy);async function oy(t,e){return await on(t,"config.json",!0,e)}function sn(t){const e={};let r={};switch(t.model_type){case"llava":case"paligemma":r=sn(t.text_config);break;case"moondream1":r=sn(t.phi_config);break;case"musicgen":r=sn(t.decoder);break;case"gpt2":case"gptj":case"codegen":case"gpt_bigcode":e.num_heads="n_head",e.num_layers="n_layer",e.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":e.num_heads="num_attention_heads",e.num_layers="num_hidden_layers",e.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":e.num_heads="num_key_value_heads",e.num_layers="num_hidden_layers",e.hidden_size="hidden_size",e.num_attention_heads="num_attention_heads";break;case"gemma":e.num_heads="num_key_value_heads",e.num_layers="num_hidden_layers",e.dim_kv="head_dim";break;case"openelm":e.num_heads="num_kv_heads",e.num_layers="num_transformer_layers",e.dim_kv="head_dim";break;case"gpt_neo":e.num_heads="num_heads",e.num_layers="num_layers",e.hidden_size="hidden_size";break;case"bloom":e.num_heads="n_head",e.num_layers="n_layer",e.hidden_size="hidden_size";break;case"mpt":e.num_heads="n_heads",e.num_layers="n_layers",e.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":e.num_decoder_layers="num_decoder_layers",e.num_decoder_heads="num_heads",e.decoder_dim_kv="d_kv",e.num_encoder_layers="num_layers",e.num_encoder_heads="num_heads",e.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":e.num_decoder_layers="decoder_layers",e.num_decoder_heads="decoder_attention_heads",e.decoder_hidden_size="d_model",e.num_encoder_layers="encoder_layers",e.num_encoder_heads="encoder_attention_heads",e.encoder_hidden_size="d_model";break;case"speecht5":e.num_decoder_layers="decoder_layers",e.num_decoder_heads="decoder_attention_heads",e.decoder_hidden_size="hidden_size",e.num_encoder_layers="encoder_layers",e.num_encoder_heads="encoder_attention_heads",e.encoder_hidden_size="hidden_size";break;case"trocr":e.num_encoder_layers=e.num_decoder_layers="decoder_layers",e.num_encoder_heads=e.num_decoder_heads="decoder_attention_heads",e.encoder_hidden_size=e.decoder_hidden_size="d_model";break;case"musicgen_decoder":e.num_encoder_layers=e.num_decoder_layers="num_hidden_layers",e.num_encoder_heads=e.num_decoder_heads="num_attention_heads",e.encoder_hidden_size=e.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const i=sn(t.encoder),a=sn(t.decoder),s="num_decoder_layers"in a,o={};return s?(o.num_decoder_layers=a.num_layers,o.num_decoder_heads=a.num_heads,o.decoder_hidden_size=a.hidden_size,o.num_encoder_layers=i.num_layers,o.num_encoder_heads=i.num_heads,o.encoder_hidden_size=i.hidden_size):(o.num_layers=a.num_layers,o.num_heads=a.num_heads,o.hidden_size=a.hidden_size),o}const n={...r,...zr(t,["model_type","multi_query","is_encoder_decoder"])};for(const i in e)n[i]=t[e[i]];return n}function fm(t,{prefix:e="past_key_values",encoder_add_pkv:r=!0}={}){const n={},i=t.normalized_config,a=1;if(i.is_encoder_decoder&&r){const s=i.encoder_dim_kv??i.encoder_hidden_size/i.num_encoder_heads,o=i.decoder_dim_kv??i.decoder_hidden_size/i.num_decoder_heads,u=[a,i.num_encoder_heads,0,s],l=[a,i.num_decoder_heads,0,o];for(let h=0;h=1&&s[s.length-1]>=this.timestamp_begin,u=s.length<2||s[s.length-2]>=this.timestamp_begin;if(o&&(u?a.subarray(this.timestamp_begin).fill(-1/0):a.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&this.max_initial_timestamp_index!==null){const m=this.timestamp_begin+this.max_initial_timestamp_index;a.subarray(m+1).fill(-1/0)}const l=o0(a),h=Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce((m,c)=>m+c)),f=_r(l.subarray(0,this.timestamp_begin))[0];h>f&&a.subarray(0,this.timestamp_begin).fill(-1/0)}return r}}class fy extends Zt{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const r=e.length,n=[];for(let a=0;a1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,r){if(r.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got batch size ${r.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,i=r.slice([0,n],null),a=r.slice([n,r.dims[0]],null);for(let s=0;s1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=r,this.min_tokens_to_keep=n}}class $y extends Co{constructor(e,{filter_value:r=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=r}}class gm{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,zr(e,Object.getOwnPropertyNames(this)))}}class Eo extends or{_call(e,r){throw Error("StoppingCriteria needs to be subclassed")}}class To extends or{constructor(){super(),this.criteria=[]}push(e){this.criteria.push(e)}extend(e){e instanceof To?e=e.criteria:e instanceof Eo&&(e=[e]),this.criteria.push(...e)}_call(e,r){const n=new Array(e.length).fill(!1);for(const i of this.criteria){const a=i(e,r);for(let s=0;sr.length>=this.max_length)}}class Sy extends Eo{constructor(e){super(),Array.isArray(e)||(e=[e]),this.eos_token_id=e}_call(e,r){return e.map(n=>{const i=n.at(-1);return this.eos_token_id.some(a=>i==a)})}}class Si extends or{constructor(e){super(),this.generation_config=e}async _call(e){return this.sample(e)}async sample(e){throw Error("sample should be implemented in subclasses.")}getLogits(e,r){let n=e.dims.at(-1),i=e.data;if(r===-1)i=i.slice(-n);else{let a=r*n;i=i.slice(a,a+n)}return i}randomSelect(e){let r=0;for(let i=0;i1)return new Ty(e);if(e.num_return_sequences>1)throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${e.num_return_sequences}.`);return new Cy(e)}}class Cy extends Si{async sample(e){const r=_r(e.data)[1];return[[BigInt(r),0]]}}class Ey extends Si{async sample(e){let r=e.dims.at(-1);this.generation_config.top_k>0&&(r=Math.min(this.generation_config.top_k,r));const[n,i]=await pm(e,r),a=ta(n.data);return Array.from({length:this.generation_config.num_beams},()=>{const s=this.randomSelect(a);return[i.data[s],Math.log(a[s])]})}}class Ty extends Si{async sample(e){let r=e.dims.at(-1);this.generation_config.top_k>0&&(r=Math.min(this.generation_config.top_k,r));const[n,i]=await pm(e,r),a=ta(n.data);return Array.from({length:this.generation_config.num_beams},(s,o)=>[i.data[o],Math.log(a[o])])}}class ky extends gm{return_timestamps=null;return_token_timestamps=null;num_frames=null;alignment_heads=null;task=null;language=null;no_timestamps_token_id=null;prompt_ids=null;is_multilingual=null;lang_to_id=null;task_to_id=null;max_initial_timestamp_index=1}const be={EncoderOnly:0,EncoderDecoder:1,Seq2Seq:2,Vision2Seq:3,DecoderOnly:4,MaskGeneration:5,ImageTextToText:6,Musicgen:7},wi=new Map,_m=new Map,qn=new Map;async function Iy(t,e,r){let n=r.device;n&&typeof n!="string"&&(n.hasOwnProperty(e)?n=n[e]:(console.warn(`Device not specified for ${e}. Using the default device.`),n=null));const i=H_(n);let a=r.dtype;if(typeof a!="string"&&(a&&a.hasOwnProperty(e)?a=a[e]:(a=ly[i[0]],console.warn(`Dtype not specified for ${e}. Using the default dtype: ${a}.`))),Sc.hasOwnProperty(a)){if(a===$t.fp16&&!await uy())throw new Error("The device does not support fp16.")}else throw new Error(`Invalid dtype: ${a}. Should be one of: ${Object.keys($t).join(", ")}`);const s=Sc[a],o=`${r.subfolder??""}/${e}${s}.onnx`,u={...r.session_options};u.executionProviders??=i;const l=ei(t,o,!0,r);let h=[];if(r.use_external_data_format){if(Pr.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const m=`${e}${s}.onnx_data`,c=`${r.subfolder??""}/${m}`;h.push(new Promise(async(y,b)=>{const v=await ei(t,c,!0,r);y({path:m,data:v})}))}else u.externalData!==void 0&&(h=u.externalData.map(async m=>{if(typeof m.data=="string"){const c=await ei(t,m.data,!0,r);return{...m,data:c}}return m}));if(h.length>0&&(u.externalData=await Promise.all(h)),n==="webgpu"){const m=fm(r.config,{prefix:"present"});if(Object.keys(m).length>0){const c={};for(const y in m)c[y]="gpu-buffer";u.preferredOutputLocation=c}}return{buffer:await l,session_options:u}}async function Tr(t,e,r){const n=Object.keys(e),i=await Promise.all(n.map(async s=>Iy(t,e[s],r))),a={};for(let s=0;s0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${n.join(", ")}.`);const i=Object.keys(e).length,a=t.inputNames.length;if(i>a){let s=Object.keys(e).filter(o=>!t.inputNames.includes(o));console.warn(`WARNING: Too many inputs were provided (${i} > ${a}). The following inputs will be ignored: "${s.join(", ")}".`)}return r}async function gr(t,e){const r=Ay(t,e);try{const n=Object.fromEntries(Object.entries(r).map(([a,s])=>[a,s.ort_tensor]));let i=await t.run(n);return i=ym(i),i}catch(n){throw console.error(`An error occurred during model execution: "${n}".`),console.error("Inputs given to model:",r),n}}function ym(t){for(let e in t)dm(t[e])?t[e]=new ce(t[e]):typeof t[e]=="object"&&ym(t[e]);return t}function wm(t){if(t instanceof ce)return t;if(t.length===0)throw Error("items must be non-empty");if(Array.isArray(t[0])){if(t.some(e=>e.length!==t[0].length))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new ce("int64",BigInt64Array.from(t.flat().map(e=>BigInt(e))),[t.length,t[0].length])}else return new ce("int64",BigInt64Array.from(t.map(e=>BigInt(e))),[1,t.length])}function bm(t){return new ce("bool",[t],[1])}async function Cc(t,e){let{encoder_outputs:r,past_key_values:n}=e;if(!r){const u=zr(e,t.sessions.model.inputNames);r=(await Jn(t,u)).last_hidden_state}const{input_ids:i,decoder_input_ids:a,...s}=e;return s.input_ids=a,s.encoder_hidden_states=r,t.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=e.attention_mask),await ko(t,s,!0)}async function Jn(t,e){const r=t.sessions.model,n=Object.create(null);for(const i of r.inputNames)n[i]=e[i];return r.inputNames.includes("token_type_ids")&&!n.token_type_ids&&(n.token_type_ids=new ce("int64",new BigInt64Array(n.input_ids.data.length),n.input_ids.dims)),await gr(r,n)}async function ko(t,e,r=!1){const n=t.sessions[r?"decoder_model_merged":"model"],{past_key_values:i,...a}=e;n.inputNames.includes("use_cache_branch")&&(a.use_cache_branch=bm(!!i)),n.inputNames.includes("position_ids")&&a.attention_mask&&!a.position_ids&&(a.position_ids=Oy(a,i)),t.addPastKeyValues(a,i);const s=zr(a,n.inputNames);return await gr(n,s)}async function My(t,{input_ids:e=null,attention_mask:r=null,pixel_values:n=null,position_ids:i=null,inputs_embeds:a=null,past_key_values:s=null,generation_config:o=null,logits_processor:u=null,...l}){if(!a){if(a=await t.encode_text({input_ids:e}),n&&e.dims[1]!==1){const f=await t.encode_image({pixel_values:n});({inputs_embeds:a,attention_mask:r}=t._merge_input_ids_with_image_features({image_features:f,inputs_embeds:a,input_ids:e,attention_mask:r}))}else if(s&&n&&e.dims[1]===1){const f=e.dims[1],m=Object.values(s)[0].dims.at(-2);r=Jt([aa([e.dims[0],m]),r.slice(null,[r.dims[1]-f,r.dims[1]])],1)}}return await ko(t,{inputs_embeds:a,past_key_values:s,attention_mask:r,position_ids:i,generation_config:o,logits_processor:u},!0)}function Oy(t,e=null){const{input_ids:r,inputs_embeds:n,attention_mask:i}=t,[a,s]=i.dims,o=new BigInt64Array(i.data.length);for(let l=0;la.dims[1])){if(io==t.config.image_token_index)){const o=t.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const u=a.dims[1]-(i-o);r.input_ids=a.slice(null,[-u,null]),r.attention_mask=aa([1,i+u])}}}return r}function zy(t,e,r,n){const{...i}=r;return r.past_key_values&&(e=e.map(s=>[s.at(-1)])),i.decoder_input_ids=wm(e),i}class X extends or{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,r){super(),this.config=e,this.sessions=r;const n=qn.get(this.constructor),i=wi.get(n);this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,i===be.DecoderOnly?(this.can_generate=!0,this._forward=ko,this._prepare_inputs_for_generation=Ec):i===be.Seq2Seq||i===be.Vision2Seq||i===be.Musicgen?(this.can_generate=!0,this._forward=Cc,this._prepare_inputs_for_generation=zy):i===be.EncoderDecoder?this._forward=Cc:i===be.ImageTextToText?(this.can_generate=!0,this._forward=My,this._prepare_inputs_for_generation=Ec):this._forward=Jn,this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const r of Object.values(this.sessions))r?.handler?.dispose&&e.push(r.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:r=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:l=null,dtype:h=null,use_external_data_format:f=null,session_options:m={}}={}){let c={progress_callback:r,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:l,dtype:h,use_external_data_format:f,session_options:m};const y=qn.get(this),b=wi.get(y);c.config=await mm.from_pretrained(e,c);let v;return b===be.DecoderOnly?v=await Promise.all([Tr(e,{model:c.model_file_name??"model"},c),on(e,"generation_config.json",!1,c)]):b===be.Seq2Seq||b===be.Vision2Seq?v=await Promise.all([Tr(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},c),on(e,"generation_config.json",!1,c)]):b===be.MaskGeneration?v=await Promise.all([Tr(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},c)]):b===be.EncoderDecoder?v=await Promise.all([Tr(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},c)]):b===be.ImageTextToText?v=await Promise.all([Tr(e,{embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"},c),on(e,"generation_config.json",!1,c)]):b===be.Musicgen?v=await Promise.all([Tr(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},c),on(e,"generation_config.json",!1,c)]):(b!==be.EncoderOnly&&console.warn(`Model type for '${y??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),v=await Promise.all([Tr(e,{model:c.model_file_name??"model"},c)])),new this(c.config,...v)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const r=new Ys;return e.temperature!==null&&e.temperature!==1&&r.push(new by(e.temperature)),e.top_k!==null&&e.top_k!==0&&r.push(new $y(e.top_k)),e.top_p!==null&&e.top_p<1&&r.push(new vy(e.top_p)),r}_get_logits_processor(e,r,n=null){const i=new Ys;if(e.repetition_penalty!==null&&e.repetition_penalty!==1&&i.push(new my(e.repetition_penalty)),e.no_repeat_ngram_size!==null&&e.no_repeat_ngram_size>0&&i.push(new fy(e.no_repeat_ngram_size)),e.bad_words_ids!==null&&i.push(new yy(e.bad_words_ids,e.eos_token_id)),e.min_length!==null&&e.eos_token_id!==null&&e.min_length>0&&i.push(new gy(e.min_length,e.eos_token_id)),e.min_new_tokens!==null&&e.eos_token_id!==null&&e.min_new_tokens>0&&i.push(new _y(r,e.min_new_tokens,e.eos_token_id)),e.forced_bos_token_id!==null&&i.push(new dy(e.forced_bos_token_id)),e.forced_eos_token_id!==null&&i.push(new cy(e.max_length,e.forced_eos_token_id)),e.begin_suppress_tokens!==null){const a=r>1||e.forced_bos_token_id===null?r:r+1;i.push(new py(e.begin_suppress_tokens,a))}return e.guidance_scale!==null&&e.guidance_scale>1&&i.push(new wy(e.guidance_scale)),n!==null&&i.extend(n),i}_prepare_generation_config(e,r,n=gm){const i={...this.config};for(const s of["decoder","generator","text_config"])s in i&&Object.assign(i,i[s]);const a=new n(i);return"generation_config"in this&&Object.assign(a,this.generation_config),e&&Object.assign(a,e),r&&Object.assign(a,zr(r,Object.getOwnPropertyNames(a))),a}_get_stopping_criteria(e,r=null){const n=new To;return e.max_length!==null&&n.push(new xy(e.max_length,this.config.max_position_embeddings??null)),e.eos_token_id!==null&&n.push(new Sy(e.eos_token_id)),r&&n.extend(r),n}_validate_model_class(){if(!this.can_generate){const e=[Cg,Eg,Sg,xg],r=qn.get(this.constructor),n=new Set,i=this.config.model_type;for(const s of e){const o=s.get(i);o&&n.add(o[0])}let a=`The current model class (${r}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(a+=` Please use the following class instead: ${[...n].join(", ")}`),Error(a)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:r,model_inputs:n,is_encoder_decoder:i}){return n.past_key_values=this.getPastKeyValues(r,n.past_key_values),n.input_ids=new ce("int64",e.flat(),[e.length,1]),i||(n.attention_mask=Jt([n.attention_mask,aa([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:r,model_kwargs:n}){const i=zr(n,this.forward_params),a=this.main_input_name;if(a in i){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else i[a]=e;return{inputs_tensor:i[a],model_inputs:i,model_input_name:a}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:r,model_input_name:n,generation_config:i}){const a=zr(r,this.sessions.model.inputNames);let{last_hidden_state:s}=await Jn(this,a);return i.guidance_scale!==null&&i.guidance_scale>1&&(s=Jt([s,ey(s,0)],0),"attention_mask"in r&&(r.attention_mask=Jt([r.attention_mask,ny(r.attention_mask)],0))),r.encoder_outputs=s,r}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:r,model_kwargs:n,decoder_start_token_id:i,bos_token_id:a,generation_config:s}){let{decoder_input_ids:o,...u}=n;if(!o)if(i??=a,this.config.model_type==="musicgen")o=Array.from({length:e*this.config.decoder.num_codebooks},()=>[i]);else if(Array.isArray(i)){if(i.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${i.length}`);o=i}else o=Array.from({length:e},()=>[i]);return o=wm(o),n.decoder_attention_mask=ty(o),{input_ids:o,model_inputs:u}}async generate({inputs:e=null,generation_config:r=null,logits_processor:n=null,stopping_criteria:i=null,streamer:a=null,...s}){this._validate_model_class(),r=this._prepare_generation_config(r,s);let{inputs_tensor:o,model_inputs:u,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const h=this.config.is_encoder_decoder;h&&("encoder_outputs"in u||(u=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:o,model_inputs:u,model_input_name:l,generation_config:r})));let f;h?{input_ids:f,model_inputs:u}=this._prepare_decoder_input_ids_for_generation({batch_size:u[l].dims.at(0),model_input_name:l,model_kwargs:u,decoder_start_token_id:r.decoder_start_token_id,bos_token_id:r.bos_token_id,generation_config:r}):f=u[l];let m=f.dims.at(-1);r.max_new_tokens!==null&&(r.max_length=m+r.max_new_tokens);const c=this._get_logits_processor(r,m,n),y=this._get_stopping_criteria(r,i),b=u[l].dims.at(0),v=Si.getSampler(r),C=new Array(b).fill(0),x=f.tolist();a&&a.put(x);let T=null;for(;;){u=this.prepare_inputs_for_generation(x,u,r);const A=await this.forward(u),R=A.logits.slice(null,-1,null),z=c(x,R),P=[];for(let K=0;KK)){r.return_dict_in_generate&&(T=this.getPastKeyValues(A,u.past_key_values,!1));break}u=this._update_model_kwargs_for_generation({generated_input_ids:P,outputs:A,model_inputs:u,is_encoder_decoder:h})}a&&a.end();const I=new ce("int64",x.flat(),[x.length,x[0].length]);return r.return_dict_in_generate?{sequences:I,past_key_values:T}:I}addAttentionsToBeam(e,r){if(this.config.is_encoder_decoder){if(!r.cross_attentions||r.cross_attentions.length===0)throw Error("`output_attentions` is true, but the model did not produce cross-attentions. This is most likely because the model was not exported with `output_attentions=True`.");e.cross_attentions||(e.cross_attentions=[]),e.cross_attentions.push(r.cross_attentions)}if(!r.decoder_attentions||r.decoder_attentions.length===0)throw Error("`output_attentions` is true, but the model did not produce decoder-attentions. This is most likely because the model was not exported with `output_attentions=True`.");e.decoder_attentions||(e.decoder_attentions=[]),e.decoder_attentions.push(r.decoder_attentions)}groupBeams(e){const r=Object.create(null);for(const n of e)r[n.id]===void 0?r[n.id]=[n]:r[n.id].push(n);return Object.values(r)}getPastKeyValues(e,r,n=!0){const i=Object.create(null);for(const a in e)if(a.startsWith("present")){let s=a.replace("present","past_key_values");if(r&&a.includes("encoder"))i[s]=r[s];else{if(n&&r){const o=r[s];o.location==="gpu-buffer"&&o.dispose()}i[s]=e[a]}}return i}getAttentions(e){const r=Object.create(null);for(const n of["cross_attentions","decoder_attentions"]){const i=[];for(const a in e)if(a.startsWith(n)){const s=a.split(".").pop();i[s]=e[a]}r[n]=i}return r}addPastKeyValues(e,r){if(r)Object.assign(e,r);else{const n=this.custom_config.kv_cache_dtype??"float32",i=n==="float16"?new Uint16Array:[],a=fm(this.config);for(const s in a)e[s]=new ce(n,i,a[s])}}}class Ot{}class ia extends X{}class Ry extends ia{}class Py extends ia{async _call(e){return new dt(await super._call(e))}}class By extends ia{async _call(e){return new Ae(await super._call(e))}}class Dy extends ia{async _call(e){return new lt(await super._call(e))}}class Ny extends ia{async _call(e){return new mt(await super._call(e))}}class Fy extends X{}class Ly extends Fy{}class sa extends X{}class Wy extends sa{}class Uy extends sa{async _call(e){return new dt(await super._call(e))}}class Vy extends sa{async _call(e){return new Ae(await super._call(e))}}class Gy extends sa{async _call(e){return new lt(await super._call(e))}}class Hy extends sa{async _call(e){return new mt(await super._call(e))}}class oa extends X{}class qy extends oa{}class jy extends oa{async _call(e){return new dt(await super._call(e))}}class Ky extends oa{async _call(e){return new Ae(await super._call(e))}}class Yy extends oa{async _call(e){return new lt(await super._call(e))}}class Xy extends oa{async _call(e){return new mt(await super._call(e))}}class ua extends X{}class Qy extends ua{}class Jy extends ua{async _call(e){return new dt(await super._call(e))}}class Zy extends ua{async _call(e){return new Ae(await super._call(e))}}class ew extends ua{async _call(e){return new lt(await super._call(e))}}class tw extends ua{async _call(e){return new mt(await super._call(e))}}class la extends X{}class rw extends la{}class nw extends la{async _call(e){return new dt(await super._call(e))}}class aw extends la{async _call(e){return new Ae(await super._call(e))}}class iw extends la{async _call(e){return new lt(await super._call(e))}}class sw extends la{async _call(e){return new mt(await super._call(e))}}class da extends X{}class ow extends da{}class uw extends da{async _call(e){return new dt(await super._call(e))}}class lw extends da{async _call(e){return new Ae(await super._call(e))}}class dw extends da{async _call(e){return new lt(await super._call(e))}}class cw extends da{async _call(e){return new mt(await super._call(e))}}class ca extends X{}class pw extends ca{}class hw extends ca{async _call(e){return new dt(await super._call(e))}}class fw extends ca{async _call(e){return new Ae(await super._call(e))}}class mw extends ca{async _call(e){return new lt(await super._call(e))}}class gw extends ca{async _call(e){return new mt(await super._call(e))}}class pa extends X{}class _w extends pa{}class yw extends pa{async _call(e){return new Ae(await super._call(e))}}class ww extends pa{async _call(e){return new lt(await super._call(e))}}class bw extends pa{async _call(e){return new mt(await super._call(e))}}class vw extends pa{async _call(e){return new dt(await super._call(e))}}class Ci extends X{}class $w extends Ci{}class xw extends Ci{async _call(e){return new dt(await super._call(e))}}class Sw extends Ci{async _call(e){return new Ae(await super._call(e))}}class Cw extends Ci{async _call(e){return new lt(await super._call(e))}}class Ei extends X{}class Ew extends Ei{}class Tw extends Ei{async _call(e){return new dt(await super._call(e))}}class kw extends Ei{async _call(e){return new Ae(await super._call(e))}}class Iw extends Ei{async _call(e){return new mt(await super._call(e))}}class ha extends X{}class Aw extends ha{}class Mw extends ha{async _call(e){return new dt(await super._call(e))}}class Ow extends ha{async _call(e){return new Ae(await super._call(e))}}class zw extends ha{async _call(e){return new lt(await super._call(e))}}class Rw extends ha{async _call(e){return new mt(await super._call(e))}}class Ti extends X{}class Pw extends Ti{}class Bw extends Ti{async _call(e){return new dt(await super._call(e))}}class Dw extends Ti{async _call(e){return new Ae(await super._call(e))}}class Nw extends Ti{async _call(e){return new mt(await super._call(e))}}class ki extends X{}class Fw extends ki{}class Lw extends ki{async _call(e){return new Ae(await super._call(e))}}class Ww extends ki{async _call(e){return new mt(await super._call(e))}}class Uw extends ki{async _call(e){return new dt(await super._call(e))}}class vm extends X{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}}class Vw extends vm{}class Gw extends vm{}class $m extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Hw extends $m{}class qw extends $m{}class xm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class jw extends xm{}class Kw extends xm{}class Io extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Yw extends Io{}class Xw extends Io{}class Qw extends Io{async _call(e){return new Ae(await super._call(e))}}class Ii extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Jw extends Ii{}class Zw extends Ii{}class eb extends Ii{async _call(e){return new Ae(await super._call(e))}}class tb extends Ii{}class Sm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class rb extends Sm{}class nb extends Sm{}class Cm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class ab extends Cm{}class ib extends Cm{}class fa extends X{}class sb extends fa{}class ob extends fa{async _call(e){return new dt(await super._call(e))}}class ub extends fa{async _call(e){return new Ae(await super._call(e))}}class lb extends fa{async _call(e){return new lt(await super._call(e))}}class db extends fa{async _call(e){return new mt(await super._call(e))}}class ma extends X{}class cb extends ma{}class pb extends ma{async _call(e){return new dt(await super._call(e))}}class hb extends ma{async _call(e){return new Ae(await super._call(e))}}class fb extends ma{async _call(e){return new lt(await super._call(e))}}class mb extends ma{async _call(e){return new mt(await super._call(e))}}class ga extends X{}class gb extends ga{}class _b extends ga{async _call(e){return new dt(await super._call(e))}}class yb extends ga{async _call(e){return new Ae(await super._call(e))}}class wb extends ga{async _call(e){return new lt(await super._call(e))}}class bb extends ga{async _call(e){return new mt(await super._call(e))}}class Em extends X{}class vb extends Em{}class $b extends Em{}class Tm extends X{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}}class xb extends Tm{}class Sb extends Tm{_prepare_generation_config(e,r){return super._prepare_generation_config(e,r,ky)}_retrieve_init_tokens(e){const r=[e.decoder_start_token_id];let n=e.language;const i=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const s=`<|${iy(n)}|>`;r.push(e.lang_to_id[s]),r.push(e.task_to_id[i??"transcribe"])}else if(n||i)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&r.at(-1)!==e.no_timestamps_token_id?r.push(e.no_timestamps_token_id):e.return_timestamps&&r.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),r.pop()),r.filter(a=>a!=null)}async generate({inputs:e=null,generation_config:r=null,logits_processor:n=null,stopping_criteria:i=null,...a}){r=this._prepare_generation_config(r,a);const s=this._retrieve_init_tokens(r);return r.return_timestamps&&(n??=new Ys,n.push(new hy(r,s))),await super.generate({inputs:e,generation_config:r,logits_processor:n,decoder_input_ids:s,...a})}_extract_token_timestamps(e,r,n=null,i=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");let a=this.config.median_filter_width;a===void 0&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),a=7);const s=e.cross_attentions.map(l=>{let h=Array.from({length:this.config.decoder_layers},(v,C)=>Jt(l.map(x=>x[C]),2)),f=Qn(r.map(([v,C])=>n?h[v].slice(null,C,null,[0,n]):h[v].slice(null,C)));f=f.transpose(1,0,2,3);let[m,c]=X_(f,-2,0,!0),y=f.clone();for(let v=0;vf[C+1]-f[C]),y=e0([1],c).map(v=>!!v),b=[];for(let v=0;vm.findIndex(c=>c==a)),u=o.every(m=>m===-1),l=o.every(m=>m!==-1);if(!u&&!l)throw new Error("Every input should contain either 0 or 1 image token.");if(u)return{inputs_embeds:e,attention_mask:i};const h=[],f=[];for(let m=0;ma*s,1);e.input_labels=new ce("int64",new BigInt64Array(i).fill(1n),n)}const r={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(r.input_points=e.input_points),e.input_labels&&(r.input_labels=e.input_labels),e.input_boxes&&(r.input_boxes=e.input_boxes),await gr(this.sessions.prompt_encoder_mask_decoder,r)}async _call(e){return new p1(await super._call(e))}}class p1 extends Ot{constructor({iou_scores:e,pred_masks:r}){super(),this.iou_scores=e,this.pred_masks=r}}class pg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class h1 extends pg{}class f1 extends pg{}class hg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class m1 extends hg{}class g1 extends hg{}class Ur extends X{}class _1 extends Ur{}class y1 extends Ur{async _call(e){return new gn(await super._call(e))}}class w1 extends Ur{async _call(e){return new Ae(await super._call(e))}}class b1 extends Ur{async _call(e){return new lt(await super._call(e))}}class Mo extends X{}class v1 extends Mo{}class $1 extends Mo{async _call(e){return new gn(await super._call(e))}}class x1 extends Mo{async _call(e){return new Ae(await super._call(e))}}class Mi extends X{}class S1 extends Mi{}class C1 extends Mi{async _call(e){return new gn(await super._call(e))}}class E1 extends Mi{async _call(e){return new Ae(await super._call(e))}}class T1 extends Mi{async _call(e){return new lt(await super._call(e))}}class Oo extends X{}class k1 extends Oo{}class I1 extends Oo{async _call(e){return new gn(await super._call(e))}}class A1 extends Oo{async _call(e){return new Ae(await super._call(e))}}class M1 extends Ur{}class O1 extends Ur{async _call(e){return new gn(await super._call(e))}}class z1 extends Ur{async _call(e){return new Ae(await super._call(e))}}class _a extends X{}class R1 extends _a{}class P1 extends _a{async _call(e){return new gn(await super._call(e))}}class B1 extends _a{async _call(e){return new Ae(await super._call(e))}}class D1 extends _a{async _call(e){return new zv(await super._call(e))}}class N1 extends _a{async _call(e){return new lt(await super._call(e))}}class fg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class F1 extends fg{}class L1 extends fg{async generate_speech(e,r,{threshold:n=.5,minlenratio:i=0,maxlenratio:a=20,vocoder:s=null}={}){const o={input_ids:e},{encoder_outputs:u,encoder_attention_mask:l}=await Jn(this,o),h=u.dims[1]/this.config.reduction_factor,f=Math.floor(h*a),m=Math.floor(h*i),c=this.config.num_mel_bins;let y=[],b=null,v=null,C=0;for(;;){++C;const I=bm(!!v);let A;v?A=v.output_sequence_out:A=new ce("float32",new Float32Array(c),[1,1,c]);let R={use_cache_branch:I,output_sequence:A,encoder_attention_mask:l,speaker_embeddings:r,encoder_hidden_states:u};this.addPastKeyValues(R,b),v=await gr(this.sessions.decoder_model_merged,R),b=this.getPastKeyValues(v,b);const{prob:z,spectrum:P}=v;if(y.push(P),C>=m&&(Array.from(z.data).filter(J=>J>=n).length>0||C>=f))break}const x=Jt(y),{waveform:T}=await gr(s.sessions.model,{spectrogram:x});return{spectrogram:x,waveform:T}}}class W1 extends X{main_input_name="spectrogram"}class U1 extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class V1 extends U1{}class mg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class G1 extends mg{}class H1 extends mg{}class gg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class q1 extends gg{}class j1 extends gg{}class _g extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class K1 extends _g{}class Y1 extends _g{}class zo extends X{}class X1 extends zo{}class Q1 extends zo{static async from_pretrained(e,r={}){return r.model_file_name??="text_model",super.from_pretrained(e,r)}}class J1 extends zo{static async from_pretrained(e,r={}){return r.model_file_name??="audio_model",super.from_pretrained(e,r)}}class Z1 extends X{}class yg extends Z1{async _call(e){return new Pv(await super._call(e))}}class wg extends X{}class ev extends wg{}class tv extends wg{}class bg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class rv extends bg{}class nv extends bg{}class vg extends X{}class av extends vg{}class iv extends vg{async _call(e){return new Ae(await super._call(e))}}class $g extends X{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[r,n]=e.dims,i=this.config.decoder.num_codebooks,a=n-i;let s=0;for(let l=0;l0&&m<=a&&(e.data[s++]=e.data[l])}const o=Math.floor(r/i),u=s/(o*i);return new ce(e.type,e.data.slice(0,s),[o,i,u])}prepare_inputs_for_generation(e,r,n){let i=structuredClone(e);for(let s=0;s=o&&(i[s][o]=BigInt(this.config.decoder.pad_token_id));return n.guidance_scale!==null&&n.guidance_scale>1&&(i=i.concat(i)),super.prepare_inputs_for_generation(i,r,n)}async generate(e){const r=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(r).unsqueeze_(0),{audio_values:i}=await gr(this.sessions.encodec_decode,{audio_codes:n});return i}}class sv{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:r=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:l=null,dtype:h=null,use_external_data_format:f=null,session_options:m={}}={}){let c={progress_callback:r,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:l,dtype:h,use_external_data_format:f,session_options:m};if(c.config=await mm.from_pretrained(e,c),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let y of this.MODEL_CLASS_MAPPINGS){const b=y.get(c.config.model_type);if(b)return await b[1].from_pretrained(e,c)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${c.config.model_type}", attempting to construct from base class.`),await X.from_pretrained(e,c);throw Error(`Unsupported model type: ${c.config.model_type}`)}}const ov=new Map([["bert",["BertModel",Ry]],["nomic_bert",["NomicBertModel",Ly]],["roformer",["RoFormerModel",Wy]],["electra",["ElectraModel",Qy]],["esm",["EsmModel",$w]],["convbert",["ConvBertModel",qy]],["camembert",["CamembertModel",rw]],["deberta",["DebertaModel",ow]],["deberta-v2",["DebertaV2Model",pw]],["mpnet",["MPNetModel",Aw]],["albert",["AlbertModel",Fw]],["distilbert",["DistilBertModel",_w]],["roberta",["RobertaModel",sb]],["xlm",["XLMModel",cb]],["xlm-roberta",["XLMRobertaModel",gb]],["clap",["ClapModel",X1]],["clip",["CLIPModel",kb]],["clipseg",["CLIPSegModel",Bb]],["chinese_clip",["ChineseCLIPModel",Pb]],["siglip",["SiglipModel",Mb]],["mobilebert",["MobileBertModel",Ew]],["squeezebert",["SqueezeBertModel",Pw]],["wav2vec2",["Wav2Vec2Model",_1]],["wav2vec2-bert",["Wav2Vec2BertModel",k1]],["unispeech",["UniSpeechModel",v1]],["unispeech-sat",["UniSpeechSatModel",S1]],["hubert",["HubertModel",M1]],["wavlm",["WavLMModel",R1]],["audio-spectrogram-transformer",["ASTModel",vb]],["vits",["VitsModel",yg]],["detr",["DetrModel",O2]],["table-transformer",["TableTransformerModel",B2]],["vit",["ViTModel",g2]],["fastvit",["FastViTModel",y2]],["mobilevit",["MobileViTModel",$2]],["mobilevitv2",["MobileViTV2Model",S2]],["owlvit",["OwlViTModel",E2]],["owlv2",["Owlv2Model",k2]],["beit",["BeitModel",A2]],["deit",["DeiTModel",F2]],["convnext",["ConvNextModel",t1]],["convnextv2",["ConvNextV2Model",n1]],["dinov2",["Dinov2Model",i1]],["resnet",["ResNetModel",W2]],["swin",["SwinModel",V2]],["swin2sr",["Swin2SRModel",H2]],["donut-swin",["DonutSwinModel",e1]],["yolos",["YolosModel",o1]],["dpt",["DPTModel",j2]],["glpn",["GLPNModel",Q2]],["hifigan",["SpeechT5HifiGan",W1]],["efficientnet",["EfficientNetModel",av]]]),uv=new Map([["t5",["T5Model",Vw]],["longt5",["LongT5Model",Hw]],["mt5",["MT5Model",jw]],["bart",["BartModel",Yw]],["mbart",["MBartModel",Jw]],["marian",["MarianModel",h1]],["whisper",["WhisperModel",xb]],["m2m_100",["M2M100Model",m1]],["blenderbot",["BlenderbotModel",rb]],["blenderbot-small",["BlenderbotSmallModel",ab]]]),lv=new Map([["bloom",["BloomModel",d2]],["gpt2",["GPT2Model",Nb]],["gptj",["GPTJModel",Gb]],["gpt_bigcode",["GPTBigCodeModel",qb]],["gpt_neo",["GPTNeoModel",Lb]],["gpt_neox",["GPTNeoXModel",Ub]],["codegen",["CodeGenModel",Kb]],["llama",["LlamaModel",Xb]],["cohere",["CohereModel",Jb]],["gemma",["GemmaModel",e2]],["openelm",["OpenELMModel",r2]],["qwen2",["Qwen2Model",a2]],["phi",["PhiModel",s2]],["phi3",["Phi3Model",u2]],["mpt",["MptModel",p2]],["opt",["OPTModel",f2]],["mistral",["MistralModel",G1]],["starcoder2",["Starcoder2Model",q1]],["falcon",["FalconModel",K1]],["stablelm",["StableLmModel",rv]]]),xg=new Map([["speecht5",["SpeechT5ForSpeechToText",F1]],["whisper",["WhisperForConditionalGeneration",Sb]]]),dv=new Map([["speecht5",["SpeechT5ForTextToSpeech",L1]]]),cv=new Map([["vits",["VitsModel",yg]],["musicgen",["MusicgenForConditionalGeneration",$g]]]),pv=new Map([["bert",["BertForSequenceClassification",By]],["roformer",["RoFormerForSequenceClassification",Vy]],["electra",["ElectraForSequenceClassification",Zy]],["esm",["EsmForSequenceClassification",Sw]],["convbert",["ConvBertForSequenceClassification",Ky]],["camembert",["CamembertForSequenceClassification",aw]],["deberta",["DebertaForSequenceClassification",lw]],["deberta-v2",["DebertaV2ForSequenceClassification",fw]],["mpnet",["MPNetForSequenceClassification",Ow]],["albert",["AlbertForSequenceClassification",Lw]],["distilbert",["DistilBertForSequenceClassification",yw]],["roberta",["RobertaForSequenceClassification",ub]],["xlm",["XLMForSequenceClassification",hb]],["xlm-roberta",["XLMRobertaForSequenceClassification",yb]],["bart",["BartForSequenceClassification",Qw]],["mbart",["MBartForSequenceClassification",eb]],["mobilebert",["MobileBertForSequenceClassification",kw]],["squeezebert",["SqueezeBertForSequenceClassification",Dw]]]),hv=new Map([["bert",["BertForTokenClassification",Dy]],["roformer",["RoFormerForTokenClassification",Gy]],["electra",["ElectraForTokenClassification",ew]],["esm",["EsmForTokenClassification",Cw]],["convbert",["ConvBertForTokenClassification",Yy]],["camembert",["CamembertForTokenClassification",iw]],["deberta",["DebertaForTokenClassification",dw]],["deberta-v2",["DebertaV2ForTokenClassification",mw]],["mpnet",["MPNetForTokenClassification",zw]],["distilbert",["DistilBertForTokenClassification",ww]],["roberta",["RobertaForTokenClassification",lb]],["xlm",["XLMForTokenClassification",fb]],["xlm-roberta",["XLMRobertaForTokenClassification",wb]]]),Sg=new Map([["t5",["T5ForConditionalGeneration",Gw]],["longt5",["LongT5ForConditionalGeneration",qw]],["mt5",["MT5ForConditionalGeneration",Kw]],["bart",["BartForConditionalGeneration",Xw]],["mbart",["MBartForConditionalGeneration",Zw]],["marian",["MarianMTModel",f1]],["m2m_100",["M2M100ForConditionalGeneration",g1]],["blenderbot",["BlenderbotForConditionalGeneration",nb]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",ib]]]),Cg=new Map([["bloom",["BloomForCausalLM",c2]],["gpt2",["GPT2LMHeadModel",Fb]],["gptj",["GPTJForCausalLM",Hb]],["gpt_bigcode",["GPTBigCodeForCausalLM",jb]],["gpt_neo",["GPTNeoForCausalLM",Wb]],["gpt_neox",["GPTNeoXForCausalLM",Vb]],["codegen",["CodeGenForCausalLM",Yb]],["llama",["LlamaForCausalLM",Qb]],["cohere",["CohereForCausalLM",Zb]],["gemma",["GemmaForCausalLM",t2]],["openelm",["OpenELMForCausalLM",n2]],["qwen2",["Qwen2ForCausalLM",i2]],["phi",["PhiForCausalLM",o2]],["phi3",["Phi3ForCausalLM",l2]],["mpt",["MptForCausalLM",h2]],["opt",["OPTForCausalLM",m2]],["mbart",["MBartForCausalLM",tb]],["mistral",["MistralForCausalLM",H1]],["starcoder2",["Starcoder2ForCausalLM",j1]],["falcon",["FalconForCausalLM",Y1]],["trocr",["TrOCRForCausalLM",V1]],["stablelm",["StableLmForCausalLM",nv]]]),fv=new Map([["bert",["BertForMaskedLM",Py]],["roformer",["RoFormerForMaskedLM",Uy]],["electra",["ElectraForMaskedLM",Jy]],["esm",["EsmForMaskedLM",xw]],["convbert",["ConvBertForMaskedLM",jy]],["camembert",["CamembertForMaskedLM",nw]],["deberta",["DebertaForMaskedLM",uw]],["deberta-v2",["DebertaV2ForMaskedLM",hw]],["mpnet",["MPNetForMaskedLM",Mw]],["albert",["AlbertForMaskedLM",Uw]],["distilbert",["DistilBertForMaskedLM",vw]],["roberta",["RobertaForMaskedLM",ob]],["xlm",["XLMWithLMHeadModel",pb]],["xlm-roberta",["XLMRobertaForMaskedLM",_b]],["mobilebert",["MobileBertForMaskedLM",Tw]],["squeezebert",["SqueezeBertForMaskedLM",Bw]]]),mv=new Map([["bert",["BertForQuestionAnswering",Ny]],["roformer",["RoFormerForQuestionAnswering",Hy]],["electra",["ElectraForQuestionAnswering",tw]],["convbert",["ConvBertForQuestionAnswering",Xy]],["camembert",["CamembertForQuestionAnswering",sw]],["deberta",["DebertaForQuestionAnswering",cw]],["deberta-v2",["DebertaV2ForQuestionAnswering",gw]],["mpnet",["MPNetForQuestionAnswering",Rw]],["albert",["AlbertForQuestionAnswering",Ww]],["distilbert",["DistilBertForQuestionAnswering",bw]],["roberta",["RobertaForQuestionAnswering",db]],["xlm",["XLMForQuestionAnswering",mb]],["xlm-roberta",["XLMRobertaForQuestionAnswering",bb]],["mobilebert",["MobileBertForQuestionAnswering",Iw]],["squeezebert",["SqueezeBertForQuestionAnswering",Nw]]]),Eg=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",Cb]]]),gv=new Map([["llava",["LlavaForConditionalGeneration",km]],["moondream1",["Moondream1ForConditionalGeneration",Tb]]]),_v=new Map([["vit",["ViTForImageClassification",_2]],["fastvit",["FastViTForImageClassification",w2]],["mobilevit",["MobileViTForImageClassification",x2]],["mobilevitv2",["MobileViTV2ForImageClassification",C2]],["beit",["BeitForImageClassification",M2]],["deit",["DeiTForImageClassification",L2]],["convnext",["ConvNextForImageClassification",r1]],["convnextv2",["ConvNextV2ForImageClassification",a1]],["dinov2",["Dinov2ForImageClassification",s1]],["resnet",["ResNetForImageClassification",U2]],["swin",["SwinForImageClassification",G2]],["segformer",["SegformerForImageClassification",ev]],["efficientnet",["EfficientNetForImageClassification",iv]]]),yv=new Map([["detr",["DetrForObjectDetection",z2]],["table-transformer",["TableTransformerForObjectDetection",D2]],["yolos",["YolosForObjectDetection",u1]]]),wv=new Map([["owlvit",["OwlViTForObjectDetection",T2]],["owlv2",["Owlv2ForObjectDetection",I2]]]),bv=new Map([["detr",["DetrForSegmentation",R2]],["clipseg",["CLIPSegForImageSegmentation",Db]]]),vv=new Map([["segformer",["SegformerForSemanticSegmentation",tv]]]),$v=new Map([["sam",["SamModel",c1]]]),xv=new Map([["wav2vec2",["Wav2Vec2ForCTC",y1]],["wav2vec2-bert",["Wav2Vec2BertForCTC",I1]],["unispeech",["UniSpeechForCTC",$1]],["unispeech-sat",["UniSpeechSatForCTC",C1]],["wavlm",["WavLMForCTC",P1]],["hubert",["HubertForCTC",O1]]]),Sv=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",w1]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",A1]],["unispeech",["UniSpeechForSequenceClassification",x1]],["unispeech-sat",["UniSpeechSatForSequenceClassification",E1]],["wavlm",["WavLMForSequenceClassification",B1]],["hubert",["HubertForSequenceClassification",z1]],["audio-spectrogram-transformer",["ASTForAudioClassification",$b]]]),Cv=new Map([["wavlm",["WavLMForXVector",D1]]]),Ev=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",T1]],["wavlm",["WavLMForAudioFrameClassification",N1]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",b1]]]),Tv=new Map([["vitmatte",["VitMatteForImageMatting",v2]]]),kv=new Map([["swin2sr",["Swin2SRForImageSuperResolution",q2]]]),Iv=new Map([["dpt",["DPTForDepthEstimation",K2]],["depth_anything",["DepthAnythingForDepthEstimation",X2]],["glpn",["GLPNForDepthEstimation",J2]]]),Av=new Map([["clip",["CLIPVisionModelWithProjection",Ab]],["siglip",["SiglipVisionModel",zb]]]),Tg=[[ov,be.EncoderOnly],[uv,be.EncoderDecoder],[lv,be.DecoderOnly],[pv,be.EncoderOnly],[hv,be.EncoderOnly],[Sg,be.Seq2Seq],[xg,be.Seq2Seq],[Cg,be.DecoderOnly],[fv,be.EncoderOnly],[mv,be.EncoderOnly],[Eg,be.Vision2Seq],[gv,be.ImageTextToText],[_v,be.EncoderOnly],[bv,be.EncoderOnly],[vv,be.EncoderOnly],[Tv,be.EncoderOnly],[kv,be.EncoderOnly],[Iv,be.EncoderOnly],[yv,be.EncoderOnly],[wv,be.EncoderOnly],[$v,be.MaskGeneration],[xv,be.EncoderOnly],[Sv,be.EncoderOnly],[dv,be.Seq2Seq],[cv,be.EncoderOnly],[Cv,be.EncoderOnly],[Ev,be.EncoderOnly],[Av,be.EncoderOnly]];for(const[t,e]of Tg)for(const[r,n]of t.values())wi.set(r,e),qn.set(n,r),_m.set(r,n);const Mv=[["MusicgenForConditionalGeneration",$g,be.Musicgen],["CLIPTextModelWithProjection",Ib,be.EncoderOnly],["SiglipTextModel",Ob,be.EncoderOnly],["ClapTextModelWithProjection",Q1,be.EncoderOnly],["ClapAudioModelWithProjection",J1,be.EncoderOnly]];for(const[t,e,r]of Mv)wi.set(t,r),qn.set(e,t),_m.set(t,e);class Ov extends sv{static MODEL_CLASS_MAPPINGS=Tg.map(e=>e[0]);static BASE_IF_FAIL=!0}class Ae extends Ot{constructor({logits:e}){super(),this.logits=e}}class zv extends Ot{constructor({logits:e,embeddings:r}){super(),this.logits=e,this.embeddings=r}}class lt extends Ot{constructor({logits:e}){super(),this.logits=e}}class dt extends Ot{constructor({logits:e}){super(),this.logits=e}}class mt extends Ot{constructor({start_logits:e,end_logits:r}){super(),this.start_logits=e,this.end_logits=r}}class gn extends Ot{constructor({logits:e}){super(),this.logits=e}}class Rv extends Ot{constructor({alphas:e}){super(),this.alphas=e}}class Pv extends Ot{constructor({waveform:e,spectrogram:r}){super(),this.waveform=e,this.spectrogram=r}}const It=typeof self<"u",Bv=It&&self.constructor.name==="DedicatedWorkerGlobalScope";let kr,kg,fr;if(It)kr=(t,e)=>{if(!self.OffscreenCanvas)throw new Error("OffscreenCanvas not supported by this browser.");return new self.OffscreenCanvas(t,e)},fr=self.createImageBitmap,kg=self.ImageData;else if(Ye)fr=async t=>{const r=(await t.metadata()).channels,{data:n,info:i}=await t.rotate().raw().toBuffer({resolveWithObject:!0}),a=new Vt(new Uint8ClampedArray(n),i.width,i.height,i.channels);return r!==void 0&&r!==i.channels&&a.convert(r),a};else throw new Error("Unable to load image processing library.");const Dv={0:"nearest",1:"lanczos",2:"bilinear",3:"bicubic",4:"box",5:"hamming"},Nv=new Map([["png","image/png"],["jpg","image/jpeg"],["jpeg","image/jpeg"],["gif","image/gif"]]);class Vt{constructor(e,r,n,i){this.data=e,this.width=r,this.height=n,this.channels=i}get size(){return[this.width,this.height]}static async read(e){if(e instanceof Vt)return e;if(typeof e=="string"||e instanceof URL)return await this.fromURL(e);throw new Error(`Unsupported input type: ${typeof e}`)}static fromCanvas(e){if(!It)throw new Error("fromCanvas() is only supported in browser environments.");const n=e.getContext("2d").getImageData(0,0,e.width,e.height).data;return new Vt(n,e.width,e.height,4)}static async fromURL(e){const r=await Rs(e);if(r.status!==200)throw new Error(`Unable to read image from "${e}" (${r.status} ${r.statusText})`);const n=await r.blob();return this.fromBlob(n)}static async fromBlob(e){if(It){const r=await fr(e),n=kr(r.width,r.height).getContext("2d");return n.drawImage(r,0,0),new this(n.getImageData(0,0,r.width,r.height).data,r.width,r.height,4)}else{const r=Ye(await e.arrayBuffer());return await fr(r)}}static fromTensor(e,r="CHW"){if(e.dims.length!==3)throw new Error(`Tensor should have 3 dimensions, but has ${e.dims.length} dimensions.`);if(r==="CHW")e=e.transpose(1,2,0);else if(r!=="HWC")throw new Error(`Unsupported channel format: ${r}`);if(!(e.data instanceof Uint8ClampedArray||e.data instanceof Uint8Array))throw new Error(`Unsupported tensor type: ${e.type}`);switch(e.dims[2]){case 1:case 2:case 3:case 4:return new Vt(e.data,e.dims[1],e.dims[0],e.dims[2]);default:throw new Error(`Unsupported number of channels: ${e.dims[2]}`)}}grayscale(){if(this.channels===1)return this;const e=new Uint8ClampedArray(this.width*this.height*1);switch(this.channels){case 3:case 4:for(let r=0,n=0;r=0?u=n:h=-n,i>=0?l=i:f=-i,o.drawImage(s,u,l,e,r,h,f,e,r),new Vt(o.getImageData(0,0,e,r).data,e,r,4).convert(a)}else{let a=this.toSharp();if(n>=0&&i>=0)a=a.extract({left:Math.floor(n),top:Math.floor(i),width:e,height:r});else if(n<=0&&i<=0){const s=Math.floor(-i),o=Math.floor(-n);a=a.extend({top:s,left:o,right:e-this.width-o,bottom:r-this.height-s})}else{let s=[0,0],o=0;i<0?(s[0]=Math.floor(-i),s[1]=r-this.height-s[0]):o=Math.floor(i);let u=[0,0],l=0;n<0?(u[0]=Math.floor(-n),u[1]=e-this.width-u[0]):l=Math.floor(n),a=a.extend({top:s[0],bottom:s[1],left:u[0],right:u[1]}).extract({left:l,top:o,width:e,height:r})}return await fr(a)}}async toBlob(e="image/png",r=1){if(!It)throw new Error("toBlob() is only supported in browser environments.");return await this.toCanvas().convertToBlob({type:e,quality:r})}toTensor(e="CHW"){let r=new ce("uint8",new Uint8Array(this.data),[this.height,this.width,this.channels]);if(e!=="HWC")if(e==="CHW")r=r.permute(2,0,1);else throw new Error(`Unsupported channel format: ${e}`);return r}toCanvas(){if(!It)throw new Error("toCanvas() is only supported in browser environments.");const e=this.clone().rgba(),r=kr(e.width,e.height),n=new kg(e.data,e.width,e.height);return r.getContext("2d").putImageData(n,0,0),r}_update(e,r,n,i=null){return this.data=e,this.width=r,this.height=n,i!==null&&(this.channels=i),this}clone(){return new Vt(this.data.slice(),this.width,this.height,this.channels)}convert(e){if(this.channels===e)return this;switch(e){case 1:this.grayscale();break;case 3:this.rgb();break;case 4:this.rgba();break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this}async save(e){if(It){if(Bv)throw new Error("Unable to save an image from a Web Worker.");const r=e.split(".").pop().toLowerCase(),n=Nv.get(r)??"image/png",i=await this.toBlob(n),a=URL.createObjectURL(i),s=document.createElement("a");s.href=a,s.download=e,s.click(),s.remove()}else{if(bt.useFS)return await this.toSharp().toFile(e);throw new Error("Unable to save the image because filesystem is disabled in this environment.")}}toSharp(){if(It)throw new Error("toSharp() is only supported in server-side environments.");return Ye(this.data,{raw:{width:this.width,height:this.height,channels:this.channels}})}}function Tc(t){if(t<1)return new Float64Array;if(t===1)return new Float64Array([1]);const e=t-1,r=Math.PI/e,n=new Float64Array(t);for(let i=0;i2595*Math.log10(1+t/700),kaldi:t=>1127*Math.log(1+t/700),slaney:(t,e=1e3,r=15,n=27/Math.log(6.4))=>t>=e?r+Math.log(t/e)*n:3*t/200};function Is(t,e="htk"){const r=Fv[e];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return typeof t=="number"?r(t):t.map(n=>r(n))}const Lv={htk:t=>700*(10**(t/2595)-1),kaldi:t=>700*(Math.exp(t/1127)-1),slaney:(t,e=1e3,r=15,n=Math.log(6.4)/27)=>t>=r?e*Math.exp(n*(t-r)):200*t/3};function Wv(t,e="htk"){const r=Lv[e];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return typeof t=="number"?r(t):t.map(n=>r(n))}function Uv(t,e){const r=Float64Array.from({length:e.length-1},(s,o)=>e[o+1]-e[o]),n=Array.from({length:t.length},()=>new Array(e.length));for(let s=0;snew Array(t.length));for(let s=0;st+n*a)}function Zn(t,e,r,n,i,a=null,s="htk",o=!1){if(a!==null&&a!=="slaney")throw new Error('norm must be one of null or "slaney"');const u=Is(r,s),l=Is(n,s),h=kc(u,l,e+2);let f=Wv(h,s),m;if(o){const y=i/(t*2);m=Is(Float64Array.from({length:t},(b,v)=>v*y),s),f=h}else m=kc(0,Math.floor(i/2),t);const c=Uv(m,f);if(a!==null&&a==="slaney")for(let y=0;yi)throw Error(`frame_length (${r}) may not be larger than fft_length (${i})`);if(I!==r)throw new Error(`Length of the window (${I}) must equal frame_length (${r})`);if(n<=0)throw new Error("hop_length must be greater than zero");if(a===null&&h!==null)throw new Error("You have provided `mel_filters` but `power` is `None`. Mel spectrogram computation is not yet supported for complex-valued spectrogram. Specify `power` to fix this issue.");if(s){if(o!=="reflect")throw new Error(`pad_mode="${o}" not implemented yet.`);const U=Math.floor((i-1)/2)+1;t=Vv(t,U,U)}const A=Math.floor(1+Math.floor((t.length-r)/n)),R=u?Math.floor(i/2)+1:i;let z=A,P=A;C!==null&&(C>A?x&&(P=C):P=z=C);const J=new d0(i),K=new Float64Array(i),ue=new Float64Array(J.outputBufferSize),ie=new Float32Array(R*P);for(let U=0;U=1;--re)K[re]-=l*K[re-1];K[0]*=1-l}for(let re=0;reMath.pow(o,.85));break;default:throw new Error(`Unknown window type ${e}.`)}if(r&&(s=s.subarray(0,t)),n===null)return s;if(t>n)throw new Error(`Length of the window (${t}) may not be larger than frame_length (${n})`);return s}function qv([t,e,r,n]){return[t-r/2,e-n/2,t+r/2,e+n/2]}function Ro(t,e=.5,r=null,n=!1){const i=t.logits,a=t.pred_boxes,[s,o,u]=i.dims;if(r!==null&&r.length!==s)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let l=[];for(let h=0;he&&C.push(T)}else{let T=_r(v.data)[1];if(T===u-1||(x=ta(v.data),x[T]A*f[(R+1)%2])),m.boxes.push(I),m.classes.push(T),m.scores.push(x[T])}}l.push(m)}return l}function ya(t,e){if(!(t instanceof Float32Array||t instanceof Float64Array))throw new Error(`${e} expects input to be a Float32Array or a Float64Array, but got ${t?.constructor?.name??typeof t} instead. If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function Ic(t,e,r=0,n=null){const i=t/e;let a=p0(i)*e;return n!==null&&a>n&&(a=Math.floor(i)*e),aa?l=Math.floor(a*u/i):a>i&&(u=Math.floor(i*l/a)),await e.resize(l,u,{resample:n}))}async crop_margin(e,r=200){const n=e.clone().grayscale(),i=u0(n.data)[0],s=_r(n.data)[0]-i;if(s===0)return e;const o=r/255;let u=n.width,l=n.height,h=0,f=0;const m=n.data;for(let c=0;cthis.preprocess(a)));return{pixel_values:Qn(n.map(a=>a.pixel_values),0),original_sizes:n.map(a=>a.original_size),reshaped_input_sizes:n.map(a=>a.reshaped_input_size)}}}class jv extends je{post_process_semantic_segmentation(e,r=null){const n=e.logits,i=n.dims[0];if(r!==null&&r.length!==i)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const a=[];for(let s=0;sm[T]&&(m[T]=x[T],c[T]=C)}const y=new Array(u.dims[0]),b=f.data;for(let C=0;CC!==void 0);a.push({segmentation:f,labels:v})}return a}}class Ag extends je{}class Kv extends Ag{}class Yv extends je{}class Xv extends je{}class Mg extends je{}class Qv extends Mg{}class Jv extends je{}class Zv extends je{}class Og extends je{constructor(e){super(e),this.crop_pct=this.config.crop_pct??224/256}async resize(e){const r=this.size?.shortest_edge;if(r===void 0)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(r<384){const n=Math.floor(r/this.crop_pct),[i,a]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(i,a,{resample:this.resample}),e=await e.center_crop(r,r)}else e=await e.resize(r,r,{resample:this.resample});return e}}class e$ extends Og{}class t$ extends je{}class r$ extends je{}class n$ extends je{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map(r=>r*r))}}class zg extends je{}class a$ extends zg{}class Rg extends je{post_process_object_detection(...e){return Ro(...e)}}class i$ extends Rg{}class s$ extends je{}class o$ extends je{}class Pg extends je{pad_image(e,r,n,i={}){const[a,s,o]=r;let u=this.image_mean;Array.isArray(this.image_mean)||(u=new Array(o).fill(u));let l=this.image_std;Array.isArray(l)||(l=new Array(o).fill(u));const h=u.map((f,m)=>-f/l[m]);return super.pad_image(e,r,n,{center:!0,constant_values:h,...i})}}class u$ extends Pg{}class l$ extends je{async _call(e){const r=await super._call(e),n=[r.pixel_values.dims[0],64,64],i=new ce("int64",new BigInt64Array(n.reduce((a,s)=>a*s)).fill(1n),n);return{...r,pixel_mask:i}}post_process_object_detection(...e){return Ro(...e)}remove_low_and_no_objects(e,r,n,i){let a=[],s=[],o=[];for(let u=0;un&&(a.push(h),s.push(c),o.push(f))}return[a,s,o]}check_segment_validity(e,r,n,i=.5,a=.8){let s=[],o=0,u=0;const l=r[n].data;for(let f=0;f=i&&++u;let h=o>0&&u>0;return h&&(h=o/u>a),[h,s]}compute_segments(e,r,n,i,a,s=null,o=null){let[u,l]=o??e[0].dims,h=new ce("int32",new Int32Array(u*l),[u,l]),f=[];if(o!==null)for(let v=0;vc[T]&&(m[T]=v,c[T]=x[T])}let y=0;const b=h.data;for(let v=0;vi!==r.dims[a]))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new ce("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:r=null,input_labels:n=null,input_boxes:i=null}={}){const a=await super._call(e);if(r&&(a.input_points=this.reshape_input_points(r,a.original_sizes,a.reshaped_input_sizes)),n){if(!a.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");a.input_labels=this.add_input_labels(n,a.input_points)}return i&&(a.input_boxes=this.reshape_input_points(i,a.original_sizes,a.reshaped_input_sizes,!0)),a}async post_process_masks(e,r,n,{mask_threshold:i=0,binarize:a=!0,pad_size:s=null}={}){const o=[];s=s??this.pad_size;const u=[s.height,s.width];for(let l=0;li&&(y[b]=1);m=new ce("bool",y,m.dims)}o.push(m)}return o}generate_crop_boxes(e,r,{crop_n_layers:n=0,overlap_ratio:i=512/1500,points_per_crop:a=32,crop_n_points_downscale_factor:s=1}={}){}}class p$ extends je{pad_image(e,r,n,i={}){const[a,s,o]=r;return super.pad_image(e,r,{width:s+(n-s%n)%n,height:a+(n-a%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...i})}}class h$ extends je{async _call(e,r){Array.isArray(e)||(e=[e]),Array.isArray(r)||(r=[r]);const n=await Promise.all(e.map(s=>this.preprocess(s))),i=await Promise.all(r.map(s=>this.preprocess(s,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0})));return{pixel_values:Qn(n.map((s,o)=>Jt([s.pixel_values,i[o].pixel_values],0)),0),original_sizes:n.map(s=>s.original_size),reshaped_input_sizes:n.map(s=>s.reshaped_input_size)}}}class f$ extends Vr{constructor(e){super(e),this.config.mel_filters??=Zn(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=zi(this.config.n_fft,"hann")}async _extract_fbank_features(e){const r=await Oi(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=r.data,i=_r(n)[0];for(let a=0;athis.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),r=e.slice(0,this.config.n_samples)):(r=new Float32Array(this.config.n_samples),r.set(e)),{input_features:(await this._extract_fbank_features(r)).unsqueeze_(0)}}}class m$ extends Vr{_zero_mean_unit_var_norm(e){const n=e.reduce((a,s)=>a+s,0)/e.length,i=e.reduce((a,s)=>a+(s-n)**2,0)/e.length;return e.map(a=>(a-n)/Math.sqrt(i+1e-7))}async _call(e){ya(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let r=e;this.config.do_normalize&&(r=this._zero_mean_unit_var_norm(r));const n=[1,r.length];return{input_values:new ce("float32",r,n),attention_mask:new ce("int64",new BigInt64Array(r.length).fill(1n),n)}}}class g$ extends Vr{constructor(e){super(e);const r=this.config.sampling_rate,n=Zn(256,this.config.num_mel_bins,20,Math.floor(r/2),r,null,"kaldi",!0);for(let i=0;in*32768),Oi(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1192092955078125e-22,remove_dc_offset:!0,max_num_frames:r,transpose:!0})}async _call(e,{padding:r=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:i=!0,return_attention_mask:a=!0}={}){ya(e,"SeamlessM4TFeatureExtractor");let s=await this._extract_fbank_features(e,this.config.max_length);if(i){const[y,b]=s.dims,v=s.data;for(let C=0;C0){const x=new Float32Array(b*(y+C));x.set(v),x.fill(this.config.padding_value,v.length);const T=y+C;s=new ce(s.type,x,[T,b]),a&&(o=new ce("int64",new BigInt64Array(T),[1,T]),o.data.fill(1n,0,y))}}const[u,l]=s.dims,h=this.config.stride;if(u%h!==0)throw new Error(`The number of frames (${u}) must be a multiple of the stride (${h}).`);const m=s.view(1,Math.floor(u/h),l*h),c={input_features:m};if(a){const y=m.dims[1],b=new BigInt64Array(y);if(o){const v=o.data;for(let C=1,x=0;C0)if(n==="rand_trunc"){const o=Math.floor(Math.random()*(s+1));e=e.subarray(o,o+r),a=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}else throw new Error(`Truncation strategy "${n}" not implemented`);else{if(s<0){let o=new Float64Array(r);if(o.set(e),i==="repeat")for(let u=e.length;u{dn=Number(Xs.value),Po.feature_extractor.size={width:dn,height:dn},T$.textContent=dn});Xs.disabled=!1;let cn=.4;Qs.addEventListener("input",()=>{cn=Number(Qs.value),Bg(Fr.videoWidth*cn,Fr.videoHeight*cn),k$.textContent=cn});Qs.disabled=!1;Ri.textContent="Ready";let Ms=!1,Os;const Ac=bi.getContext("2d",{willReadFrequently:!0}),I$=ea.getContext("2d",{willReadFrequently:!0});function Fg(){const{width:t,height:e}=bi;Ms||(Ms=!0,async function(){Ac.drawImage(Fr,0,0,t,e);const r=Ac.getImageData(0,0,t,e),n=new Vt(r.data,t,e,4),i=await Po(n),{predicted_depth:a}=await Ng(i),s=a.data,[o,u,l]=a.dims;let h=1/0,f=-1/0;ea.width=l,ea.height=u;for(let b=0;bf&&(f=v)}const m=f-h,c=new Uint8ClampedArray(4*s.length);for(let b=0;b{Fr.srcObject=t,Fr.play();const e=t.getVideoTracks()[0],{width:r,height:n}=e.getSettings();Bg(r*cn,n*cn),setTimeout(Fg,50)}).catch(t=>{alert(t)});
+ */const z_=Object.freeze(Object.defineProperty({__proto__:null,get InferenceSession(){return ao},get TRACE(){return jn},get TRACE_FUNC_BEGIN(){return Ht},get TRACE_FUNC_END(){return Mt},get Tensor(){return ft},get TrainingSession(){return io},default:O_,get env(){return Fe},get registerBackend(){return Or}},Symbol.toStringTag,{value:"Module"})),R_=(t,e)=>{const r=typeof document<"u"?document.createElement("canvas"):new OffscreenCanvas(1,1);r.width=t.dims[3],r.height=t.dims[2];const n=r.getContext("2d");if(n!=null){let i,a;e?.tensorLayout!==void 0&&e.tensorLayout==="NHWC"?(i=t.dims[2],a=t.dims[3]):(i=t.dims[3],a=t.dims[2]);const s=e?.format!==void 0?e.format:"RGB",o=e?.norm;let u,l;o===void 0||o.mean===void 0?u=[255,255,255,255]:typeof o.mean=="number"?u=[o.mean,o.mean,o.mean,o.mean]:(u=[o.mean[0],o.mean[1],o.mean[2],0],o.mean[3]!==void 0&&(u[3]=o.mean[3])),o===void 0||o.bias===void 0?l=[0,0,0,0]:typeof o.bias=="number"?l=[o.bias,o.bias,o.bias,o.bias]:(l=[o.bias[0],o.bias[1],o.bias[2],0],o.bias[3]!==void 0&&(l[3]=o.bias[3]));const h=a*i;let f=0,m=h,c=h*2,y=-1;s==="RGBA"?(f=0,m=h,c=h*2,y=h*3):s==="RGB"?(f=0,m=h,c=h*2):s==="RBG"&&(f=0,c=h,m=h*2);for(let b=0;b{const r=typeof document<"u"?document.createElement("canvas").getContext("2d"):new OffscreenCanvas(1,1).getContext("2d");let n;if(r!=null){let i,a,s;e?.tensorLayout!==void 0&&e.tensorLayout==="NHWC"?(i=t.dims[2],a=t.dims[1],s=t.dims[3]):(i=t.dims[3],a=t.dims[2],s=t.dims[1]);const o=e!==void 0&&e.format!==void 0?e.format:"RGB",u=e?.norm;let l,h;u===void 0||u.mean===void 0?l=[255,255,255,255]:typeof u.mean=="number"?l=[u.mean,u.mean,u.mean,u.mean]:(l=[u.mean[0],u.mean[1],u.mean[2],255],u.mean[3]!==void 0&&(l[3]=u.mean[3])),u===void 0||u.bias===void 0?h=[0,0,0,0]:typeof u.bias=="number"?h=[u.bias,u.bias,u.bias,u.bias]:(h=[u.bias[0],u.bias[1],u.bias[2],0],u.bias[3]!==void 0&&(h[3]=u.bias[3]));const f=a*i;if(e!==void 0&&(e.format!==void 0&&s===4&&e.format!=="RGBA"||s===3&&e.format!=="RGB"&&e.format!=="BGR"))throw new Error("Tensor format doesn't match input tensor dims");const m=4;let c=0,y=1,b=2,v=3,C=0,x=f,T=f*2,I=-1;o==="RGBA"?(C=0,x=f,T=f*2,I=f*3):o==="RGB"?(C=0,x=f,T=f*2):o==="RBG"&&(C=0,T=f,x=f*2),n=r.createImageData(i,a);for(let A=0;A{if(t===void 0)throw new Error("Image buffer must be defined");if(e.height===void 0||e.width===void 0)throw new Error("Image height and width must be defined");if(e.tensorLayout==="NHWC")throw new Error("NHWC Tensor layout is not supported yet");const{height:r,width:n}=e,i=e.norm??{mean:255,bias:0};let a,s;typeof i.mean=="number"?a=[i.mean,i.mean,i.mean,i.mean]:a=[i.mean[0],i.mean[1],i.mean[2],i.mean[3]??255],typeof i.bias=="number"?s=[i.bias,i.bias,i.bias,i.bias]:s=[i.bias[0],i.bias[1],i.bias[2],i.bias[3]??0];const o=e.format!==void 0?e.format:"RGBA",u=e.tensorFormat!==void 0&&e.tensorFormat!==void 0?e.tensorFormat:"RGB",l=r*n,h=u==="RGBA"?new Float32Array(l*4):new Float32Array(l*3);let f=4,m=0,c=1,y=2,b=3,v=0,C=l,x=l*2,T=-1;o==="RGB"&&(f=3,m=0,c=1,y=2,b=-1),u==="RGBA"?T=l*3:u==="RBG"?(v=0,x=l,C=l*2):u==="BGR"&&(x=0,C=l,v=l*2);for(let A=0;A{const r=typeof HTMLImageElement<"u"&&t instanceof HTMLImageElement,n=typeof ImageData<"u"&&t instanceof ImageData,i=typeof ImageBitmap<"u"&&t instanceof ImageBitmap,a=typeof t=="string";let s,o=e??{};const u=()=>{if(typeof document<"u")return document.createElement("canvas");if(typeof OffscreenCanvas<"u")return new OffscreenCanvas(1,1);throw new Error("Canvas is not supported")},l=h=>h instanceof HTMLCanvasElement||h instanceof OffscreenCanvas?h.getContext("2d"):null;if(r){const h=u();h.width=t.width,h.height=t.height;const f=l(h);if(f!=null){let m=t.height,c=t.width;if(e!==void 0&&e.resizedHeight!==void 0&&e.resizedWidth!==void 0&&(m=e.resizedHeight,c=e.resizedWidth),e!==void 0){if(o=e,e.tensorFormat!==void 0)throw new Error("Image input config format must be RGBA for HTMLImageElement");o.tensorFormat="RGBA",o.height=m,o.width=c}else o.tensorFormat="RGBA",o.height=m,o.width=c;f.drawImage(t,0,0),s=f.getImageData(0,0,c,m).data}else throw new Error("Can not access image data")}else if(n){let h,f;if(e!==void 0&&e.resizedWidth!==void 0&&e.resizedHeight!==void 0?(h=e.resizedHeight,f=e.resizedWidth):(h=t.height,f=t.width),e!==void 0&&(o=e),o.format="RGBA",o.height=h,o.width=f,e!==void 0){const m=u();m.width=f,m.height=h;const c=l(m);if(c!=null)c.putImageData(t,0,0),s=c.getImageData(0,0,f,h).data;else throw new Error("Can not access image data")}else s=t.data}else if(i){if(e===void 0)throw new Error("Please provide image config with format for Imagebitmap");const h=u();h.width=t.width,h.height=t.height;const f=l(h);if(f!=null){const m=t.height,c=t.width;return f.drawImage(t,0,0,c,m),s=f.getImageData(0,0,c,m).data,o.height=m,o.width=c,Ts(s,o)}else throw new Error("Can not access image data")}else{if(a)return new Promise((h,f)=>{const m=u(),c=l(m);if(!t||!c)return f();const y=new Image;y.crossOrigin="Anonymous",y.src=t,y.onload=()=>{m.width=y.width,m.height=y.height,c.drawImage(y,0,0,m.width,m.height);const b=c.getImageData(0,0,m.width,m.height);o.height=m.height,o.width=m.width,h(Ts(b.data,o))}});throw new Error("Input data provided is not supported - aborted tensor creation")}if(s!==void 0)return Ts(s,o);throw new Error("Input data provided is not supported - aborted tensor creation")},D_=(t,e)=>{const{width:r,height:n,download:i,dispose:a}=e,s=[1,n,r,4];return new Qt({location:"texture",type:"float32",texture:t,dims:s,download:i,dispose:a})},N_=(t,e)=>{const{dataType:r,dims:n,download:i,dispose:a}=e;return new Qt({location:"gpu-buffer",type:r??"float32",gpuBuffer:t,dims:n,download:i,dispose:a})},F_=(t,e,r)=>new Qt({location:"cpu-pinned",type:t,data:e,dims:r??[e.length]}),ln=new Map([["float32",Float32Array],["uint8",Uint8Array],["int8",Int8Array],["uint16",Uint16Array],["int16",Int16Array],["int32",Int32Array],["bool",Uint8Array],["float64",Float64Array],["uint32",Uint32Array]]),ii=new Map([[Float32Array,"float32"],[Uint8Array,"uint8"],[Int8Array,"int8"],[Uint16Array,"uint16"],[Int16Array,"int16"],[Int32Array,"int32"],[Float64Array,"float64"],[Uint32Array,"uint32"]]);let wc=!1;const L_=()=>{if(!wc){wc=!0;const t=typeof BigInt64Array<"u"&&BigInt64Array.from,e=typeof BigUint64Array<"u"&&BigUint64Array.from,r=typeof Float16Array<"u"&&Float16Array.from;t&&(ln.set("int64",BigInt64Array),ii.set(BigInt64Array,"int64")),e&&(ln.set("uint64",BigUint64Array),ii.set(BigUint64Array,"uint64")),r?(ln.set("float16",Float16Array),ii.set(Float16Array,"float16")):ln.set("float16",Uint16Array)}},W_=t=>{let e=1;for(let r=0;r{switch(t.location){case"cpu":return new Qt(t.type,t.data,e);case"cpu-pinned":return new Qt({location:"cpu-pinned",data:t.data,type:t.type,dims:e});case"texture":return new Qt({location:"texture",texture:t.texture,type:t.type,dims:e});case"gpu-buffer":return new Qt({location:"gpu-buffer",gpuBuffer:t.gpuBuffer,type:t.type,dims:e});default:throw new Error(`tensorReshape: tensor location ${t.location} is not supported`)}};let Qt=class{constructor(e,r,n){L_();let i,a;if(typeof e=="object"&&"location"in e)switch(this.dataLocation=e.location,i=e.type,a=e.dims,e.location){case"cpu-pinned":{const o=ln.get(i);if(!o)throw new TypeError(`unsupported type "${i}" to create tensor from pinned buffer`);if(!(e.data instanceof o))throw new TypeError(`buffer should be of type ${o.name}`);this.cpuData=e.data;break}case"texture":{if(i!=="float32")throw new TypeError(`unsupported type "${i}" to create tensor from texture`);this.gpuTextureData=e.texture,this.downloader=e.download,this.disposer=e.dispose;break}case"gpu-buffer":{if(i!=="float32"&&i!=="float16"&&i!=="int32"&&i!=="int64"&&i!=="uint32"&&i!=="uint8"&&i!=="bool")throw new TypeError(`unsupported type "${i}" to create tensor from gpu buffer`);this.gpuBufferData=e.gpuBuffer,this.downloader=e.download,this.disposer=e.dispose;break}default:throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`)}else{let o,u;if(typeof e=="string")if(i=e,u=n,e==="string"){if(!Array.isArray(r))throw new TypeError("A string tensor's data must be a string array.");o=r}else{const l=ln.get(e);if(l===void 0)throw new TypeError(`Unsupported tensor type: ${e}.`);if(Array.isArray(r)){if(e==="float16"&&l===Uint16Array)throw new TypeError("Creating a float16 tensor from number array is not supported. Please use Uint16Array as data.");e==="uint64"||e==="int64"?o=l.from(r,BigInt):o=l.from(r)}else if(r instanceof l)o=r;else throw new TypeError(`A ${i} tensor's data must be type of ${l}`)}else if(u=r,Array.isArray(e)){if(e.length===0)throw new TypeError("Tensor type cannot be inferred from an empty array.");const l=typeof e[0];if(l==="string")i="string",o=e;else if(l==="boolean")i="bool",o=Uint8Array.from(e);else throw new TypeError(`Invalid element type of data array: ${l}.`)}else{const l=ii.get(e.constructor);if(l===void 0)throw new TypeError(`Unsupported type for tensor data: ${e.constructor}.`);i=l,o=e}if(u===void 0)u=[o.length];else if(!Array.isArray(u))throw new TypeError("A tensor's dims must be a number array");a=u,this.cpuData=o,this.dataLocation="cpu"}const s=W_(a);if(this.cpuData&&s!==this.cpuData.length)throw new Error(`Tensor's size(${s}) does not match data length(${this.cpuData.length}).`);this.type=i,this.dims=a,this.size=s}static async fromImage(e,r){return B_(e,r)}static fromTexture(e,r){return D_(e,r)}static fromGpuBuffer(e,r){return N_(e,r)}static fromPinnedBuffer(e,r,n){return F_(e,r,n)}toDataURL(e){return R_(this,e)}toImageData(e){return P_(this,e)}get data(){if(this.ensureValid(),!this.cpuData)throw new Error("The data is not on CPU. Use `getData()` to download GPU data to CPU, or use `texture` or `gpuBuffer` property to access the GPU data directly.");return this.cpuData}get location(){return this.dataLocation}get texture(){if(this.ensureValid(),!this.gpuTextureData)throw new Error("The data is not stored as a WebGL texture.");return this.gpuTextureData}get gpuBuffer(){if(this.ensureValid(),!this.gpuBufferData)throw new Error("The data is not stored as a WebGPU buffer.");return this.gpuBufferData}async getData(e){switch(this.ensureValid(),this.dataLocation){case"cpu":case"cpu-pinned":return this.data;case"texture":case"gpu-buffer":{if(!this.downloader)throw new Error("The current tensor is not created with a specified data downloader.");if(this.isDownloading)throw new Error("The current tensor is being downloaded.");try{this.isDownloading=!0;const r=await this.downloader();return this.downloader=void 0,this.dataLocation="cpu",this.cpuData=r,e&&this.disposer&&(this.disposer(),this.disposer=void 0),r}finally{this.isDownloading=!1}}default:throw new Error(`cannot get data from location: ${this.dataLocation}`)}}dispose(){if(this.isDownloading)throw new Error("The current tensor is being downloaded.");this.disposer&&(this.disposer(),this.disposer=void 0),this.cpuData=void 0,this.gpuTextureData=void 0,this.gpuBufferData=void 0,this.downloader=void 0,this.isDownloading=void 0,this.dataLocation="none"}ensureValid(){if(this.dataLocation==="none")throw new Error("The tensor is disposed.")}reshape(e){if(this.ensureValid(),this.downloader||this.disposer)throw new Error("Cannot reshape a tensor that owns GPU resource.");return U_(this,e)}};const V_=Qt,Hn=[];let Ks,Xn;Pr.IS_NODE_ENV?(Xn=Ye??qg,Hn.push("cpu"),Ks=["cpu"]):(Xn=z_,Pr.IS_WEBGPU_AVAILABLE&&Hn.push("webgpu"),Hn.push("wasm"),Ks=["wasm"]);const G_=Xn.InferenceSession;function H_(t){let e=Ks;if(t){if(!Hn.includes(t))throw new Error(`Unsupported device: "${t}". Should be one of: ${Hn.join(", ")}.`);e=[t]}return e}async function lm(t,e){return await G_.create(t,e)}function dm(t){return t instanceof Xn.Tensor}const Mr=Xn?.env;Mr?.wasm&&(Mr.wasm.wasmPaths="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/",Mr.wasm.proxy=!Pr.IS_WEBWORKER_ENV,(typeof crossOriginIsolated>"u"||!crossOriginIsolated)&&(Mr.wasm.numThreads=1),typeof navigator<"u"&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(Mr.wasm.simd=!1));function q_(){return Mr?.wasm?.proxy}bt.backends.onnx=Mr;const an=async(t,e,r)=>{const n=await lm(new Uint8Array(t),e);return async i=>{const a=Object.fromEntries(Object.entries(i).map(([o,u])=>[o,u.ort_tensor])),s=await n.run(a);return Array.isArray(r)?r.map(o=>new ce(s[o])):new ce(s[r])}};class _i{static session_options={};static get bilinear_interpolate_4d(){return this._bilinear_interpolate_4d||(this._bilinear_interpolate_4d=an([8,9,18,0,58,128,1,10,40,10,1,120,10,0,10,0,10,1,115,18,1,121,34,6,82,101,115,105,122,101,42,17,10,4,109,111,100,101,34,6,108,105,110,101,97,114,160,1,3,18,1,114,90,31,10,1,120,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,90,15,10,1,115,18,10,10,8,8,7,18,4,10,2,8,4,98,31,10,1,121,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,66,2,16,20],this.session_options,"y")),this._bilinear_interpolate_4d}static get bicubic_interpolate_4d(){return this._bicubic_interpolate_4d||(this._bicubic_interpolate_4d=an([8,9,18,0,58,127,10,39,10,1,120,10,0,10,0,10,1,115,18,1,121,34,6,82,101,115,105,122,101,42,16,10,4,109,111,100,101,34,5,99,117,98,105,99,160,1,3,18,1,114,90,31,10,1,120,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,90,15,10,1,115,18,10,10,8,8,7,18,4,10,2,8,4,98,31,10,1,121,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,99,10,3,18,1,104,10,3,18,1,119,66,2,16,20],this.session_options,"y")),this._bicubic_interpolate_4d}static get matmul(){return this._matmul||(this._matmul=an([8,9,18,0,58,55,10,17,10,1,97,10,1,98,18,1,99,34,6,77,97,116,77,117,108,18,1,114,90,9,10,1,97,18,4,10,2,8,1,90,9,10,1,98,18,4,10,2,8,1,98,9,10,1,99,18,4,10,2,8,1,66,2,16,20],this.session_options,"c")),this._matmul}static get stft(){return this._stft||(this._stft=an([8,7,18,0,58,148,1,10,38,10,1,115,10,1,106,10,1,119,10,1,108,18,1,111,34,4,83,84,70,84,42,15,10,8,111,110,101,115,105,100,101,100,24,1,160,1,2,18,1,115,90,26,10,1,115,18,21,10,19,8,1,18,15,10,3,18,1,98,10,3,18,1,115,10,3,18,1,99,90,11,10,1,106,18,6,10,4,8,7,18,0,90,16,10,1,119,18,11,10,9,8,1,18,5,10,3,18,1,119,90,11,10,1,108,18,6,10,4,8,7,18,0,98,31,10,1,111,18,26,10,24,8,1,18,20,10,3,18,1,98,10,3,18,1,102,10,3,18,1,100,10,3,18,1,99,66,2,16,17],this.session_options,"o")),this._stft}static get rfft(){return this._rfft||(this._rfft=an([8,9,18,0,58,97,10,33,10,1,120,10,0,10,1,97,18,1,121,34,3,68,70,84,42,15,10,8,111,110,101,115,105,100,101,100,24,1,160,1,2,18,1,100,90,21,10,1,120,18,16,10,14,8,1,18,10,10,3,18,1,115,10,3,18,1,99,90,11,10,1,97,18,6,10,4,8,7,18,0,98,21,10,1,121,18,16,10,14,8,1,18,10,10,3,18,1,115,10,3,18,1,99,66,2,16,20],this.session_options,"y")),this._rfft}static get top_k(){return this._top_k||(this._top_k=an([8,10,18,0,58,73,10,18,10,1,120,10,1,107,18,1,118,18,1,105,34,4,84,111,112,75,18,1,116,90,9,10,1,120,18,4,10,2,8,1,90,15,10,1,107,18,10,10,8,8,7,18,4,10,2,8,1,98,9,10,1,118,18,4,10,2,8,1,98,9,10,1,105,18,4,10,2,8,7,66,2,16,21],this.session_options,["v","i"])),this._top_k}}const bc=Object.freeze({float32:Float32Array,float16:Uint16Array,float64:Float64Array,string:Array,int8:Int8Array,uint8:Uint8Array,int16:Int16Array,uint16:Uint16Array,int32:Int32Array,uint32:Uint32Array,int64:BigInt64Array,uint64:BigUint64Array,bool:Uint8Array});class ce{get dims(){return this.ort_tensor.dims}set dims(e){this.ort_tensor.dims=e}get type(){return this.ort_tensor.type}get data(){return this.ort_tensor.data}get size(){return this.ort_tensor.size}get location(){return this.ort_tensor.location}ort_tensor;constructor(...e){return dm(e[0])?this.ort_tensor=e[0]:this.ort_tensor=new V_(e[0],e[1],e[2]),new Proxy(this,{get:(r,n)=>{if(typeof n=="string"){let i=Number(n);if(Number.isInteger(i))return r._getitem(i)}return r[n]},set:(r,n,i)=>r[n]=i})}dispose(){this.ort_tensor.dispose()}*[Symbol.iterator](){const[e,...r]=this.dims;if(r.length>0){const n=r.reduce((i,a)=>i*a);for(let i=0;i0){const i=n.reduce((a,s)=>a*s);return this._subarray(e,i,n)}else return new ce(this.type,[this.data[e]],n)}indexOf(e){const r=this.data;for(let n=0;nm)throw new Error(`Invalid slice: ${h}`);let c=[Math.max(f,0),Math.min(m,this.dims[l])];n.push(c),r.push(c[1]-c[0])}else throw new Error(`Invalid slice: ${h}`)}let i=n.map(([l,h])=>h-l),a=i.reduce((l,h)=>l*h);const s=this.data;let o=new s.constructor(a);const u=this.stride();for(let l=0;l=0;--f){const c=i[f];h+=(m%c+n[f][0])*u[f],m=Math.floor(m/c)}o[l]=s[h]}return new ce(this.type,o,r)}permute(...e){return K_(this,e)}transpose(...e){return this.permute(...e)}sum(e=null,r=!1){return this.norm(1,e,r)}norm(e="fro",r=null,n=!1){if(e==="fro")e=2;else if(typeof e=="string")throw Error(`Unsupported norm: ${e}`);const i=this.data;if(r===null){let o=i.reduce((u,l)=>u+l**e,0)**(1/e);return new ce(this.type,[o],[])}r=Xt(r,this.dims.length);const a=this.dims.slice();a[r]=1;const s=new i.constructor(i.length/this.dims[r]);for(let o=0;o=0;--l){const m=this.dims[l];if(l!==r){const c=h%m;u+=c*f,f*=a[l]}h=Math.floor(h/m)}s[u]+=i[o]**e}if(e!==1)for(let o=0;o=0;--o){const h=this.dims[o];if(o!==r){const f=u%h;s+=f*l,l*=this.dims[o]}u=Math.floor(u/h)}i[a]/=n.data[s]}return this}normalize(e=2,r=1){return this.clone().normalize_(e,r)}stride(){return J_(this.dims)}squeeze(e=null){return new ce(this.type,this.data,$c(this.dims,e))}squeeze_(e=null){return this.dims=$c(this.dims,e),this}unsqueeze(e=null){return new ce(this.type,this.data,xc(this.dims,e))}unsqueeze_(e=null){return this.dims=xc(this.dims,e),this}flatten_(e=0,r=-1){r=(r+this.dims.length)%this.dims.length;let n=this.dims.slice(0,e),i=this.dims.slice(e,r+1),a=this.dims.slice(r+1);return this.dims=[...n,i.reduce((s,o)=>s*o,1),...a],this}flatten(e=0,r=-1){return this.clone().flatten_(e,r)}view(...e){let r=-1;for(let n=0;ns!==r?i*a:i,1);e[r]=this.data.length/n}return new ce(this.type,this.data,e)}neg_(){const e=this.data;for(let r=0;ra*s);if(r!==n)throw Error(`cannot reshape array of size ${r} into shape (${e})`);let i=t;for(let a=e.length-1;a>=0;a--)i=i.reduce((s,o)=>{let u=s[s.length-1];return u.lengthr!==1):typeof e=="number"?t[e]===1&&t.splice(e,1):Array.isArray(e)&&(t=t.filter((r,n)=>r!==1||!e.includes(n))),t}function xc(t,e){return e=Xt(e,t.length+1),t=t.slice(),t.splice(e,0,1),t}function Xt(t,e,r=null,n=!0){if(n&&(t<-e||t>=e))throw new Error(`IndexError: index ${t} is out of bounds for dimension${r===null?"":" "+r} with size ${e}`);return t<0&&(t=(t%e+e)%e),t}function Jt(t,e=0){e=Xt(e,t[0].dims.length);const r=t[0].dims.slice();r[e]=t.reduce((s,o)=>s+o.dims[e],0);const n=r.reduce((s,o)=>s*o,1),i=new t[0].data.constructor(n),a=t[0].type;if(e===0){let s=0;for(let o of t)i.set(o.data,s),s+=o.data.length}else{let s=0;for(let o=0;o=0;--f){const y=u.dims[f];let b=m%y;f===e&&(b+=s),h+=b*c,c*=r[f],m=Math.floor(m/y)}i[h]=u.data[l]}s+=u.dims[e]}}return new ce(a,i,r)}function Qn(t,e=0){return Jt(t.map(r=>r.unsqueeze(e)),e)}function X_(t,e=null,r=1,n=!1){if(e===null){const l=t.data.reduce((c,y)=>c+y,0)/t.data.length,h=Math.sqrt(t.data.reduce((c,y)=>c+(y-l)**2,0)/(t.data.length-r)),f=new ce(t.type,[l],[]);return[new ce(t.type,[h],[]),f]}e=Xt(e,t.dims.length);const i=xo(t,e,n),a=t.dims.slice();a[e]=1;const s=new t.data.constructor(t.data.length/t.dims[e]);for(let u=0;u=0;--h){const c=t.dims[h];if(h!==e){const y=f%c;l+=y*m,m*=a[h]}f=Math.floor(f/c)}s[l]+=(t.data[u]-i.data[l])**2}for(let u=0;us+o,0);return new ce(t.type,[a/t.data.length],[])}e=Xt(e,t.dims.length);const n=t.dims.slice();n[e]=1;const i=new t.data.constructor(t.data.length/t.dims[e]);for(let a=0;a=0;--o){const h=t.dims[o];if(o!==e){const f=u%h;s+=f*l,l*=n[o]}u=Math.floor(u/h)}i[s]+=t.data[a]}if(t.dims[e]!==1)for(let a=0;a0||o>0;)switch(u.push(s-1),l.push(o-1),a[s][o].item()){case 0:--s,--o;break;case 1:--s;break;case 2:--o;break;default:throw new Error(`Internal error in dynamic time warping. Unexpected trace[${s}, ${o}]. Please file a bug report.`)}return u.reverse(),l.reverse(),[u,l]}function J_(t){const e=new Array(t.length);for(let r=t.length-1,n=1;r>=0;--r)e[r]=n,n*=t[r];return e}function So(t,e,r,n){const i=t.reduce((a,s)=>a*s,1);return new ce(r,new n(i).fill(e),t)}function Z_(t,e){let r,n;return r="float32",n=Float32Array,So(t,e,r,n)}function ey(t,e){return Z_(t.dims,e)}function aa(t){return So(t,1n,"int64",BigInt64Array)}function ty(t){return aa(t.dims)}function ry(t){return So(t,0n,"int64",BigInt64Array)}function ny(t){return ry(t.dims)}var Ce=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",BooleanLiteral:"BooleanLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Set:"Set",If:"If",For:"For",In:"In",Is:"Is",NotIn:"NotIn",Else:"Else",EndIf:"EndIf",ElseIf:"ElseIf",EndFor:"EndFor",And:"And",Or:"Or",Not:"UnaryOperator"});Object.freeze({set:Ce.Set,for:Ce.For,in:Ce.In,is:Ce.Is,if:Ce.If,else:Ce.Else,endif:Ce.EndIf,elif:Ce.ElseIf,endfor:Ce.EndFor,and:Ce.And,or:Ce.Or,not:Ce.Not,"not in":Ce.NotIn,true:Ce.BooleanLiteral,false:Ce.BooleanLiteral});Ce.OpenStatement,Ce.CloseStatement,Ce.OpenExpression,Ce.CloseExpression,Ce.OpenParen,Ce.CloseParen,Ce.OpenCurlyBracket,Ce.CloseCurlyBracket,Ce.OpenSquareBracket,Ce.CloseSquareBracket,Ce.Comma,Ce.Dot,Ce.Colon,Ce.Pipe,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.ComparisonBinaryOperator,Ce.AdditiveBinaryOperator,Ce.AdditiveBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.MultiplicativeBinaryOperator,Ce.Equals;const hm=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],ks=new Map(hm),ay=new Map([...hm.map(([t,e])=>[e,t]),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function iy(t){t=t.toLowerCase();let e=ay.get(t);if(e===void 0)if(ks.has(t))e=t;else{const n=t.length===2?ks.keys():ks.values();throw new Error(`Language "${t}" is not supported. Must be one of: ${JSON.stringify(n)}`)}return e}const sy=(()=>{const t=[...Array.from({length:94},(i,a)=>a+33),...Array.from({length:12},(i,a)=>a+161),...Array.from({length:82},(i,a)=>a+174)],e=t.slice();let r=0;for(let i=0;i<256;++i)t.includes(i)||(t.push(i),e.push(256+r),r+=1);const n=e.map(i=>String.fromCharCode(i));return Object.fromEntries(t.map((i,a)=>[i,n[a]]))})();Zg(sy);async function oy(t,e){return await on(t,"config.json",!0,e)}function sn(t){const e={};let r={};switch(t.model_type){case"llava":case"paligemma":r=sn(t.text_config);break;case"moondream1":r=sn(t.phi_config);break;case"musicgen":r=sn(t.decoder);break;case"gpt2":case"gptj":case"codegen":case"gpt_bigcode":e.num_heads="n_head",e.num_layers="n_layer",e.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":e.num_heads="num_attention_heads",e.num_layers="num_hidden_layers",e.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":e.num_heads="num_key_value_heads",e.num_layers="num_hidden_layers",e.hidden_size="hidden_size",e.num_attention_heads="num_attention_heads";break;case"gemma":e.num_heads="num_key_value_heads",e.num_layers="num_hidden_layers",e.dim_kv="head_dim";break;case"openelm":e.num_heads="num_kv_heads",e.num_layers="num_transformer_layers",e.dim_kv="head_dim";break;case"gpt_neo":e.num_heads="num_heads",e.num_layers="num_layers",e.hidden_size="hidden_size";break;case"bloom":e.num_heads="n_head",e.num_layers="n_layer",e.hidden_size="hidden_size";break;case"mpt":e.num_heads="n_heads",e.num_layers="n_layers",e.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":e.num_decoder_layers="num_decoder_layers",e.num_decoder_heads="num_heads",e.decoder_dim_kv="d_kv",e.num_encoder_layers="num_layers",e.num_encoder_heads="num_heads",e.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":e.num_decoder_layers="decoder_layers",e.num_decoder_heads="decoder_attention_heads",e.decoder_hidden_size="d_model",e.num_encoder_layers="encoder_layers",e.num_encoder_heads="encoder_attention_heads",e.encoder_hidden_size="d_model";break;case"speecht5":e.num_decoder_layers="decoder_layers",e.num_decoder_heads="decoder_attention_heads",e.decoder_hidden_size="hidden_size",e.num_encoder_layers="encoder_layers",e.num_encoder_heads="encoder_attention_heads",e.encoder_hidden_size="hidden_size";break;case"trocr":e.num_encoder_layers=e.num_decoder_layers="decoder_layers",e.num_encoder_heads=e.num_decoder_heads="decoder_attention_heads",e.encoder_hidden_size=e.decoder_hidden_size="d_model";break;case"musicgen_decoder":e.num_encoder_layers=e.num_decoder_layers="num_hidden_layers",e.num_encoder_heads=e.num_decoder_heads="num_attention_heads",e.encoder_hidden_size=e.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const i=sn(t.encoder),a=sn(t.decoder),s="num_decoder_layers"in a,o={};return s?(o.num_decoder_layers=a.num_layers,o.num_decoder_heads=a.num_heads,o.decoder_hidden_size=a.hidden_size,o.num_encoder_layers=i.num_layers,o.num_encoder_heads=i.num_heads,o.encoder_hidden_size=i.hidden_size):(o.num_layers=a.num_layers,o.num_heads=a.num_heads,o.hidden_size=a.hidden_size),o}const n={...r,...zr(t,["model_type","multi_query","is_encoder_decoder"])};for(const i in e)n[i]=t[e[i]];return n}function fm(t,{prefix:e="past_key_values",encoder_add_pkv:r=!0}={}){const n={},i=t.normalized_config,a=1;if(i.is_encoder_decoder&&r){const s=i.encoder_dim_kv??i.encoder_hidden_size/i.num_encoder_heads,o=i.decoder_dim_kv??i.decoder_hidden_size/i.num_decoder_heads,u=[a,i.num_encoder_heads,0,s],l=[a,i.num_decoder_heads,0,o];for(let h=0;h=1&&s[s.length-1]>=this.timestamp_begin,u=s.length<2||s[s.length-2]>=this.timestamp_begin;if(o&&(u?a.subarray(this.timestamp_begin).fill(-1/0):a.subarray(0,this.eos_token_id).fill(-1/0)),e[n].length===this.begin_index&&this.max_initial_timestamp_index!==null){const m=this.timestamp_begin+this.max_initial_timestamp_index;a.subarray(m+1).fill(-1/0)}const l=o0(a),h=Math.log(l.subarray(this.timestamp_begin).map(Math.exp).reduce((m,c)=>m+c)),f=_r(l.subarray(0,this.timestamp_begin))[0];h>f&&a.subarray(0,this.timestamp_begin).fill(-1/0)}return r}}class fy extends Zt{constructor(e){super(),this.no_repeat_ngram_size=e}getNgrams(e){const r=e.length,n=[];for(let a=0;a1 to use the classifier free guidance processor, got guidance scale ${e}.`);this.guidance_scale=e}_call(e,r){if(r.dims[0]!==2*e.length)throw new Error(`Logits should have twice the batch size of the input ids, the first half of batches corresponding to the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got batch size ${r.dims[0]} for the logits and ${e.length} for the input ids.`);const n=e.length,i=r.slice([0,n],null),a=r.slice([n,r.dims[0]],null);for(let s=0;s1)throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${e}`);if(!Number.isInteger(n)||n<1)throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${n}`);this.top_p=e,this.filter_value=r,this.min_tokens_to_keep=n}}class $y extends Co{constructor(e,{filter_value:r=-1/0,min_tokens_to_keep:n=1}={}){if(super(),!Number.isInteger(e)||e<0)throw new Error(`\`top_k\` must be a positive integer, but is ${e}`);this.top_k=Math.max(e,n),this.filter_value=r}}class gm{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,zr(e,Object.getOwnPropertyNames(this)))}}class Eo extends or{_call(e,r){throw Error("StoppingCriteria needs to be subclassed")}}class To extends or{constructor(){super(),this.criteria=[]}push(e){this.criteria.push(e)}extend(e){e instanceof To?e=e.criteria:e instanceof Eo&&(e=[e]),this.criteria.push(...e)}_call(e,r){const n=new Array(e.length).fill(!1);for(const i of this.criteria){const a=i(e,r);for(let s=0;sr.length>=this.max_length)}}class Sy extends Eo{constructor(e){super(),Array.isArray(e)||(e=[e]),this.eos_token_id=e}_call(e,r){return e.map(n=>{const i=n.at(-1);return this.eos_token_id.some(a=>i==a)})}}class Si extends or{constructor(e){super(),this.generation_config=e}async _call(e){return this.sample(e)}async sample(e){throw Error("sample should be implemented in subclasses.")}getLogits(e,r){let n=e.dims.at(-1),i=e.data;if(r===-1)i=i.slice(-n);else{let a=r*n;i=i.slice(a,a+n)}return i}randomSelect(e){let r=0;for(let i=0;i1)return new Ty(e);if(e.num_return_sequences>1)throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${e.num_return_sequences}.`);return new Cy(e)}}class Cy extends Si{async sample(e){const r=_r(e.data)[1];return[[BigInt(r),0]]}}class Ey extends Si{async sample(e){let r=e.dims.at(-1);this.generation_config.top_k>0&&(r=Math.min(this.generation_config.top_k,r));const[n,i]=await pm(e,r),a=ta(n.data);return Array.from({length:this.generation_config.num_beams},()=>{const s=this.randomSelect(a);return[i.data[s],Math.log(a[s])]})}}class Ty extends Si{async sample(e){let r=e.dims.at(-1);this.generation_config.top_k>0&&(r=Math.min(this.generation_config.top_k,r));const[n,i]=await pm(e,r),a=ta(n.data);return Array.from({length:this.generation_config.num_beams},(s,o)=>[i.data[o],Math.log(a[o])])}}class ky extends gm{return_timestamps=null;return_token_timestamps=null;num_frames=null;alignment_heads=null;task=null;language=null;no_timestamps_token_id=null;prompt_ids=null;is_multilingual=null;lang_to_id=null;task_to_id=null;max_initial_timestamp_index=1}const be={EncoderOnly:0,EncoderDecoder:1,Seq2Seq:2,Vision2Seq:3,DecoderOnly:4,MaskGeneration:5,ImageTextToText:6,Musicgen:7},wi=new Map,_m=new Map,qn=new Map;async function Iy(t,e,r){let n=r.device;n&&typeof n!="string"&&(n.hasOwnProperty(e)?n=n[e]:(console.warn(`Device not specified for ${e}. Using the default device.`),n=null));const i=H_(n);let a=r.dtype;if(typeof a!="string"&&(a&&a.hasOwnProperty(e)?a=a[e]:(a=ly[i[0]],console.warn(`Dtype not specified for ${e}. Using the default dtype: ${a}.`))),Sc.hasOwnProperty(a)){if(a===$t.fp16&&!await uy())throw new Error("The device does not support fp16.")}else throw new Error(`Invalid dtype: ${a}. Should be one of: ${Object.keys($t).join(", ")}`);const s=Sc[a],o=`${r.subfolder??""}/${e}${s}.onnx`,u={...r.session_options};u.executionProviders??=i;const l=ei(t,o,!0,r);let h=[];if(r.use_external_data_format){if(Pr.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const m=`${e}${s}.onnx_data`,c=`${r.subfolder??""}/${m}`;h.push(new Promise(async(y,b)=>{const v=await ei(t,c,!0,r);y({path:m,data:v})}))}else u.externalData!==void 0&&(h=u.externalData.map(async m=>{if(typeof m.data=="string"){const c=await ei(t,m.data,!0,r);return{...m,data:c}}return m}));if(h.length>0&&(u.externalData=await Promise.all(h)),n==="webgpu"){const m=fm(r.config,{prefix:"present"});if(Object.keys(m).length>0){const c={};for(const y in m)c[y]="gpu-buffer";u.preferredOutputLocation=c}}return{buffer:await l,session_options:u}}async function Tr(t,e,r){const n=Object.keys(e),i=await Promise.all(n.map(async s=>Iy(t,e[s],r))),a={};for(let s=0;s0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${n.join(", ")}.`);const i=Object.keys(e).length,a=t.inputNames.length;if(i>a){let s=Object.keys(e).filter(o=>!t.inputNames.includes(o));console.warn(`WARNING: Too many inputs were provided (${i} > ${a}). The following inputs will be ignored: "${s.join(", ")}".`)}return r}async function gr(t,e){const r=Ay(t,e);try{const n=Object.fromEntries(Object.entries(r).map(([a,s])=>[a,s.ort_tensor]));let i=await t.run(n);return i=ym(i),i}catch(n){throw console.error(`An error occurred during model execution: "${n}".`),console.error("Inputs given to model:",r),n}}function ym(t){for(let e in t)dm(t[e])?t[e]=new ce(t[e]):typeof t[e]=="object"&&ym(t[e]);return t}function wm(t){if(t instanceof ce)return t;if(t.length===0)throw Error("items must be non-empty");if(Array.isArray(t[0])){if(t.some(e=>e.length!==t[0].length))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new ce("int64",BigInt64Array.from(t.flat().map(e=>BigInt(e))),[t.length,t[0].length])}else return new ce("int64",BigInt64Array.from(t.map(e=>BigInt(e))),[1,t.length])}function bm(t){return new ce("bool",[t],[1])}async function Cc(t,e){let{encoder_outputs:r,past_key_values:n}=e;if(!r){const u=zr(e,t.sessions.model.inputNames);r=(await Jn(t,u)).last_hidden_state}const{input_ids:i,decoder_input_ids:a,...s}=e;return s.input_ids=a,s.encoder_hidden_states=r,t.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(s.encoder_attention_mask=e.attention_mask),await ko(t,s,!0)}async function Jn(t,e){const r=t.sessions.model,n=Object.create(null);for(const i of r.inputNames)n[i]=e[i];return r.inputNames.includes("token_type_ids")&&!n.token_type_ids&&(n.token_type_ids=new ce("int64",new BigInt64Array(n.input_ids.data.length),n.input_ids.dims)),await gr(r,n)}async function ko(t,e,r=!1){const n=t.sessions[r?"decoder_model_merged":"model"],{past_key_values:i,...a}=e;n.inputNames.includes("use_cache_branch")&&(a.use_cache_branch=bm(!!i)),n.inputNames.includes("position_ids")&&a.attention_mask&&!a.position_ids&&(a.position_ids=Oy(a,i)),t.addPastKeyValues(a,i);const s=zr(a,n.inputNames);return await gr(n,s)}async function My(t,{input_ids:e=null,attention_mask:r=null,pixel_values:n=null,position_ids:i=null,inputs_embeds:a=null,past_key_values:s=null,generation_config:o=null,logits_processor:u=null,...l}){if(!a){if(a=await t.encode_text({input_ids:e}),n&&e.dims[1]!==1){const f=await t.encode_image({pixel_values:n});({inputs_embeds:a,attention_mask:r}=t._merge_input_ids_with_image_features({image_features:f,inputs_embeds:a,input_ids:e,attention_mask:r}))}else if(s&&n&&e.dims[1]===1){const f=e.dims[1],m=Object.values(s)[0].dims.at(-2);r=Jt([aa([e.dims[0],m]),r.slice(null,[r.dims[1]-f,r.dims[1]])],1)}}return await ko(t,{inputs_embeds:a,past_key_values:s,attention_mask:r,position_ids:i,generation_config:o,logits_processor:u},!0)}function Oy(t,e=null){const{input_ids:r,inputs_embeds:n,attention_mask:i}=t,[a,s]=i.dims,o=new BigInt64Array(i.data.length);for(let l=0;la.dims[1])){if(io==t.config.image_token_index)){const o=t.config.num_image_tokens;if(!o)throw new Error("`num_image_tokens` is missing in the model configuration.");const u=a.dims[1]-(i-o);r.input_ids=a.slice(null,[-u,null]),r.attention_mask=aa([1,i+u])}}}return r}function zy(t,e,r,n){const{...i}=r;return r.past_key_values&&(e=e.map(s=>[s.at(-1)])),i.decoder_input_ids=wm(e),i}class X extends or{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,r){super(),this.config=e,this.sessions=r;const n=qn.get(this.constructor),i=wi.get(n);this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,i===be.DecoderOnly?(this.can_generate=!0,this._forward=ko,this._prepare_inputs_for_generation=Ec):i===be.Seq2Seq||i===be.Vision2Seq||i===be.Musicgen?(this.can_generate=!0,this._forward=Cc,this._prepare_inputs_for_generation=zy):i===be.EncoderDecoder?this._forward=Cc:i===be.ImageTextToText?(this.can_generate=!0,this._forward=My,this._prepare_inputs_for_generation=Ec):this._forward=Jn,this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const r of Object.values(this.sessions))r?.handler?.dispose&&e.push(r.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:r=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:l=null,dtype:h=null,use_external_data_format:f=null,session_options:m={}}={}){let c={progress_callback:r,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:l,dtype:h,use_external_data_format:f,session_options:m};const y=qn.get(this),b=wi.get(y);c.config=await mm.from_pretrained(e,c);let v;return b===be.DecoderOnly?v=await Promise.all([Tr(e,{model:c.model_file_name??"model"},c),on(e,"generation_config.json",!1,c)]):b===be.Seq2Seq||b===be.Vision2Seq?v=await Promise.all([Tr(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},c),on(e,"generation_config.json",!1,c)]):b===be.MaskGeneration?v=await Promise.all([Tr(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},c)]):b===be.EncoderDecoder?v=await Promise.all([Tr(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},c)]):b===be.ImageTextToText?v=await Promise.all([Tr(e,{embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"},c),on(e,"generation_config.json",!1,c)]):b===be.Musicgen?v=await Promise.all([Tr(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},c),on(e,"generation_config.json",!1,c)]):(b!==be.EncoderOnly&&console.warn(`Model type for '${y??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),v=await Promise.all([Tr(e,{model:c.model_file_name??"model"},c)])),new this(c.config,...v)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const r=new Ys;return e.temperature!==null&&e.temperature!==1&&r.push(new by(e.temperature)),e.top_k!==null&&e.top_k!==0&&r.push(new $y(e.top_k)),e.top_p!==null&&e.top_p<1&&r.push(new vy(e.top_p)),r}_get_logits_processor(e,r,n=null){const i=new Ys;if(e.repetition_penalty!==null&&e.repetition_penalty!==1&&i.push(new my(e.repetition_penalty)),e.no_repeat_ngram_size!==null&&e.no_repeat_ngram_size>0&&i.push(new fy(e.no_repeat_ngram_size)),e.bad_words_ids!==null&&i.push(new yy(e.bad_words_ids,e.eos_token_id)),e.min_length!==null&&e.eos_token_id!==null&&e.min_length>0&&i.push(new gy(e.min_length,e.eos_token_id)),e.min_new_tokens!==null&&e.eos_token_id!==null&&e.min_new_tokens>0&&i.push(new _y(r,e.min_new_tokens,e.eos_token_id)),e.forced_bos_token_id!==null&&i.push(new dy(e.forced_bos_token_id)),e.forced_eos_token_id!==null&&i.push(new cy(e.max_length,e.forced_eos_token_id)),e.begin_suppress_tokens!==null){const a=r>1||e.forced_bos_token_id===null?r:r+1;i.push(new py(e.begin_suppress_tokens,a))}return e.guidance_scale!==null&&e.guidance_scale>1&&i.push(new wy(e.guidance_scale)),n!==null&&i.extend(n),i}_prepare_generation_config(e,r,n=gm){const i={...this.config};for(const s of["decoder","generator","text_config"])s in i&&Object.assign(i,i[s]);const a=new n(i);return"generation_config"in this&&Object.assign(a,this.generation_config),e&&Object.assign(a,e),r&&Object.assign(a,zr(r,Object.getOwnPropertyNames(a))),a}_get_stopping_criteria(e,r=null){const n=new To;return e.max_length!==null&&n.push(new xy(e.max_length,this.config.max_position_embeddings??null)),e.eos_token_id!==null&&n.push(new Sy(e.eos_token_id)),r&&n.extend(r),n}_validate_model_class(){if(!this.can_generate){const e=[Cg,Eg,Sg,xg],r=qn.get(this.constructor),n=new Set,i=this.config.model_type;for(const s of e){const o=s.get(i);o&&n.add(o[0])}let a=`The current model class (${r}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(a+=` Please use the following class instead: ${[...n].join(", ")}`),Error(a)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:r,model_inputs:n,is_encoder_decoder:i}){return n.past_key_values=this.getPastKeyValues(r,n.past_key_values),n.input_ids=new ce("int64",e.flat(),[e.length,1]),i||(n.attention_mask=Jt([n.attention_mask,aa([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:r,model_kwargs:n}){const i=zr(n,this.forward_params),a=this.main_input_name;if(a in i){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else i[a]=e;return{inputs_tensor:i[a],model_inputs:i,model_input_name:a}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:r,model_input_name:n,generation_config:i}){const a=zr(r,this.sessions.model.inputNames);let{last_hidden_state:s}=await Jn(this,a);return i.guidance_scale!==null&&i.guidance_scale>1&&(s=Jt([s,ey(s,0)],0),"attention_mask"in r&&(r.attention_mask=Jt([r.attention_mask,ny(r.attention_mask)],0))),r.encoder_outputs=s,r}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:r,model_kwargs:n,decoder_start_token_id:i,bos_token_id:a,generation_config:s}){let{decoder_input_ids:o,...u}=n;if(!o)if(i??=a,this.config.model_type==="musicgen")o=Array.from({length:e*this.config.decoder.num_codebooks},()=>[i]);else if(Array.isArray(i)){if(i.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${i.length}`);o=i}else o=Array.from({length:e},()=>[i]);return o=wm(o),n.decoder_attention_mask=ty(o),{input_ids:o,model_inputs:u}}async generate({inputs:e=null,generation_config:r=null,logits_processor:n=null,stopping_criteria:i=null,streamer:a=null,...s}){this._validate_model_class(),r=this._prepare_generation_config(r,s);let{inputs_tensor:o,model_inputs:u,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:s});const h=this.config.is_encoder_decoder;h&&("encoder_outputs"in u||(u=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:o,model_inputs:u,model_input_name:l,generation_config:r})));let f;h?{input_ids:f,model_inputs:u}=this._prepare_decoder_input_ids_for_generation({batch_size:u[l].dims.at(0),model_input_name:l,model_kwargs:u,decoder_start_token_id:r.decoder_start_token_id,bos_token_id:r.bos_token_id,generation_config:r}):f=u[l];let m=f.dims.at(-1);r.max_new_tokens!==null&&(r.max_length=m+r.max_new_tokens);const c=this._get_logits_processor(r,m,n),y=this._get_stopping_criteria(r,i),b=u[l].dims.at(0),v=Si.getSampler(r),C=new Array(b).fill(0),x=f.tolist();a&&a.put(x);let T=null;for(;;){u=this.prepare_inputs_for_generation(x,u,r);const A=await this.forward(u),R=A.logits.slice(null,-1,null),z=c(x,R),P=[];for(let K=0;KK)){r.return_dict_in_generate&&(T=this.getPastKeyValues(A,u.past_key_values,!1));break}u=this._update_model_kwargs_for_generation({generated_input_ids:P,outputs:A,model_inputs:u,is_encoder_decoder:h})}a&&a.end();const I=new ce("int64",x.flat(),[x.length,x[0].length]);return r.return_dict_in_generate?{sequences:I,past_key_values:T}:I}addAttentionsToBeam(e,r){if(this.config.is_encoder_decoder){if(!r.cross_attentions||r.cross_attentions.length===0)throw Error("`output_attentions` is true, but the model did not produce cross-attentions. This is most likely because the model was not exported with `output_attentions=True`.");e.cross_attentions||(e.cross_attentions=[]),e.cross_attentions.push(r.cross_attentions)}if(!r.decoder_attentions||r.decoder_attentions.length===0)throw Error("`output_attentions` is true, but the model did not produce decoder-attentions. This is most likely because the model was not exported with `output_attentions=True`.");e.decoder_attentions||(e.decoder_attentions=[]),e.decoder_attentions.push(r.decoder_attentions)}groupBeams(e){const r=Object.create(null);for(const n of e)r[n.id]===void 0?r[n.id]=[n]:r[n.id].push(n);return Object.values(r)}getPastKeyValues(e,r,n=!0){const i=Object.create(null);for(const a in e)if(a.startsWith("present")){let s=a.replace("present","past_key_values");if(r&&a.includes("encoder"))i[s]=r[s];else{if(n&&r){const o=r[s];o.location==="gpu-buffer"&&o.dispose()}i[s]=e[a]}}return i}getAttentions(e){const r=Object.create(null);for(const n of["cross_attentions","decoder_attentions"]){const i=[];for(const a in e)if(a.startsWith(n)){const s=a.split(".").pop();i[s]=e[a]}r[n]=i}return r}addPastKeyValues(e,r){if(r)Object.assign(e,r);else{const n=this.custom_config.kv_cache_dtype??"float32",i=n==="float16"?new Uint16Array:[],a=fm(this.config);for(const s in a)e[s]=new ce(n,i,a[s])}}}class Ot{}class ia extends X{}class Ry extends ia{}class Py extends ia{async _call(e){return new dt(await super._call(e))}}class By extends ia{async _call(e){return new Ae(await super._call(e))}}class Dy extends ia{async _call(e){return new lt(await super._call(e))}}class Ny extends ia{async _call(e){return new mt(await super._call(e))}}class Fy extends X{}class Ly extends Fy{}class sa extends X{}class Wy extends sa{}class Uy extends sa{async _call(e){return new dt(await super._call(e))}}class Vy extends sa{async _call(e){return new Ae(await super._call(e))}}class Gy extends sa{async _call(e){return new lt(await super._call(e))}}class Hy extends sa{async _call(e){return new mt(await super._call(e))}}class oa extends X{}class qy extends oa{}class jy extends oa{async _call(e){return new dt(await super._call(e))}}class Ky extends oa{async _call(e){return new Ae(await super._call(e))}}class Yy extends oa{async _call(e){return new lt(await super._call(e))}}class Xy extends oa{async _call(e){return new mt(await super._call(e))}}class ua extends X{}class Qy extends ua{}class Jy extends ua{async _call(e){return new dt(await super._call(e))}}class Zy extends ua{async _call(e){return new Ae(await super._call(e))}}class ew extends ua{async _call(e){return new lt(await super._call(e))}}class tw extends ua{async _call(e){return new mt(await super._call(e))}}class la extends X{}class rw extends la{}class nw extends la{async _call(e){return new dt(await super._call(e))}}class aw extends la{async _call(e){return new Ae(await super._call(e))}}class iw extends la{async _call(e){return new lt(await super._call(e))}}class sw extends la{async _call(e){return new mt(await super._call(e))}}class da extends X{}class ow extends da{}class uw extends da{async _call(e){return new dt(await super._call(e))}}class lw extends da{async _call(e){return new Ae(await super._call(e))}}class dw extends da{async _call(e){return new lt(await super._call(e))}}class cw extends da{async _call(e){return new mt(await super._call(e))}}class ca extends X{}class pw extends ca{}class hw extends ca{async _call(e){return new dt(await super._call(e))}}class fw extends ca{async _call(e){return new Ae(await super._call(e))}}class mw extends ca{async _call(e){return new lt(await super._call(e))}}class gw extends ca{async _call(e){return new mt(await super._call(e))}}class pa extends X{}class _w extends pa{}class yw extends pa{async _call(e){return new Ae(await super._call(e))}}class ww extends pa{async _call(e){return new lt(await super._call(e))}}class bw extends pa{async _call(e){return new mt(await super._call(e))}}class vw extends pa{async _call(e){return new dt(await super._call(e))}}class Ci extends X{}class $w extends Ci{}class xw extends Ci{async _call(e){return new dt(await super._call(e))}}class Sw extends Ci{async _call(e){return new Ae(await super._call(e))}}class Cw extends Ci{async _call(e){return new lt(await super._call(e))}}class Ei extends X{}class Ew extends Ei{}class Tw extends Ei{async _call(e){return new dt(await super._call(e))}}class kw extends Ei{async _call(e){return new Ae(await super._call(e))}}class Iw extends Ei{async _call(e){return new mt(await super._call(e))}}class ha extends X{}class Aw extends ha{}class Mw extends ha{async _call(e){return new dt(await super._call(e))}}class Ow extends ha{async _call(e){return new Ae(await super._call(e))}}class zw extends ha{async _call(e){return new lt(await super._call(e))}}class Rw extends ha{async _call(e){return new mt(await super._call(e))}}class Ti extends X{}class Pw extends Ti{}class Bw extends Ti{async _call(e){return new dt(await super._call(e))}}class Dw extends Ti{async _call(e){return new Ae(await super._call(e))}}class Nw extends Ti{async _call(e){return new mt(await super._call(e))}}class ki extends X{}class Fw extends ki{}class Lw extends ki{async _call(e){return new Ae(await super._call(e))}}class Ww extends ki{async _call(e){return new mt(await super._call(e))}}class Uw extends ki{async _call(e){return new dt(await super._call(e))}}class vm extends X{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}}class Vw extends vm{}class Gw extends vm{}class $m extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Hw extends $m{}class qw extends $m{}class xm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class jw extends xm{}class Kw extends xm{}class Io extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Yw extends Io{}class Xw extends Io{}class Qw extends Io{async _call(e){return new Ae(await super._call(e))}}class Ii extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class Jw extends Ii{}class Zw extends Ii{}class eb extends Ii{async _call(e){return new Ae(await super._call(e))}}class tb extends Ii{}class Sm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class rb extends Sm{}class nb extends Sm{}class Cm extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class ab extends Cm{}class ib extends Cm{}class fa extends X{}class sb extends fa{}class ob extends fa{async _call(e){return new dt(await super._call(e))}}class ub extends fa{async _call(e){return new Ae(await super._call(e))}}class lb extends fa{async _call(e){return new lt(await super._call(e))}}class db extends fa{async _call(e){return new mt(await super._call(e))}}class ma extends X{}class cb extends ma{}class pb extends ma{async _call(e){return new dt(await super._call(e))}}class hb extends ma{async _call(e){return new Ae(await super._call(e))}}class fb extends ma{async _call(e){return new lt(await super._call(e))}}class mb extends ma{async _call(e){return new mt(await super._call(e))}}class ga extends X{}class gb extends ga{}class _b extends ga{async _call(e){return new dt(await super._call(e))}}class yb extends ga{async _call(e){return new Ae(await super._call(e))}}class wb extends ga{async _call(e){return new lt(await super._call(e))}}class bb extends ga{async _call(e){return new mt(await super._call(e))}}class Em extends X{}class vb extends Em{}class $b extends Em{}class Tm extends X{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}}class xb extends Tm{}class Sb extends Tm{_prepare_generation_config(e,r){return super._prepare_generation_config(e,r,ky)}_retrieve_init_tokens(e){const r=[e.decoder_start_token_id];let n=e.language;const i=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const s=`<|${iy(n)}|>`;r.push(e.lang_to_id[s]),r.push(e.task_to_id[i??"transcribe"])}else if(n||i)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&r.at(-1)!==e.no_timestamps_token_id?r.push(e.no_timestamps_token_id):e.return_timestamps&&r.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),r.pop()),r.filter(a=>a!=null)}async generate({inputs:e=null,generation_config:r=null,logits_processor:n=null,stopping_criteria:i=null,...a}){r=this._prepare_generation_config(r,a);const s=this._retrieve_init_tokens(r);return r.return_timestamps&&(n??=new Ys,n.push(new hy(r,s))),await super.generate({inputs:e,generation_config:r,logits_processor:n,decoder_input_ids:s,...a})}_extract_token_timestamps(e,r,n=null,i=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`.");let a=this.config.median_filter_width;a===void 0&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),a=7);const s=e.cross_attentions.map(l=>{let h=Array.from({length:this.config.decoder_layers},(v,C)=>Jt(l.map(x=>x[C]),2)),f=Qn(r.map(([v,C])=>n?h[v].slice(null,C,null,[0,n]):h[v].slice(null,C)));f=f.transpose(1,0,2,3);let[m,c]=X_(f,-2,0,!0),y=f.clone();for(let v=0;vf[C+1]-f[C]),y=e0([1],c).map(v=>!!v),b=[];for(let v=0;vm.findIndex(c=>c==a)),u=o.every(m=>m===-1),l=o.every(m=>m!==-1);if(!u&&!l)throw new Error("Every input should contain either 0 or 1 image token.");if(u)return{inputs_embeds:e,attention_mask:i};const h=[],f=[];for(let m=0;ma*s,1);e.input_labels=new ce("int64",new BigInt64Array(i).fill(1n),n)}const r={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(r.input_points=e.input_points),e.input_labels&&(r.input_labels=e.input_labels),e.input_boxes&&(r.input_boxes=e.input_boxes),await gr(this.sessions.prompt_encoder_mask_decoder,r)}async _call(e){return new p1(await super._call(e))}}class p1 extends Ot{constructor({iou_scores:e,pred_masks:r}){super(),this.iou_scores=e,this.pred_masks=r}}class pg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class h1 extends pg{}class f1 extends pg{}class hg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class m1 extends hg{}class g1 extends hg{}class Ur extends X{}class _1 extends Ur{}class y1 extends Ur{async _call(e){return new gn(await super._call(e))}}class w1 extends Ur{async _call(e){return new Ae(await super._call(e))}}class b1 extends Ur{async _call(e){return new lt(await super._call(e))}}class Mo extends X{}class v1 extends Mo{}class $1 extends Mo{async _call(e){return new gn(await super._call(e))}}class x1 extends Mo{async _call(e){return new Ae(await super._call(e))}}class Mi extends X{}class S1 extends Mi{}class C1 extends Mi{async _call(e){return new gn(await super._call(e))}}class E1 extends Mi{async _call(e){return new Ae(await super._call(e))}}class T1 extends Mi{async _call(e){return new lt(await super._call(e))}}class Oo extends X{}class k1 extends Oo{}class I1 extends Oo{async _call(e){return new gn(await super._call(e))}}class A1 extends Oo{async _call(e){return new Ae(await super._call(e))}}class M1 extends Ur{}class O1 extends Ur{async _call(e){return new gn(await super._call(e))}}class z1 extends Ur{async _call(e){return new Ae(await super._call(e))}}class _a extends X{}class R1 extends _a{}class P1 extends _a{async _call(e){return new gn(await super._call(e))}}class B1 extends _a{async _call(e){return new Ae(await super._call(e))}}class D1 extends _a{async _call(e){return new zv(await super._call(e))}}class N1 extends _a{async _call(e){return new lt(await super._call(e))}}class fg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class F1 extends fg{}class L1 extends fg{async generate_speech(e,r,{threshold:n=.5,minlenratio:i=0,maxlenratio:a=20,vocoder:s=null}={}){const o={input_ids:e},{encoder_outputs:u,encoder_attention_mask:l}=await Jn(this,o),h=u.dims[1]/this.config.reduction_factor,f=Math.floor(h*a),m=Math.floor(h*i),c=this.config.num_mel_bins;let y=[],b=null,v=null,C=0;for(;;){++C;const I=bm(!!v);let A;v?A=v.output_sequence_out:A=new ce("float32",new Float32Array(c),[1,1,c]);let R={use_cache_branch:I,output_sequence:A,encoder_attention_mask:l,speaker_embeddings:r,encoder_hidden_states:u};this.addPastKeyValues(R,b),v=await gr(this.sessions.decoder_model_merged,R),b=this.getPastKeyValues(v,b);const{prob:z,spectrum:P}=v;if(y.push(P),C>=m&&(Array.from(z.data).filter(J=>J>=n).length>0||C>=f))break}const x=Jt(y),{waveform:T}=await gr(s.sessions.model,{spectrogram:x});return{spectrogram:x,waveform:T}}}class W1 extends X{main_input_name="spectrogram"}class U1 extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class V1 extends U1{}class mg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class G1 extends mg{}class H1 extends mg{}class gg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class q1 extends gg{}class j1 extends gg{}class _g extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class K1 extends _g{}class Y1 extends _g{}class zo extends X{}class X1 extends zo{}class Q1 extends zo{static async from_pretrained(e,r={}){return r.model_file_name??="text_model",super.from_pretrained(e,r)}}class J1 extends zo{static async from_pretrained(e,r={}){return r.model_file_name??="audio_model",super.from_pretrained(e,r)}}class Z1 extends X{}class yg extends Z1{async _call(e){return new Pv(await super._call(e))}}class wg extends X{}class ev extends wg{}class tv extends wg{}class bg extends X{constructor(e,r,n){super(e,r),this.generation_config=n}}class rv extends bg{}class nv extends bg{}class vg extends X{}class av extends vg{}class iv extends vg{async _call(e){return new Ae(await super._call(e))}}class $g extends X{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,r,n){super(e,r),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[r,n]=e.dims,i=this.config.decoder.num_codebooks,a=n-i;let s=0;for(let l=0;l0&&m<=a&&(e.data[s++]=e.data[l])}const o=Math.floor(r/i),u=s/(o*i);return new ce(e.type,e.data.slice(0,s),[o,i,u])}prepare_inputs_for_generation(e,r,n){let i=structuredClone(e);for(let s=0;s=o&&(i[s][o]=BigInt(this.config.decoder.pad_token_id));return n.guidance_scale!==null&&n.guidance_scale>1&&(i=i.concat(i)),super.prepare_inputs_for_generation(i,r,n)}async generate(e){const r=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(r).unsqueeze_(0),{audio_values:i}=await gr(this.sessions.encodec_decode,{audio_codes:n});return i}}class sv{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:r=null,config:n=null,cache_dir:i=null,local_files_only:a=!1,revision:s="main",model_file_name:o=null,subfolder:u="onnx",device:l=null,dtype:h=null,use_external_data_format:f=null,session_options:m={}}={}){let c={progress_callback:r,config:n,cache_dir:i,local_files_only:a,revision:s,model_file_name:o,subfolder:u,device:l,dtype:h,use_external_data_format:f,session_options:m};if(c.config=await mm.from_pretrained(e,c),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let y of this.MODEL_CLASS_MAPPINGS){const b=y.get(c.config.model_type);if(b)return await b[1].from_pretrained(e,c)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${c.config.model_type}", attempting to construct from base class.`),await X.from_pretrained(e,c);throw Error(`Unsupported model type: ${c.config.model_type}`)}}const ov=new Map([["bert",["BertModel",Ry]],["nomic_bert",["NomicBertModel",Ly]],["roformer",["RoFormerModel",Wy]],["electra",["ElectraModel",Qy]],["esm",["EsmModel",$w]],["convbert",["ConvBertModel",qy]],["camembert",["CamembertModel",rw]],["deberta",["DebertaModel",ow]],["deberta-v2",["DebertaV2Model",pw]],["mpnet",["MPNetModel",Aw]],["albert",["AlbertModel",Fw]],["distilbert",["DistilBertModel",_w]],["roberta",["RobertaModel",sb]],["xlm",["XLMModel",cb]],["xlm-roberta",["XLMRobertaModel",gb]],["clap",["ClapModel",X1]],["clip",["CLIPModel",kb]],["clipseg",["CLIPSegModel",Bb]],["chinese_clip",["ChineseCLIPModel",Pb]],["siglip",["SiglipModel",Mb]],["mobilebert",["MobileBertModel",Ew]],["squeezebert",["SqueezeBertModel",Pw]],["wav2vec2",["Wav2Vec2Model",_1]],["wav2vec2-bert",["Wav2Vec2BertModel",k1]],["unispeech",["UniSpeechModel",v1]],["unispeech-sat",["UniSpeechSatModel",S1]],["hubert",["HubertModel",M1]],["wavlm",["WavLMModel",R1]],["audio-spectrogram-transformer",["ASTModel",vb]],["vits",["VitsModel",yg]],["detr",["DetrModel",O2]],["table-transformer",["TableTransformerModel",B2]],["vit",["ViTModel",g2]],["fastvit",["FastViTModel",y2]],["mobilevit",["MobileViTModel",$2]],["mobilevitv2",["MobileViTV2Model",S2]],["owlvit",["OwlViTModel",E2]],["owlv2",["Owlv2Model",k2]],["beit",["BeitModel",A2]],["deit",["DeiTModel",F2]],["convnext",["ConvNextModel",t1]],["convnextv2",["ConvNextV2Model",n1]],["dinov2",["Dinov2Model",i1]],["resnet",["ResNetModel",W2]],["swin",["SwinModel",V2]],["swin2sr",["Swin2SRModel",H2]],["donut-swin",["DonutSwinModel",e1]],["yolos",["YolosModel",o1]],["dpt",["DPTModel",j2]],["glpn",["GLPNModel",Q2]],["hifigan",["SpeechT5HifiGan",W1]],["efficientnet",["EfficientNetModel",av]]]),uv=new Map([["t5",["T5Model",Vw]],["longt5",["LongT5Model",Hw]],["mt5",["MT5Model",jw]],["bart",["BartModel",Yw]],["mbart",["MBartModel",Jw]],["marian",["MarianModel",h1]],["whisper",["WhisperModel",xb]],["m2m_100",["M2M100Model",m1]],["blenderbot",["BlenderbotModel",rb]],["blenderbot-small",["BlenderbotSmallModel",ab]]]),lv=new Map([["bloom",["BloomModel",d2]],["gpt2",["GPT2Model",Nb]],["gptj",["GPTJModel",Gb]],["gpt_bigcode",["GPTBigCodeModel",qb]],["gpt_neo",["GPTNeoModel",Lb]],["gpt_neox",["GPTNeoXModel",Ub]],["codegen",["CodeGenModel",Kb]],["llama",["LlamaModel",Xb]],["cohere",["CohereModel",Jb]],["gemma",["GemmaModel",e2]],["openelm",["OpenELMModel",r2]],["qwen2",["Qwen2Model",a2]],["phi",["PhiModel",s2]],["phi3",["Phi3Model",u2]],["mpt",["MptModel",p2]],["opt",["OPTModel",f2]],["mistral",["MistralModel",G1]],["starcoder2",["Starcoder2Model",q1]],["falcon",["FalconModel",K1]],["stablelm",["StableLmModel",rv]]]),xg=new Map([["speecht5",["SpeechT5ForSpeechToText",F1]],["whisper",["WhisperForConditionalGeneration",Sb]]]),dv=new Map([["speecht5",["SpeechT5ForTextToSpeech",L1]]]),cv=new Map([["vits",["VitsModel",yg]],["musicgen",["MusicgenForConditionalGeneration",$g]]]),pv=new Map([["bert",["BertForSequenceClassification",By]],["roformer",["RoFormerForSequenceClassification",Vy]],["electra",["ElectraForSequenceClassification",Zy]],["esm",["EsmForSequenceClassification",Sw]],["convbert",["ConvBertForSequenceClassification",Ky]],["camembert",["CamembertForSequenceClassification",aw]],["deberta",["DebertaForSequenceClassification",lw]],["deberta-v2",["DebertaV2ForSequenceClassification",fw]],["mpnet",["MPNetForSequenceClassification",Ow]],["albert",["AlbertForSequenceClassification",Lw]],["distilbert",["DistilBertForSequenceClassification",yw]],["roberta",["RobertaForSequenceClassification",ub]],["xlm",["XLMForSequenceClassification",hb]],["xlm-roberta",["XLMRobertaForSequenceClassification",yb]],["bart",["BartForSequenceClassification",Qw]],["mbart",["MBartForSequenceClassification",eb]],["mobilebert",["MobileBertForSequenceClassification",kw]],["squeezebert",["SqueezeBertForSequenceClassification",Dw]]]),hv=new Map([["bert",["BertForTokenClassification",Dy]],["roformer",["RoFormerForTokenClassification",Gy]],["electra",["ElectraForTokenClassification",ew]],["esm",["EsmForTokenClassification",Cw]],["convbert",["ConvBertForTokenClassification",Yy]],["camembert",["CamembertForTokenClassification",iw]],["deberta",["DebertaForTokenClassification",dw]],["deberta-v2",["DebertaV2ForTokenClassification",mw]],["mpnet",["MPNetForTokenClassification",zw]],["distilbert",["DistilBertForTokenClassification",ww]],["roberta",["RobertaForTokenClassification",lb]],["xlm",["XLMForTokenClassification",fb]],["xlm-roberta",["XLMRobertaForTokenClassification",wb]]]),Sg=new Map([["t5",["T5ForConditionalGeneration",Gw]],["longt5",["LongT5ForConditionalGeneration",qw]],["mt5",["MT5ForConditionalGeneration",Kw]],["bart",["BartForConditionalGeneration",Xw]],["mbart",["MBartForConditionalGeneration",Zw]],["marian",["MarianMTModel",f1]],["m2m_100",["M2M100ForConditionalGeneration",g1]],["blenderbot",["BlenderbotForConditionalGeneration",nb]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",ib]]]),Cg=new Map([["bloom",["BloomForCausalLM",c2]],["gpt2",["GPT2LMHeadModel",Fb]],["gptj",["GPTJForCausalLM",Hb]],["gpt_bigcode",["GPTBigCodeForCausalLM",jb]],["gpt_neo",["GPTNeoForCausalLM",Wb]],["gpt_neox",["GPTNeoXForCausalLM",Vb]],["codegen",["CodeGenForCausalLM",Yb]],["llama",["LlamaForCausalLM",Qb]],["cohere",["CohereForCausalLM",Zb]],["gemma",["GemmaForCausalLM",t2]],["openelm",["OpenELMForCausalLM",n2]],["qwen2",["Qwen2ForCausalLM",i2]],["phi",["PhiForCausalLM",o2]],["phi3",["Phi3ForCausalLM",l2]],["mpt",["MptForCausalLM",h2]],["opt",["OPTForCausalLM",m2]],["mbart",["MBartForCausalLM",tb]],["mistral",["MistralForCausalLM",H1]],["starcoder2",["Starcoder2ForCausalLM",j1]],["falcon",["FalconForCausalLM",Y1]],["trocr",["TrOCRForCausalLM",V1]],["stablelm",["StableLmForCausalLM",nv]]]),fv=new Map([["bert",["BertForMaskedLM",Py]],["roformer",["RoFormerForMaskedLM",Uy]],["electra",["ElectraForMaskedLM",Jy]],["esm",["EsmForMaskedLM",xw]],["convbert",["ConvBertForMaskedLM",jy]],["camembert",["CamembertForMaskedLM",nw]],["deberta",["DebertaForMaskedLM",uw]],["deberta-v2",["DebertaV2ForMaskedLM",hw]],["mpnet",["MPNetForMaskedLM",Mw]],["albert",["AlbertForMaskedLM",Uw]],["distilbert",["DistilBertForMaskedLM",vw]],["roberta",["RobertaForMaskedLM",ob]],["xlm",["XLMWithLMHeadModel",pb]],["xlm-roberta",["XLMRobertaForMaskedLM",_b]],["mobilebert",["MobileBertForMaskedLM",Tw]],["squeezebert",["SqueezeBertForMaskedLM",Bw]]]),mv=new Map([["bert",["BertForQuestionAnswering",Ny]],["roformer",["RoFormerForQuestionAnswering",Hy]],["electra",["ElectraForQuestionAnswering",tw]],["convbert",["ConvBertForQuestionAnswering",Xy]],["camembert",["CamembertForQuestionAnswering",sw]],["deberta",["DebertaForQuestionAnswering",cw]],["deberta-v2",["DebertaV2ForQuestionAnswering",gw]],["mpnet",["MPNetForQuestionAnswering",Rw]],["albert",["AlbertForQuestionAnswering",Ww]],["distilbert",["DistilBertForQuestionAnswering",bw]],["roberta",["RobertaForQuestionAnswering",db]],["xlm",["XLMForQuestionAnswering",mb]],["xlm-roberta",["XLMRobertaForQuestionAnswering",bb]],["mobilebert",["MobileBertForQuestionAnswering",Iw]],["squeezebert",["SqueezeBertForQuestionAnswering",Nw]]]),Eg=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",Cb]]]),gv=new Map([["llava",["LlavaForConditionalGeneration",km]],["moondream1",["Moondream1ForConditionalGeneration",Tb]]]),_v=new Map([["vit",["ViTForImageClassification",_2]],["fastvit",["FastViTForImageClassification",w2]],["mobilevit",["MobileViTForImageClassification",x2]],["mobilevitv2",["MobileViTV2ForImageClassification",C2]],["beit",["BeitForImageClassification",M2]],["deit",["DeiTForImageClassification",L2]],["convnext",["ConvNextForImageClassification",r1]],["convnextv2",["ConvNextV2ForImageClassification",a1]],["dinov2",["Dinov2ForImageClassification",s1]],["resnet",["ResNetForImageClassification",U2]],["swin",["SwinForImageClassification",G2]],["segformer",["SegformerForImageClassification",ev]],["efficientnet",["EfficientNetForImageClassification",iv]]]),yv=new Map([["detr",["DetrForObjectDetection",z2]],["table-transformer",["TableTransformerForObjectDetection",D2]],["yolos",["YolosForObjectDetection",u1]]]),wv=new Map([["owlvit",["OwlViTForObjectDetection",T2]],["owlv2",["Owlv2ForObjectDetection",I2]]]),bv=new Map([["detr",["DetrForSegmentation",R2]],["clipseg",["CLIPSegForImageSegmentation",Db]]]),vv=new Map([["segformer",["SegformerForSemanticSegmentation",tv]]]),$v=new Map([["sam",["SamModel",c1]]]),xv=new Map([["wav2vec2",["Wav2Vec2ForCTC",y1]],["wav2vec2-bert",["Wav2Vec2BertForCTC",I1]],["unispeech",["UniSpeechForCTC",$1]],["unispeech-sat",["UniSpeechSatForCTC",C1]],["wavlm",["WavLMForCTC",P1]],["hubert",["HubertForCTC",O1]]]),Sv=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",w1]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",A1]],["unispeech",["UniSpeechForSequenceClassification",x1]],["unispeech-sat",["UniSpeechSatForSequenceClassification",E1]],["wavlm",["WavLMForSequenceClassification",B1]],["hubert",["HubertForSequenceClassification",z1]],["audio-spectrogram-transformer",["ASTForAudioClassification",$b]]]),Cv=new Map([["wavlm",["WavLMForXVector",D1]]]),Ev=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",T1]],["wavlm",["WavLMForAudioFrameClassification",N1]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",b1]]]),Tv=new Map([["vitmatte",["VitMatteForImageMatting",v2]]]),kv=new Map([["swin2sr",["Swin2SRForImageSuperResolution",q2]]]),Iv=new Map([["dpt",["DPTForDepthEstimation",K2]],["depth_anything",["DepthAnythingForDepthEstimation",X2]],["glpn",["GLPNForDepthEstimation",J2]]]),Av=new Map([["clip",["CLIPVisionModelWithProjection",Ab]],["siglip",["SiglipVisionModel",zb]]]),Tg=[[ov,be.EncoderOnly],[uv,be.EncoderDecoder],[lv,be.DecoderOnly],[pv,be.EncoderOnly],[hv,be.EncoderOnly],[Sg,be.Seq2Seq],[xg,be.Seq2Seq],[Cg,be.DecoderOnly],[fv,be.EncoderOnly],[mv,be.EncoderOnly],[Eg,be.Vision2Seq],[gv,be.ImageTextToText],[_v,be.EncoderOnly],[bv,be.EncoderOnly],[vv,be.EncoderOnly],[Tv,be.EncoderOnly],[kv,be.EncoderOnly],[Iv,be.EncoderOnly],[yv,be.EncoderOnly],[wv,be.EncoderOnly],[$v,be.MaskGeneration],[xv,be.EncoderOnly],[Sv,be.EncoderOnly],[dv,be.Seq2Seq],[cv,be.EncoderOnly],[Cv,be.EncoderOnly],[Ev,be.EncoderOnly],[Av,be.EncoderOnly]];for(const[t,e]of Tg)for(const[r,n]of t.values())wi.set(r,e),qn.set(n,r),_m.set(r,n);const Mv=[["MusicgenForConditionalGeneration",$g,be.Musicgen],["CLIPTextModelWithProjection",Ib,be.EncoderOnly],["SiglipTextModel",Ob,be.EncoderOnly],["ClapTextModelWithProjection",Q1,be.EncoderOnly],["ClapAudioModelWithProjection",J1,be.EncoderOnly]];for(const[t,e,r]of Mv)wi.set(t,r),qn.set(e,t),_m.set(t,e);class Ov extends sv{static MODEL_CLASS_MAPPINGS=Tg.map(e=>e[0]);static BASE_IF_FAIL=!0}class Ae extends Ot{constructor({logits:e}){super(),this.logits=e}}class zv extends Ot{constructor({logits:e,embeddings:r}){super(),this.logits=e,this.embeddings=r}}class lt extends Ot{constructor({logits:e}){super(),this.logits=e}}class dt extends Ot{constructor({logits:e}){super(),this.logits=e}}class mt extends Ot{constructor({start_logits:e,end_logits:r}){super(),this.start_logits=e,this.end_logits=r}}class gn extends Ot{constructor({logits:e}){super(),this.logits=e}}class Rv extends Ot{constructor({alphas:e}){super(),this.alphas=e}}class Pv extends Ot{constructor({waveform:e,spectrogram:r}){super(),this.waveform=e,this.spectrogram=r}}const It=typeof self<"u",Bv=It&&self.constructor.name==="DedicatedWorkerGlobalScope";let kr,kg,fr;if(It)kr=(t,e)=>{if(!self.OffscreenCanvas)throw new Error("OffscreenCanvas not supported by this browser.");return new self.OffscreenCanvas(t,e)},fr=self.createImageBitmap,kg=self.ImageData;else if(Ye)fr=async t=>{const r=(await t.metadata()).channels,{data:n,info:i}=await t.rotate().raw().toBuffer({resolveWithObject:!0}),a=new Vt(new Uint8ClampedArray(n),i.width,i.height,i.channels);return r!==void 0&&r!==i.channels&&a.convert(r),a};else throw new Error("Unable to load image processing library.");const Dv={0:"nearest",1:"lanczos",2:"bilinear",3:"bicubic",4:"box",5:"hamming"},Nv=new Map([["png","image/png"],["jpg","image/jpeg"],["jpeg","image/jpeg"],["gif","image/gif"]]);class Vt{constructor(e,r,n,i){this.data=e,this.width=r,this.height=n,this.channels=i}get size(){return[this.width,this.height]}static async read(e){if(e instanceof Vt)return e;if(typeof e=="string"||e instanceof URL)return await this.fromURL(e);throw new Error(`Unsupported input type: ${typeof e}`)}static fromCanvas(e){if(!It)throw new Error("fromCanvas() is only supported in browser environments.");const n=e.getContext("2d").getImageData(0,0,e.width,e.height).data;return new Vt(n,e.width,e.height,4)}static async fromURL(e){const r=await Rs(e);if(r.status!==200)throw new Error(`Unable to read image from "${e}" (${r.status} ${r.statusText})`);const n=await r.blob();return this.fromBlob(n)}static async fromBlob(e){if(It){const r=await fr(e),n=kr(r.width,r.height).getContext("2d");return n.drawImage(r,0,0),new this(n.getImageData(0,0,r.width,r.height).data,r.width,r.height,4)}else{const r=Ye(await e.arrayBuffer());return await fr(r)}}static fromTensor(e,r="CHW"){if(e.dims.length!==3)throw new Error(`Tensor should have 3 dimensions, but has ${e.dims.length} dimensions.`);if(r==="CHW")e=e.transpose(1,2,0);else if(r!=="HWC")throw new Error(`Unsupported channel format: ${r}`);if(!(e.data instanceof Uint8ClampedArray||e.data instanceof Uint8Array))throw new Error(`Unsupported tensor type: ${e.type}`);switch(e.dims[2]){case 1:case 2:case 3:case 4:return new Vt(e.data,e.dims[1],e.dims[0],e.dims[2]);default:throw new Error(`Unsupported number of channels: ${e.dims[2]}`)}}grayscale(){if(this.channels===1)return this;const e=new Uint8ClampedArray(this.width*this.height*1);switch(this.channels){case 3:case 4:for(let r=0,n=0;r=0?u=n:h=-n,i>=0?l=i:f=-i,o.drawImage(s,u,l,e,r,h,f,e,r),new Vt(o.getImageData(0,0,e,r).data,e,r,4).convert(a)}else{let a=this.toSharp();if(n>=0&&i>=0)a=a.extract({left:Math.floor(n),top:Math.floor(i),width:e,height:r});else if(n<=0&&i<=0){const s=Math.floor(-i),o=Math.floor(-n);a=a.extend({top:s,left:o,right:e-this.width-o,bottom:r-this.height-s})}else{let s=[0,0],o=0;i<0?(s[0]=Math.floor(-i),s[1]=r-this.height-s[0]):o=Math.floor(i);let u=[0,0],l=0;n<0?(u[0]=Math.floor(-n),u[1]=e-this.width-u[0]):l=Math.floor(n),a=a.extend({top:s[0],bottom:s[1],left:u[0],right:u[1]}).extract({left:l,top:o,width:e,height:r})}return await fr(a)}}async toBlob(e="image/png",r=1){if(!It)throw new Error("toBlob() is only supported in browser environments.");return await this.toCanvas().convertToBlob({type:e,quality:r})}toTensor(e="CHW"){let r=new ce("uint8",new Uint8Array(this.data),[this.height,this.width,this.channels]);if(e!=="HWC")if(e==="CHW")r=r.permute(2,0,1);else throw new Error(`Unsupported channel format: ${e}`);return r}toCanvas(){if(!It)throw new Error("toCanvas() is only supported in browser environments.");const e=this.clone().rgba(),r=kr(e.width,e.height),n=new kg(e.data,e.width,e.height);return r.getContext("2d").putImageData(n,0,0),r}_update(e,r,n,i=null){return this.data=e,this.width=r,this.height=n,i!==null&&(this.channels=i),this}clone(){return new Vt(this.data.slice(),this.width,this.height,this.channels)}convert(e){if(this.channels===e)return this;switch(e){case 1:this.grayscale();break;case 3:this.rgb();break;case 4:this.rgba();break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this}async save(e){if(It){if(Bv)throw new Error("Unable to save an image from a Web Worker.");const r=e.split(".").pop().toLowerCase(),n=Nv.get(r)??"image/png",i=await this.toBlob(n),a=URL.createObjectURL(i),s=document.createElement("a");s.href=a,s.download=e,s.click(),s.remove()}else{if(bt.useFS)return await this.toSharp().toFile(e);throw new Error("Unable to save the image because filesystem is disabled in this environment.")}}toSharp(){if(It)throw new Error("toSharp() is only supported in server-side environments.");return Ye(this.data,{raw:{width:this.width,height:this.height,channels:this.channels}})}}function Tc(t){if(t<1)return new Float64Array;if(t===1)return new Float64Array([1]);const e=t-1,r=Math.PI/e,n=new Float64Array(t);for(let i=0;i2595*Math.log10(1+t/700),kaldi:t=>1127*Math.log(1+t/700),slaney:(t,e=1e3,r=15,n=27/Math.log(6.4))=>t>=e?r+Math.log(t/e)*n:3*t/200};function Is(t,e="htk"){const r=Fv[e];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return typeof t=="number"?r(t):t.map(n=>r(n))}const Lv={htk:t=>700*(10**(t/2595)-1),kaldi:t=>700*(Math.exp(t/1127)-1),slaney:(t,e=1e3,r=15,n=Math.log(6.4)/27)=>t>=r?e*Math.exp(n*(t-r)):200*t/3};function Wv(t,e="htk"){const r=Lv[e];if(!r)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return typeof t=="number"?r(t):t.map(n=>r(n))}function Uv(t,e){const r=Float64Array.from({length:e.length-1},(s,o)=>e[o+1]-e[o]),n=Array.from({length:t.length},()=>new Array(e.length));for(let s=0;snew Array(t.length));for(let s=0;st+n*a)}function Zn(t,e,r,n,i,a=null,s="htk",o=!1){if(a!==null&&a!=="slaney")throw new Error('norm must be one of null or "slaney"');const u=Is(r,s),l=Is(n,s),h=kc(u,l,e+2);let f=Wv(h,s),m;if(o){const y=i/(t*2);m=Is(Float64Array.from({length:t},(b,v)=>v*y),s),f=h}else m=kc(0,Math.floor(i/2),t);const c=Uv(m,f);if(a!==null&&a==="slaney")for(let y=0;yi)throw Error(`frame_length (${r}) may not be larger than fft_length (${i})`);if(I!==r)throw new Error(`Length of the window (${I}) must equal frame_length (${r})`);if(n<=0)throw new Error("hop_length must be greater than zero");if(a===null&&h!==null)throw new Error("You have provided `mel_filters` but `power` is `None`. Mel spectrogram computation is not yet supported for complex-valued spectrogram. Specify `power` to fix this issue.");if(s){if(o!=="reflect")throw new Error(`pad_mode="${o}" not implemented yet.`);const U=Math.floor((i-1)/2)+1;t=Vv(t,U,U)}const A=Math.floor(1+Math.floor((t.length-r)/n)),R=u?Math.floor(i/2)+1:i;let z=A,P=A;C!==null&&(C>A?x&&(P=C):P=z=C);const J=new d0(i),K=new Float64Array(i),ue=new Float64Array(J.outputBufferSize),ie=new Float32Array(R*P);for(let U=0;U=1;--re)K[re]-=l*K[re-1];K[0]*=1-l}for(let re=0;reMath.pow(o,.85));break;default:throw new Error(`Unknown window type ${e}.`)}if(r&&(s=s.subarray(0,t)),n===null)return s;if(t>n)throw new Error(`Length of the window (${t}) may not be larger than frame_length (${n})`);return s}function qv([t,e,r,n]){return[t-r/2,e-n/2,t+r/2,e+n/2]}function Ro(t,e=.5,r=null,n=!1){const i=t.logits,a=t.pred_boxes,[s,o,u]=i.dims;if(r!==null&&r.length!==s)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");let l=[];for(let h=0;he&&C.push(T)}else{let T=_r(v.data)[1];if(T===u-1||(x=ta(v.data),x[T]A*f[(R+1)%2])),m.boxes.push(I),m.classes.push(T),m.scores.push(x[T])}}l.push(m)}return l}function ya(t,e){if(!(t instanceof Float32Array||t instanceof Float64Array))throw new Error(`${e} expects input to be a Float32Array or a Float64Array, but got ${t?.constructor?.name??typeof t} instead. If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`)}function Ic(t,e,r=0,n=null){const i=t/e;let a=p0(i)*e;return n!==null&&a>n&&(a=Math.floor(i)*e),aa?l=Math.floor(a*u/i):a>i&&(u=Math.floor(i*l/a)),await e.resize(l,u,{resample:n}))}async crop_margin(e,r=200){const n=e.clone().grayscale(),i=u0(n.data)[0],s=_r(n.data)[0]-i;if(s===0)return e;const o=r/255;let u=n.width,l=n.height,h=0,f=0;const m=n.data;for(let c=0;cthis.preprocess(a)));return{pixel_values:Qn(n.map(a=>a.pixel_values),0),original_sizes:n.map(a=>a.original_size),reshaped_input_sizes:n.map(a=>a.reshaped_input_size)}}}class jv extends je{post_process_semantic_segmentation(e,r=null){const n=e.logits,i=n.dims[0];if(r!==null&&r.length!==i)throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");const a=[];for(let s=0;sm[T]&&(m[T]=x[T],c[T]=C)}const y=new Array(u.dims[0]),b=f.data;for(let C=0;CC!==void 0);a.push({segmentation:f,labels:v})}return a}}class Ag extends je{}class Kv extends Ag{}class Yv extends je{}class Xv extends je{}class Mg extends je{}class Qv extends Mg{}class Jv extends je{}class Zv extends je{}class Og extends je{constructor(e){super(e),this.crop_pct=this.config.crop_pct??224/256}async resize(e){const r=this.size?.shortest_edge;if(r===void 0)throw new Error("Size dictionary must contain 'shortest_edge' key.");if(r<384){const n=Math.floor(r/this.crop_pct),[i,a]=this.get_resize_output_image_size(e,{shortest_edge:n});e=await e.resize(i,a,{resample:this.resample}),e=await e.center_crop(r,r)}else e=await e.resize(r,r,{resample:this.resample});return e}}class e$ extends Og{}class t$ extends je{}class r$ extends je{}class n$ extends je{constructor(e){super(e),this.include_top=this.config.include_top??!0,this.include_top&&(this.image_std=this.image_std.map(r=>r*r))}}class zg extends je{}class a$ extends zg{}class Rg extends je{post_process_object_detection(...e){return Ro(...e)}}class i$ extends Rg{}class s$ extends je{}class o$ extends je{}class Pg extends je{pad_image(e,r,n,i={}){const[a,s,o]=r;let u=this.image_mean;Array.isArray(this.image_mean)||(u=new Array(o).fill(u));let l=this.image_std;Array.isArray(l)||(l=new Array(o).fill(u));const h=u.map((f,m)=>-f/l[m]);return super.pad_image(e,r,n,{center:!0,constant_values:h,...i})}}class u$ extends Pg{}class l$ extends je{async _call(e){const r=await super._call(e),n=[r.pixel_values.dims[0],64,64],i=new ce("int64",new BigInt64Array(n.reduce((a,s)=>a*s)).fill(1n),n);return{...r,pixel_mask:i}}post_process_object_detection(...e){return Ro(...e)}remove_low_and_no_objects(e,r,n,i){let a=[],s=[],o=[];for(let u=0;un&&(a.push(h),s.push(c),o.push(f))}return[a,s,o]}check_segment_validity(e,r,n,i=.5,a=.8){let s=[],o=0,u=0;const l=r[n].data;for(let f=0;f=i&&++u;let h=o>0&&u>0;return h&&(h=o/u>a),[h,s]}compute_segments(e,r,n,i,a,s=null,o=null){let[u,l]=o??e[0].dims,h=new ce("int32",new Int32Array(u*l),[u,l]),f=[];if(o!==null)for(let v=0;vc[T]&&(m[T]=v,c[T]=x[T])}let y=0;const b=h.data;for(let v=0;vi!==r.dims[a]))throw Error(`The first ${n.length} dimensions of 'input_points' and 'input_labels' must be the same.`);return new ce("int64",e.flat(1/0).map(BigInt),n)}async _call(e,{input_points:r=null,input_labels:n=null,input_boxes:i=null}={}){const a=await super._call(e);if(r&&(a.input_points=this.reshape_input_points(r,a.original_sizes,a.reshaped_input_sizes)),n){if(!a.input_points)throw Error("`input_points` must be provided if `input_labels` are provided.");a.input_labels=this.add_input_labels(n,a.input_points)}return i&&(a.input_boxes=this.reshape_input_points(i,a.original_sizes,a.reshaped_input_sizes,!0)),a}async post_process_masks(e,r,n,{mask_threshold:i=0,binarize:a=!0,pad_size:s=null}={}){const o=[];s=s??this.pad_size;const u=[s.height,s.width];for(let l=0;li&&(y[b]=1);m=new ce("bool",y,m.dims)}o.push(m)}return o}generate_crop_boxes(e,r,{crop_n_layers:n=0,overlap_ratio:i=512/1500,points_per_crop:a=32,crop_n_points_downscale_factor:s=1}={}){}}class p$ extends je{pad_image(e,r,n,i={}){const[a,s,o]=r;return super.pad_image(e,r,{width:s+(n-s%n)%n,height:a+(n-a%n)%n},{mode:"symmetric",center:!1,constant_values:-1,...i})}}class h$ extends je{async _call(e,r){Array.isArray(e)||(e=[e]),Array.isArray(r)||(r=[r]);const n=await Promise.all(e.map(s=>this.preprocess(s))),i=await Promise.all(r.map(s=>this.preprocess(s,{do_normalize:!1,do_convert_rgb:!1,do_convert_grayscale:!0})));return{pixel_values:Qn(n.map((s,o)=>Jt([s.pixel_values,i[o].pixel_values],0)),0),original_sizes:n.map(s=>s.original_size),reshaped_input_sizes:n.map(s=>s.reshaped_input_size)}}}class f$ extends Vr{constructor(e){super(e),this.config.mel_filters??=Zn(Math.floor(1+this.config.n_fft/2),this.config.feature_size,0,8e3,this.config.sampling_rate,"slaney","slaney"),this.window=zi(this.config.n_fft,"hann")}async _extract_fbank_features(e){const r=await Oi(e,this.window,this.config.n_fft,this.config.hop_length,{power:2,mel_filters:this.config.mel_filters,log_mel:"log10",max_num_frames:this.config.nb_max_frames}),n=r.data,i=_r(n)[0];for(let a=0;athis.config.n_samples?(console.warn("Attempting to extract features for audio longer than 30 seconds. If using a pipeline to extract transcript from a long audio clip, remember to specify `chunk_length_s` and/or `stride_length_s`."),r=e.slice(0,this.config.n_samples)):(r=new Float32Array(this.config.n_samples),r.set(e)),{input_features:(await this._extract_fbank_features(r)).unsqueeze_(0)}}}class m$ extends Vr{_zero_mean_unit_var_norm(e){const n=e.reduce((a,s)=>a+s,0)/e.length,i=e.reduce((a,s)=>a+(s-n)**2,0)/e.length;return e.map(a=>(a-n)/Math.sqrt(i+1e-7))}async _call(e){ya(e,"Wav2Vec2FeatureExtractor"),e instanceof Float64Array&&(e=new Float32Array(e));let r=e;this.config.do_normalize&&(r=this._zero_mean_unit_var_norm(r));const n=[1,r.length];return{input_values:new ce("float32",r,n),attention_mask:new ce("int64",new BigInt64Array(r.length).fill(1n),n)}}}class g$ extends Vr{constructor(e){super(e);const r=this.config.sampling_rate,n=Zn(256,this.config.num_mel_bins,20,Math.floor(r/2),r,null,"kaldi",!0);for(let i=0;in*32768),Oi(e,this.window,400,160,{fft_length:512,power:2,center:!1,preemphasis:.97,mel_filters:this.mel_filters,log_mel:"log",mel_floor:1192092955078125e-22,remove_dc_offset:!0,max_num_frames:r,transpose:!0})}async _call(e,{padding:r=!0,pad_to_multiple_of:n=2,do_normalize_per_mel_bins:i=!0,return_attention_mask:a=!0}={}){ya(e,"SeamlessM4TFeatureExtractor");let s=await this._extract_fbank_features(e,this.config.max_length);if(i){const[y,b]=s.dims,v=s.data;for(let C=0;C0){const x=new Float32Array(b*(y+C));x.set(v),x.fill(this.config.padding_value,v.length);const T=y+C;s=new ce(s.type,x,[T,b]),a&&(o=new ce("int64",new BigInt64Array(T),[1,T]),o.data.fill(1n,0,y))}}const[u,l]=s.dims,h=this.config.stride;if(u%h!==0)throw new Error(`The number of frames (${u}) must be a multiple of the stride (${h}).`);const m=s.view(1,Math.floor(u/h),l*h),c={input_features:m};if(a){const y=m.dims[1],b=new BigInt64Array(y);if(o){const v=o.data;for(let C=1,x=0;C0)if(n==="rand_trunc"){const o=Math.floor(Math.random()*(s+1));e=e.subarray(o,o+r),a=await this._extract_fbank_features(e,this.mel_filters_slaney,this.config.nb_max_samples)}else throw new Error(`Truncation strategy "${n}" not implemented`);else{if(s<0){let o=new Float64Array(r);if(o.set(e),i==="repeat")for(let u=e.length;u{dn=Number(Xs.value),Po.feature_extractor.size={width:dn,height:dn},T$.textContent=dn});Xs.disabled=!1;let cn=.4;Qs.addEventListener("input",()=>{cn=Number(Qs.value),Bg(Fr.videoWidth*cn,Fr.videoHeight*cn),k$.textContent=cn});Qs.disabled=!1;Ri.textContent="Ready";let Ms=!1,Os;const Ac=bi.getContext("2d",{willReadFrequently:!0}),I$=ea.getContext("2d",{willReadFrequently:!0});function Fg(){const{width:t,height:e}=bi;Ms||(Ms=!0,async function(){Ac.drawImage(Fr,0,0,t,e);const r=Ac.getImageData(0,0,t,e),n=new Vt(r.data,t,e,4),i=await Po(n),{predicted_depth:a}=await Ng(i),s=a.data,[o,u,l]=a.dims;let h=1/0,f=-1/0;ea.width=l,ea.height=u;for(let b=0;bf&&(f=v)}const m=f-h,c=new Uint8ClampedArray(4*s.length);for(let b=0;b{Fr.srcObject=t,Fr.play();const e=t.getVideoTracks()[0],{width:r,height:n}=e.getSettings();Bg(r*cn,n*cn),setTimeout(Fg,50)}).catch(t=>{alert(t)});
\ No newline at end of file