Ex0bit Claude Opus 4.6 committed on
Commit
f265480
·
1 Parent(s): b33a606

Fix vision image freeze: reduce maxPixels, parallelize ViT attention

Browse files

- maxPixels: 262144 → 65536 (ViT O(N²) attention on 950 patches froze the GPU)
- ViT attention shader: workgroup size 256 → 32; parallelize the D (head_dim) dimension with a tree reduction
(previously all 256 threads did identical sequential work and only thread 0 wrote the output)
- Periodic GPU queue drain every 64 tokens during vision prefill
- Fix _readVisionPosEmbed allocation (reuse conversion buffer)
- Fix replay GQA numSplits mismatch (produced garbage text at batch=8)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

assets/{gpu-ops-BbLjsC0p.js → gpu-ops-flxI8RuZ.js} RENAMED
@@ -1,4 +1,4 @@
1
- (function(){const e=document.createElement("link").relList;if(e&&e.supports&&e.supports("modulepreload"))return;for(const r of document.querySelectorAll('link[rel="modulepreload"]'))u(r);new MutationObserver(r=>{for(const a of r)if(a.type==="childList")for(const i of a.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&u(i)}).observe(document,{childList:!0,subtree:!0});function t(r){const a={};return r.integrity&&(a.integrity=r.integrity),r.referrerPolicy&&(a.referrerPolicy=r.referrerPolicy),r.crossOrigin==="use-credentials"?a.credentials="include":r.crossOrigin==="anonymous"?a.credentials="omit":a.credentials="same-origin",a}function u(r){if(r.ep)return;r.ep=!0;const a=t(r);fetch(r.href,a)}})();class b{constructor(){this.device=null,this.adapter=null,this.adapterInfo=null,this.pipelineCache=new Map,this.bufferCache=new Map,this.bindGroupCache=new Map}async init(){if(!navigator.gpu)throw new Error("WebGPU not supported");if(this.adapter=await navigator.gpu.requestAdapter({powerPreference:"high-performance"}),!this.adapter)throw new Error("No WebGPU adapter found");this.adapterInfo=await this.adapter.requestAdapterInfo?.()??{};const e={},t={maxBufferSize:4*1024*1024*1024,maxStorageBufferBindingSize:4*1024*1024*1024,maxComputeWorkgroupStorageSize:32768,maxComputeInvocationsPerWorkgroup:256,maxComputeWorkgroupSizeX:256,maxStorageBuffersPerShaderStage:10};for(const[r,a]of Object.entries(t))this.adapter.limits[r]!==void 0&&(e[r]=Math.min(a,this.adapter.limits[r]));const u=[];return this.adapter.features.has("shader-f16")&&u.push("shader-f16"),this.adapter.features.has("subgroups")&&u.push("subgroups"),this.device=await this.adapter.requestDevice({requiredLimits:e,requiredFeatures:u}),this.hasF16=this.device.features.has("shader-f16"),this.hasSubgroups=this.device.features.has("subgroups"),this.device.lost.then(r=>console.error("WebGPU device lost:",r)),this}createBuffer(e,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST){const 
r=Math.ceil(t/4)*4,a=this.device.createBuffer({size:r,usage:u,label:e});return this.bufferCache.set(e,a),a}createBufferFromData(e,t,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){const r=this.createBuffer(e,t.byteLength,u);return this.device.queue.writeBuffer(r,0,t),r}createReadbackBuffer(e,t){const u=Math.ceil(t/4)*4;return this.device.createBuffer({size:u,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:e+"_readback"})}getOrCreatePipeline(e,t,u="main"){if(this.pipelineCache.has(e))return this.pipelineCache.get(e);const r=this.device.createShaderModule({code:t,label:e}),a=this.device.createComputePipeline({layout:"auto",compute:{module:r,entryPoint:u},label:e});return this.pipelineCache.set(e,a),a}initTimestamps(){this.device.features.has("timestamp-query")&&(this._tsQuerySet=this.device.createQuerySet({type:"timestamp",count:2}),this._tsResolveBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC}),this._tsReadBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this._tsEnabled=!0,this._tsResults=[])}beginBatch(){this._encoder=this.device.createCommandEncoder(),this._passCount=0,this.singlePassMode&&(this._singlePass=this._encoder.beginComputePass()),this._tsEnabled&&this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:0}}).end()}endBatch(){this._singlePass&&(this._singlePass.end(),this._singlePass=null),this._tsEnabled&&this._encoder&&(this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:1}}).end(),this._encoder.resolveQuerySet(this._tsQuerySet,0,2,this._tsResolveBuf,0),this._encoder.copyBufferToBuffer(this._tsResolveBuf,0,this._tsReadBuf,0,16)),this._encoder&&(this.device.queue.submit([this._encoder.finish()]),this._encoder=null)}async readTimestamp(){if(!this._tsEnabled)return null;await this._tsReadBuf.mapAsync(GPUMapMode.READ);const e=new 
BigInt64Array(this._tsReadBuf.getMappedRange().slice(0));this._tsReadBuf.unmap();const u=Number(e[1]-e[0])/1e6;return this._tsResults.push(u),u}copyBuffer(e,t,u,r=0,a=0){if(this._singlePass){this._singlePass.end(),this._encoder.copyBufferToBuffer(e,r,t,a,u),this._singlePass=this._encoder.beginComputePass();return}const i=this._encoder||this.device.createCommandEncoder();i.copyBufferToBuffer(e,r,t,a,u),this._encoder||this.device.queue.submit([i.finish()])}startRecording(){this._recording=[]}stopRecording(){const e=this._recording;return this._recording=null,e}replay(e,t){if(t)for(const r of t)this.device.queue.writeBuffer(r.buffer,r.offset,r.data,r.dataOffset,r.size);const u=this._encoder;for(let r=0;r<e.length;r++){const a=e[r];if(a.multi){const i=u.beginComputePass(),o=a.ops;for(let n=0;n<o.length;n++){const s=o[n];i.setPipeline(s.pipeline),i.setBindGroup(0,s.bindGroup),i.dispatchWorkgroups(s.wgX,s.wgY)}i.end()}else{const i=u.beginComputePass();i.setPipeline(a.pipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(a.wgX,a.wgY),i.end()}}}dispatch(e,t,u,r=1,a=1){if(this._recording&&this._recording.push({pipeline:e,bindGroup:t[0],wgX:u,wgY:r}),this._singlePass){const n=this._singlePass;this._passCount!==void 0&&this._passCount++,n.setPipeline(e);for(let s=0;s<t.length;s++)n.setBindGroup(s,t[s]);n.dispatchWorkgroups(u,r,a);return}const i=this._encoder||this.device.createCommandEncoder(),o=i.beginComputePass();this._passCount!==void 0&&this._passCount++,o.setPipeline(e);for(let n=0;n<t.length;n++)o.setBindGroup(n,t[n]);o.dispatchWorkgroups(u,r,a),o.end(),this._encoder||this.device.queue.submit([i.finish()])}dispatchMulti(e){if(this._recording&&this._recording.push({multi:!0,ops:e.map(r=>({pipeline:r.pipeline,bindGroup:r.bindGroups[0],wgX:r.workgroupsX,wgY:r.workgroupsY||1}))}),this._singlePass){this._passCount!==void 0&&this._passCount++;for(const r of e){this._singlePass.setPipeline(r.pipeline);for(let 
a=0;a<r.bindGroups.length;a++)this._singlePass.setBindGroup(a,r.bindGroups[a]);this._singlePass.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}return}const t=this._encoder||this.device.createCommandEncoder(),u=t.beginComputePass();this._passCount!==void 0&&this._passCount++;for(const r of e){u.setPipeline(r.pipeline);for(let a=0;a<r.bindGroups.length;a++)u.setBindGroup(a,r.bindGroups[a]);u.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}u.end(),this._encoder||this.device.queue.submit([t.finish()])}async readBuffer(e,t){const u=this.createReadbackBuffer("_readback",t),r=this.device.createCommandEncoder();r.copyBufferToBuffer(e,0,u,0,t),this.device.queue.submit([r.finish()]),await u.mapAsync(GPUMapMode.READ);const a=new Float32Array(u.getMappedRange().slice(0));return u.unmap(),u.destroy(),a}createBindGroup(e,t,u){return this.device.createBindGroup({layout:e.getBindGroupLayout(t),entries:u.map((r,a)=>({binding:a,resource:{buffer:r}}))})}createBindGroupWithOffsets(e,t,u){return this.device.createBindGroup({layout:e.getBindGroupLayout(t),entries:u.map((r,a)=>({binding:a,resource:r.buffer?{buffer:r.buffer,offset:r.offset||0,size:r.size}:{buffer:r}}))})}getCachedBindGroup(e,t,u,r){let a=this.bindGroupCache.get(e);return a||(a=this.createBindGroup(t,u,r),this.bindGroupCache.set(e,a)),a}destroy(){for(const e of this.bufferCache.values())e.destroy();this.bufferCache.clear(),this.bindGroupCache.clear(),this.device?.destroy()}}const Tr=Object.freeze(Object.defineProperty({__proto__:null,GPUContext:b},Symbol.toStringTag,{value:"Module"})),v="modulepreload",m=function(d){return"/"+d},f={},Hr=function(e,t,u){let r=Promise.resolve();if(t&&t.length>0){let n=function(s){return Promise.all(s.map(l=>Promise.resolve(l).then(p=>({status:"fulfilled",value:p}),p=>({status:"rejected",reason:p}))))};document.getElementsByTagName("link");const 
i=document.querySelector("meta[property=csp-nonce]"),o=i?.nonce||i?.getAttribute("nonce");r=n(t.map(s=>{if(s=m(s),s in f)return;f[s]=!0;const l=s.endsWith(".css"),p=l?'[rel="stylesheet"]':"";if(document.querySelector(`link[href="${s}"]${p}`))return;const g=document.createElement("link");if(g.rel=l?"stylesheet":v,l||(g.as="script"),g.crossOrigin="",g.href=s,o&&g.setAttribute("nonce",o),document.head.appendChild(g),l)return new Promise((c,w)=>{g.addEventListener("load",c),g.addEventListener("error",()=>w(new Error(`Unable to preload CSS for ${s}`)))})}))}function a(i){const o=new Event("vite:preloadError",{cancelable:!0});if(o.payload=i,window.dispatchEvent(o),!o.defaultPrevented)throw i}return r.then(i=>{for(const o of i||[])o.status==="rejected"&&a(o.reason);return e().catch(a)})},k=`
2
  struct Params { K: u32, N: u32, group_size: u32, }
3
 
4
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -270,7 +270,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
270
  let i = gid.x;
271
  if (i >= params.N) { return; }
272
  a[i] = a[i] + b[i];
273
- }`,S=`
274
  struct Params { N: u32, num_heads: u32, head_dim: u32, }
275
 
276
  @group(0) @binding(0) var<storage, read> src: array<f32>;
@@ -287,7 +287,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
287
  let d = i % hd;
288
  dst_a[i] = src[head * hd * 2u + d];
289
  dst_b[i] = src[head * hd * 2u + hd + d];
290
- }`,E=`
291
  struct Params { N: u32, }
292
 
293
  @group(0) @binding(0) var<storage, read> x: array<f32>;
@@ -338,7 +338,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
338
  if (i >= params.dim) { return; }
339
  let flat = argmax_result.idx * params.dim + i;
340
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
341
- }`,A=`
342
  struct Params { N: u32, }
343
  struct Result { idx: u32, val: f32, }
344
 
@@ -381,7 +381,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
381
  result.idx = s_idx[0];
382
  result.val = s_val[0];
383
  }
384
- }`,B=`
385
  struct Params { N: u32, }
386
 
387
  @group(0) @binding(0) var<storage, read> logits: array<f32>;
@@ -406,7 +406,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
406
  // Output (idx, val) pair — 256 candidates total
407
  result[tid * 2u] = best_idx;
408
  result[tid * 2u + 1u] = bitcast<u32>(best_val);
409
- }`,D=`
410
  struct ArgmaxResult { idx: u32, val: f32, }
411
  struct Params { recent_count: u32, history_slot: u32, }
412
 
@@ -710,7 +710,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
710
  let w = unpack_bf16(weight[i / 2u], i % 2u);
711
  x[off + i] = x[off + i] * rms * (1.0 + w);
712
  }
713
- }`,U=`
714
  struct Params { num_heads: u32, head_dim: u32, eps: f32, }
715
 
716
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
@@ -749,7 +749,7 @@ fn main(@builtin(workgroup_id) wg: vec3u, @builtin(local_invocation_id) lid: vec
749
  let w = unpack_bf16(weight[i / 2u], i % 2u);
750
  x[off + i] = x[off + i] * rms * w;
751
  }
752
- }`,O=`
753
  struct Params { channels: u32, }
754
 
755
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
@@ -1746,7 +1746,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1746
  if (lane == 0u && col < N) {
1747
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1748
  }
1749
- }`,ur=`
1750
  enable f16;
1751
 
1752
  struct Params { K: u32, N: u32, group_size: u32, }
@@ -1803,7 +1803,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
1803
  if (lane == 0u && col < N) {
1804
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1805
  }
1806
- }`,tr=`
1807
  struct Params {
1808
  K: u32, // hidden_size
1809
  N: u32, // intermediate_size
@@ -2404,10 +2404,10 @@ fn main(@builtin(local_invocation_id) lid: vec3u) {
2404
  result.idx = wg_idx[selected];
2405
  result.val = wg_val[selected];
2406
  }
2407
- }`;function _(d=320,e=1e7,t=33,u=30,r=128){return`
2408
  const ROPE_THETA: f32 = ${e};
2409
- const MROPE_S1_LIMIT: u32 = ${t}u;
2410
- const MROPE_S2_LIMIT: u32 = ${u}u;
2411
  const PARTIAL_DIM: u32 = ${r}u;
2412
 
2413
  struct Params {
@@ -3411,7 +3411,7 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3411
  let b = unpack_bf16(bias[i >> 1u], i);
3412
  output[base + i] = normalized * w + b;
3413
  }
3414
- }`,Sr=`
3415
  struct Params { K: u32, N: u32, }
3416
 
3417
  @group(0) @binding(0) var<storage, read> input: array<f32>;
@@ -3447,7 +3447,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3447
  let bp = bias[col >> 1u];
3448
  let b = bitcast<f32>(select((bp & 0xFFFFu) << 16u, bp & 0xFFFF0000u, (col & 1u) == 1u));
3449
  output[token * N + col] = sum + b;
3450
- }`,Er=`
3451
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3452
  @group(0) @binding(1) var<uniform> len: u32;
3453
 
@@ -3515,7 +3515,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3515
 
3516
  q_out[idx] = q_in[idx] * c + sign * q_in[partner_idx] * s;
3517
  k_out[idx] = k_in[idx] * c + sign * k_in[partner_idx] * s;
3518
- }`,Ar=`
3519
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, scale: f32, }
3520
 
3521
  @group(0) @binding(0) var<storage, read> q: array<f32>;
@@ -3524,9 +3524,9 @@ struct Params { seq_len: u32, num_heads: u32, head_dim: u32, scale: f32, }
3524
  @group(0) @binding(3) var<storage, read_write> output: array<f32>;
3525
  @group(0) @binding(4) var<uniform> params: Params;
3526
 
3527
- var<workgroup> shared_scores: array<f32, 256>;
3528
 
3529
- @compute @workgroup_size(256)
3530
  fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: vec3u) {
3531
  let q_pos = wid.x;
3532
  let head = wid.y;
@@ -3536,68 +3536,72 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3536
  let D = params.head_dim;
3537
  let scale = params.scale;
3538
 
3539
- // Base offset for this head's Q at position q_pos
3540
  let q_base = (q_pos * H + head) * D;
3541
 
3542
- // Phase 1: Compute all attention scores (Q @ K^T * scale)
3543
- // Each thread handles a subset of K positions
3544
- var max_score: f32 = -1e30;
3545
- for (var kp = tid; kp < S; kp += 256u) {
3546
- let k_base = (kp * H + head) * D;
3547
- var dot: f32 = 0.0;
3548
- for (var d: u32 = 0u; d < D; d++) {
3549
- dot += q[q_base + d] * k[k_base + d];
3550
- }
3551
- let s = dot * scale;
3552
- shared_scores[tid] = s; // temp store (only valid for this kp)
3553
- max_score = max(max_score, s);
3554
- }
3555
-
3556
- // For simplicity with variable S, use multi-pass approach:
3557
- // Pass 1: find max, Pass 2: compute exp and sum, Pass 3: weighted V sum
3558
- // Since head_dim is small (64), we can accumulate V in registers.
3559
 
3560
- // Re-compute scores and do online softmax + V accumulation
3561
  var running_max: f32 = -1e30;
3562
  var running_sum: f32 = 0.0;
3563
- var acc = array<f32, 128>(); // max head_dim = 128
3564
 
3565
  for (var kp: u32 = 0u; kp < S; kp++) {
3566
  let k_base = (kp * H + head) * D;
3567
- var dot: f32 = 0.0;
3568
- for (var d: u32 = 0u; d < D; d++) {
3569
- dot += q[q_base + d] * k[k_base + d];
3570
- }
3571
- let s = dot * scale;
3572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3573
  let old_max = running_max;
3574
  running_max = max(running_max, s);
3575
- let correction = exp(old_max - running_max);
3576
-
3577
- // Correct previous accumulation
3578
- running_sum = running_sum * correction;
3579
- for (var d: u32 = 0u; d < D; d++) {
3580
- acc[d] = acc[d] * correction;
3581
- }
3582
 
3583
  let w = exp(s - running_max);
3584
  running_sum += w;
3585
 
 
3586
  let v_base = (kp * H + head) * D;
3587
- for (var d: u32 = 0u; d < D; d++) {
3588
- acc[d] += w * v[v_base + d];
3589
- }
3590
- }
3591
-
3592
- // Write output (only thread 0 does the work since this is sequential per-query)
3593
- if (tid == 0u) {
3594
- let inv_sum = 1.0 / running_sum;
3595
- let out_base = (q_pos * H + head) * D;
3596
- for (var d: u32 = 0u; d < D; d++) {
3597
- output[out_base + d] = acc[d] * inv_sum;
3598
- }
3599
- }
3600
- }`,Br=`
3601
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
3602
  @group(0) @binding(1) var<storage, read> b: array<f32>;
3603
  @group(0) @binding(2) var<uniform> len: u32;
@@ -3607,7 +3611,7 @@ fn main(@builtin(global_invocation_id) gid: vec3u) {
3607
  let i = gid.x;
3608
  if (i >= len) { return; }
3609
  a[i] += b[i];
3610
- }`,Dr=`
3611
  struct Params { H: u32, }
3612
 
3613
  @group(0) @binding(0) var<storage, read> vision: array<f32>;
@@ -3625,4 +3629,4 @@ fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: ve
3625
  for (var i = tid; i < H; i += 256u) {
3626
  embeds[pos * H + i] = vision[vit_idx * H + i];
3627
  }
3628
- }`,zr={gptq_matvec:k,gptq_matvec_f16:h,gptq_matvec_4t:pr,gptq_matvec_4t_f16:fr,gptq_splitk:x,reduce_splitk:q,bf16_matvec:y,rmsnorm:F,silu_mul:N,add:P,embedding:R,embed_from_argmax:K,argmax:A,topk_extract:B,kv_cache_store:z,gqa_attention_head:T,gqa_reduce:H,deltanet_recurrent:M,head_rmsnorm:G,head_rmsnorm_nogated:U,causal_conv1d:O,split:S,sigmoid_mul:E,fused_gate_up_silu:C,fused_gate_up_silu_f16:L,fused_gate_up_silu_4t:Q,fused_gate_up_silu_4t_f16:j,add_rmsnorm:V,add_rmsnorm_ro:W,three_way_add_rmsnorm:Y,norm_gptq_lite:or,norm_gptq_lite_noadd:nr,fused_sigmoid_gptq:X,fused_sigmoid_gptq_f16:$,fused_sigmoid_gptq_4t:er,fused_sigmoid_gptq_4t_f16:ur,fused_silu_gptq:Z,fused_silu_gptq_f16:J,fused_silu_gptq_4t:rr,fused_silu_gptq_4t_f16:ar,fused_addnorm_gate_up_silu:tr,rep_penalty:gr,gpu_sample:dr,append_token:D,fused_norm_gptq:ir,fused_norm_gptq_noadd:sr,fused_conv_deltanet_norm:I,fused_split_qknorm_kvstore:lr,gptq_matvec_4t_f16_sk:_r,gather_rows_bf16:cr,quantize_bf16_to_int4:wr,pack_f32_to_f16_pairs:br,gptq_matmul_b2:yr,gptq_matmul_b2_f16:Fr,gptq_matmul_b2_4t_f16:Nr,add_rmsnorm_b2:vr,add_rmsnorm_ro_b2:mr,three_way_add_rmsnorm_b2:kr,fused_gate_up_silu_b2_f16:hr,fused_silu_gptq_b2_f16:xr,fused_sigmoid_gptq_b2_f16:qr,vit_layernorm:Pr,vit_bf16_matvec_bias:Sr,vit_gelu_tanh:Er,vit_gelu:Rr,vit_rope:Kr,vit_attention:Ar,vit_add:Br,vit_scatter_embed:Dr},Mr=Object.freeze(Object.defineProperty({__proto__:null,SHADERS:zr,SHADER_FUSED_SPLIT_QKNORM_KVSTORE_FN:_},Symbol.toStringTag,{value:"Module"}));export{b as G,zr as S,Hr as _,_ as a,Mr as b,Tr as g};
 
1
+ (function(){const e=document.createElement("link").relList;if(e&&e.supports&&e.supports("modulepreload"))return;for(const r of document.querySelectorAll('link[rel="modulepreload"]'))t(r);new MutationObserver(r=>{for(const a of r)if(a.type==="childList")for(const i of a.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&t(i)}).observe(document,{childList:!0,subtree:!0});function u(r){const a={};return r.integrity&&(a.integrity=r.integrity),r.referrerPolicy&&(a.referrerPolicy=r.referrerPolicy),r.crossOrigin==="use-credentials"?a.credentials="include":r.crossOrigin==="anonymous"?a.credentials="omit":a.credentials="same-origin",a}function t(r){if(r.ep)return;r.ep=!0;const a=u(r);fetch(r.href,a)}})();class b{constructor(){this.device=null,this.adapter=null,this.adapterInfo=null,this.pipelineCache=new Map,this.bufferCache=new Map,this.bindGroupCache=new Map}async init(){if(!navigator.gpu)throw new Error("WebGPU not supported");if(this.adapter=await navigator.gpu.requestAdapter({powerPreference:"high-performance"}),!this.adapter)throw new Error("No WebGPU adapter found");this.adapterInfo=await this.adapter.requestAdapterInfo?.()??{};const e={},u={maxBufferSize:4*1024*1024*1024,maxStorageBufferBindingSize:4*1024*1024*1024,maxComputeWorkgroupStorageSize:32768,maxComputeInvocationsPerWorkgroup:256,maxComputeWorkgroupSizeX:256,maxStorageBuffersPerShaderStage:10};for(const[r,a]of Object.entries(u))this.adapter.limits[r]!==void 0&&(e[r]=Math.min(a,this.adapter.limits[r]));const t=[];return this.adapter.features.has("shader-f16")&&t.push("shader-f16"),this.adapter.features.has("subgroups")&&t.push("subgroups"),this.device=await this.adapter.requestDevice({requiredLimits:e,requiredFeatures:t}),this.hasF16=this.device.features.has("shader-f16"),this.hasSubgroups=this.device.features.has("subgroups"),this.device.lost.then(r=>console.error("WebGPU device lost:",r)),this}createBuffer(e,u,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST){const 
r=Math.ceil(u/4)*4,a=this.device.createBuffer({size:r,usage:t,label:e});return this.bufferCache.set(e,a),a}createBufferFromData(e,u,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){const r=this.createBuffer(e,u.byteLength,t);return this.device.queue.writeBuffer(r,0,u),r}createReadbackBuffer(e,u){const t=Math.ceil(u/4)*4;return this.device.createBuffer({size:t,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:e+"_readback"})}getOrCreatePipeline(e,u,t="main"){if(this.pipelineCache.has(e))return this.pipelineCache.get(e);const r=this.device.createShaderModule({code:u,label:e}),a=this.device.createComputePipeline({layout:"auto",compute:{module:r,entryPoint:t},label:e});return this.pipelineCache.set(e,a),a}initTimestamps(){this.device.features.has("timestamp-query")&&(this._tsQuerySet=this.device.createQuerySet({type:"timestamp",count:2}),this._tsResolveBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC}),this._tsReadBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),this._tsEnabled=!0,this._tsResults=[])}beginBatch(){this._encoder=this.device.createCommandEncoder(),this._passCount=0,this.singlePassMode&&(this._singlePass=this._encoder.beginComputePass()),this._tsEnabled&&this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:0}}).end()}endBatch(){this._singlePass&&(this._singlePass.end(),this._singlePass=null),this._tsEnabled&&this._encoder&&(this._encoder.beginComputePass({timestampWrites:{querySet:this._tsQuerySet,beginningOfPassWriteIndex:1}}).end(),this._encoder.resolveQuerySet(this._tsQuerySet,0,2,this._tsResolveBuf,0),this._encoder.copyBufferToBuffer(this._tsResolveBuf,0,this._tsReadBuf,0,16)),this._encoder&&(this.device.queue.submit([this._encoder.finish()]),this._encoder=null)}async readTimestamp(){if(!this._tsEnabled)return null;await this._tsReadBuf.mapAsync(GPUMapMode.READ);const e=new 
BigInt64Array(this._tsReadBuf.getMappedRange().slice(0));this._tsReadBuf.unmap();const t=Number(e[1]-e[0])/1e6;return this._tsResults.push(t),t}copyBuffer(e,u,t,r=0,a=0){if(this._singlePass){this._singlePass.end(),this._encoder.copyBufferToBuffer(e,r,u,a,t),this._singlePass=this._encoder.beginComputePass();return}const i=this._encoder||this.device.createCommandEncoder();i.copyBufferToBuffer(e,r,u,a,t),this._encoder||this.device.queue.submit([i.finish()])}startRecording(){this._recording=[]}stopRecording(){const e=this._recording;return this._recording=null,e}replay(e,u){if(u)for(const r of u)this.device.queue.writeBuffer(r.buffer,r.offset,r.data,r.dataOffset,r.size);const t=this._encoder;for(let r=0;r<e.length;r++){const a=e[r];if(a.multi){const i=t.beginComputePass(),o=a.ops;for(let n=0;n<o.length;n++){const s=o[n];i.setPipeline(s.pipeline),i.setBindGroup(0,s.bindGroup),i.dispatchWorkgroups(s.wgX,s.wgY)}i.end()}else{const i=t.beginComputePass();i.setPipeline(a.pipeline),i.setBindGroup(0,a.bindGroup),i.dispatchWorkgroups(a.wgX,a.wgY),i.end()}}}dispatch(e,u,t,r=1,a=1){if(this._recording&&this._recording.push({pipeline:e,bindGroup:u[0],wgX:t,wgY:r}),this._singlePass){const n=this._singlePass;this._passCount!==void 0&&this._passCount++,n.setPipeline(e);for(let s=0;s<u.length;s++)n.setBindGroup(s,u[s]);n.dispatchWorkgroups(t,r,a);return}const i=this._encoder||this.device.createCommandEncoder(),o=i.beginComputePass();this._passCount!==void 0&&this._passCount++,o.setPipeline(e);for(let n=0;n<u.length;n++)o.setBindGroup(n,u[n]);o.dispatchWorkgroups(t,r,a),o.end(),this._encoder||this.device.queue.submit([i.finish()])}dispatchMulti(e){if(this._recording&&this._recording.push({multi:!0,ops:e.map(r=>({pipeline:r.pipeline,bindGroup:r.bindGroups[0],wgX:r.workgroupsX,wgY:r.workgroupsY||1}))}),this._singlePass){this._passCount!==void 0&&this._passCount++;for(const r of e){this._singlePass.setPipeline(r.pipeline);for(let 
a=0;a<r.bindGroups.length;a++)this._singlePass.setBindGroup(a,r.bindGroups[a]);this._singlePass.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}return}const u=this._encoder||this.device.createCommandEncoder(),t=u.beginComputePass();this._passCount!==void 0&&this._passCount++;for(const r of e){t.setPipeline(r.pipeline);for(let a=0;a<r.bindGroups.length;a++)t.setBindGroup(a,r.bindGroups[a]);t.dispatchWorkgroups(r.workgroupsX,r.workgroupsY||1,r.workgroupsZ||1)}t.end(),this._encoder||this.device.queue.submit([u.finish()])}async readBuffer(e,u){const t=this.createReadbackBuffer("_readback",u),r=this.device.createCommandEncoder();r.copyBufferToBuffer(e,0,t,0,u),this.device.queue.submit([r.finish()]),await t.mapAsync(GPUMapMode.READ);const a=new Float32Array(t.getMappedRange().slice(0));return t.unmap(),t.destroy(),a}createBindGroup(e,u,t){return this.device.createBindGroup({layout:e.getBindGroupLayout(u),entries:t.map((r,a)=>({binding:a,resource:{buffer:r}}))})}createBindGroupWithOffsets(e,u,t){return this.device.createBindGroup({layout:e.getBindGroupLayout(u),entries:t.map((r,a)=>({binding:a,resource:r.buffer?{buffer:r.buffer,offset:r.offset||0,size:r.size}:{buffer:r}}))})}getCachedBindGroup(e,u,t,r){let a=this.bindGroupCache.get(e);return a||(a=this.createBindGroup(u,t,r),this.bindGroupCache.set(e,a)),a}destroy(){for(const e of this.bufferCache.values())e.destroy();this.bufferCache.clear(),this.bindGroupCache.clear(),this.device?.destroy()}}const Tr=Object.freeze(Object.defineProperty({__proto__:null,GPUContext:b},Symbol.toStringTag,{value:"Module"})),v="modulepreload",m=function(d){return"/"+d},f={},Hr=function(e,u,t){let r=Promise.resolve();if(u&&u.length>0){let n=function(s){return Promise.all(s.map(l=>Promise.resolve(l).then(p=>({status:"fulfilled",value:p}),p=>({status:"rejected",reason:p}))))};document.getElementsByTagName("link");const 
i=document.querySelector("meta[property=csp-nonce]"),o=i?.nonce||i?.getAttribute("nonce");r=n(u.map(s=>{if(s=m(s),s in f)return;f[s]=!0;const l=s.endsWith(".css"),p=l?'[rel="stylesheet"]':"";if(document.querySelector(`link[href="${s}"]${p}`))return;const g=document.createElement("link");if(g.rel=l?"stylesheet":v,l||(g.as="script"),g.crossOrigin="",g.href=s,o&&g.setAttribute("nonce",o),document.head.appendChild(g),l)return new Promise((c,w)=>{g.addEventListener("load",c),g.addEventListener("error",()=>w(new Error(`Unable to preload CSS for ${s}`)))})}))}function a(i){const o=new Event("vite:preloadError",{cancelable:!0});if(o.payload=i,window.dispatchEvent(o),!o.defaultPrevented)throw i}return r.then(i=>{for(const o of i||[])o.status==="rejected"&&a(o.reason);return e().catch(a)})},k=`
2
  struct Params { K: u32, N: u32, group_size: u32, }
3
 
4
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
270
  let i = gid.x;
271
  if (i >= params.N) { return; }
272
  a[i] = a[i] + b[i];
273
+ }`,E=`
274
  struct Params { N: u32, num_heads: u32, head_dim: u32, }
275
 
276
  @group(0) @binding(0) var<storage, read> src: array<f32>;
 
287
  let d = i % hd;
288
  dst_a[i] = src[head * hd * 2u + d];
289
  dst_b[i] = src[head * hd * 2u + hd + d];
290
+ }`,S=`
291
  struct Params { N: u32, }
292
 
293
  @group(0) @binding(0) var<storage, read> x: array<f32>;
 
338
  if (i >= params.dim) { return; }
339
  let flat = argmax_result.idx * params.dim + i;
340
  output[i] = unpack_bf16(embeddings[flat / 2u], flat % 2u);
341
+ }`,B=`
342
  struct Params { N: u32, }
343
  struct Result { idx: u32, val: f32, }
344
 
 
381
  result.idx = s_idx[0];
382
  result.val = s_val[0];
383
  }
384
+ }`,D=`
385
  struct Params { N: u32, }
386
 
387
  @group(0) @binding(0) var<storage, read> logits: array<f32>;
 
406
  // Output (idx, val) pair — 256 candidates total
407
  result[tid * 2u] = best_idx;
408
  result[tid * 2u + 1u] = bitcast<u32>(best_val);
409
+ }`,A=`
410
  struct ArgmaxResult { idx: u32, val: f32, }
411
  struct Params { recent_count: u32, history_slot: u32, }
412
 
 
710
  let w = unpack_bf16(weight[i / 2u], i % 2u);
711
  x[off + i] = x[off + i] * rms * (1.0 + w);
712
  }
713
+ }`,O=`
714
  struct Params { num_heads: u32, head_dim: u32, eps: f32, }
715
 
716
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
 
749
  let w = unpack_bf16(weight[i / 2u], i % 2u);
750
  x[off + i] = x[off + i] * rms * w;
751
  }
752
+ }`,U=`
753
  struct Params { channels: u32, }
754
 
755
  @group(0) @binding(0) var<storage, read_write> x: array<f32>;
 
1746
  if (lane == 0u && col < N) {
1747
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1748
  }
1749
+ }`,tr=`
1750
  enable f16;
1751
 
1752
  struct Params { K: u32, N: u32, group_size: u32, }
 
1803
  if (lane == 0u && col < N) {
1804
  output[col] = partial[tid] + partial[tid+1u] + partial[tid+2u] + partial[tid+3u];
1805
  }
1806
+ }`,ur=`
1807
  struct Params {
1808
  K: u32, // hidden_size
1809
  N: u32, // intermediate_size
 
2404
  result.idx = wg_idx[selected];
2405
  result.val = wg_val[selected];
2406
  }
2407
+ }`;function _(d=320,e=1e7,u=33,t=30,r=128){return`
2408
  const ROPE_THETA: f32 = ${e};
2409
+ const MROPE_S1_LIMIT: u32 = ${u}u;
2410
+ const MROPE_S2_LIMIT: u32 = ${t}u;
2411
  const PARTIAL_DIM: u32 = ${r}u;
2412
 
2413
  struct Params {
 
3411
  let b = unpack_bf16(bias[i >> 1u], i);
3412
  output[base + i] = normalized * w + b;
3413
  }
3414
+ }`,Er=`
3415
  struct Params { K: u32, N: u32, }
3416
 
3417
  @group(0) @binding(0) var<storage, read> input: array<f32>;
 
3447
  let bp = bias[col >> 1u];
3448
  let b = bitcast<f32>(select((bp & 0xFFFFu) << 16u, bp & 0xFFFF0000u, (col & 1u) == 1u));
3449
  output[token * N + col] = sum + b;
3450
+ }`,Sr=`
3451
  @group(0) @binding(0) var<storage, read_write> data: array<f32>;
3452
  @group(0) @binding(1) var<uniform> len: u32;
3453
 
 
3515
 
3516
  q_out[idx] = q_in[idx] * c + sign * q_in[partner_idx] * s;
3517
  k_out[idx] = k_in[idx] * c + sign * k_in[partner_idx] * s;
3518
+ }`,Br=`
3519
  struct Params { seq_len: u32, num_heads: u32, head_dim: u32, scale: f32, }
3520
 
3521
  @group(0) @binding(0) var<storage, read> q: array<f32>;
 
3524
  @group(0) @binding(3) var<storage, read_write> output: array<f32>;
3525
  @group(0) @binding(4) var<uniform> params: Params;
3526
 
3527
+ var<workgroup> wg_dot: array<f32, 32>;
3528
 
3529
+ @compute @workgroup_size(32)
3530
  fn main(@builtin(local_invocation_id) lid: vec3u, @builtin(workgroup_id) wid: vec3u) {
3531
  let q_pos = wid.x;
3532
  let head = wid.y;
 
3536
  let D = params.head_dim;
3537
  let scale = params.scale;
3538
 
 
3539
  let q_base = (q_pos * H + head) * D;
3540
 
3541
+ // Each thread handles D elements with stride 32 (D=64→2, D=128→4 per thread)
3542
+ // Pre-load Q into registers
3543
+ var qc0: f32 = 0.0; var qc1: f32 = 0.0; var qc2: f32 = 0.0; var qc3: f32 = 0.0;
3544
+ qc0 = q[q_base + tid];
3545
+ if (tid + 32u < D) { qc1 = q[q_base + tid + 32u]; }
3546
+ if (tid + 64u < D) { qc2 = q[q_base + tid + 64u]; }
3547
+ if (tid + 96u < D) { qc3 = q[q_base + tid + 96u]; }
 
 
 
 
 
 
 
 
 
 
3548
 
3549
+ // Online softmax + V accumulation, parallelized across D dimension
3550
  var running_max: f32 = -1e30;
3551
  var running_sum: f32 = 0.0;
3552
+ var a0: f32 = 0.0; var a1: f32 = 0.0; var a2: f32 = 0.0; var a3: f32 = 0.0;
3553
 
3554
  for (var kp: u32 = 0u; kp < S; kp++) {
3555
  let k_base = (kp * H + head) * D;
 
 
 
 
 
3556
 
3557
+ // Partial dot product: each thread multiplies its D elements
3558
+ var pd: f32 = qc0 * k[k_base + tid];
3559
+ if (tid + 32u < D) { pd += qc1 * k[k_base + tid + 32u]; }
3560
+ if (tid + 64u < D) { pd += qc2 * k[k_base + tid + 64u]; }
3561
+ if (tid + 96u < D) { pd += qc3 * k[k_base + tid + 96u]; }
3562
+
3563
+ // Tree reduction: 32 → 1
3564
+ wg_dot[tid] = pd;
3565
+ workgroupBarrier();
3566
+ if (tid < 16u) { wg_dot[tid] += wg_dot[tid + 16u]; }
3567
+ workgroupBarrier();
3568
+ if (tid < 8u) { wg_dot[tid] += wg_dot[tid + 8u]; }
3569
+ workgroupBarrier();
3570
+ if (tid < 4u) { wg_dot[tid] += wg_dot[tid + 4u]; }
3571
+ workgroupBarrier();
3572
+ if (tid < 2u) { wg_dot[tid] += wg_dot[tid + 2u]; }
3573
+ workgroupBarrier();
3574
+ if (tid == 0u) { wg_dot[0] += wg_dot[1]; }
3575
+ workgroupBarrier();
3576
+
3577
+ let s = wg_dot[0] * scale;
3578
+
3579
+ // Online softmax (all threads have same s after reduction)
3580
  let old_max = running_max;
3581
  running_max = max(running_max, s);
3582
+ let corr = exp(old_max - running_max);
3583
+ running_sum = running_sum * corr;
3584
+ a0 = a0 * corr; a1 = a1 * corr; a2 = a2 * corr; a3 = a3 * corr;
 
 
 
 
3585
 
3586
  let w = exp(s - running_max);
3587
  running_sum += w;
3588
 
3589
+ // Accumulate weighted V for this thread's D elements
3590
  let v_base = (kp * H + head) * D;
3591
+ a0 += w * v[v_base + tid];
3592
+ if (tid + 32u < D) { a1 += w * v[v_base + tid + 32u]; }
3593
+ if (tid + 64u < D) { a2 += w * v[v_base + tid + 64u]; }
3594
+ if (tid + 96u < D) { a3 += w * v[v_base + tid + 96u]; }
3595
+ }
3596
+
3597
+ // Write output
3598
+ let inv_sum = 1.0 / running_sum;
3599
+ let out_base = (q_pos * H + head) * D;
3600
+ output[out_base + tid] = a0 * inv_sum;
3601
+ if (tid + 32u < D) { output[out_base + tid + 32u] = a1 * inv_sum; }
3602
+ if (tid + 64u < D) { output[out_base + tid + 64u] = a2 * inv_sum; }
3603
+ if (tid + 96u < D) { output[out_base + tid + 96u] = a3 * inv_sum; }
3604
+ }`,Dr=`
3605
  @group(0) @binding(0) var<storage, read_write> a: array<f32>;
3606
  @group(0) @binding(1) var<storage, read> b: array<f32>;
3607
  @group(0) @binding(2) var<uniform> len: u32;
 
3611
  let i = gid.x;
3612
  if (i >= len) { return; }
3613
  a[i] += b[i];
3614
+ }`,Ar=`
3615
  struct Params { H: u32, }
3616
 
3617
  @group(0) @binding(0) var<storage, read> vision: array<f32>;
 
3629
  for (var i = tid; i < H; i += 256u) {
3630
  embeds[pos * H + i] = vision[vit_idx * H + i];
3631
  }
3632
+ }`,zr={gptq_matvec:k,gptq_matvec_f16:h,gptq_matvec_4t:pr,gptq_matvec_4t_f16:fr,gptq_splitk:x,reduce_splitk:q,bf16_matvec:y,rmsnorm:F,silu_mul:N,add:P,embedding:R,embed_from_argmax:K,argmax:B,topk_extract:D,kv_cache_store:z,gqa_attention_head:T,gqa_reduce:H,deltanet_recurrent:M,head_rmsnorm:G,head_rmsnorm_nogated:O,causal_conv1d:U,split:E,sigmoid_mul:S,fused_gate_up_silu:C,fused_gate_up_silu_f16:L,fused_gate_up_silu_4t:Q,fused_gate_up_silu_4t_f16:j,add_rmsnorm:V,add_rmsnorm_ro:W,three_way_add_rmsnorm:Y,norm_gptq_lite:or,norm_gptq_lite_noadd:nr,fused_sigmoid_gptq:X,fused_sigmoid_gptq_f16:$,fused_sigmoid_gptq_4t:er,fused_sigmoid_gptq_4t_f16:tr,fused_silu_gptq:Z,fused_silu_gptq_f16:J,fused_silu_gptq_4t:rr,fused_silu_gptq_4t_f16:ar,fused_addnorm_gate_up_silu:ur,rep_penalty:gr,gpu_sample:dr,append_token:A,fused_norm_gptq:ir,fused_norm_gptq_noadd:sr,fused_conv_deltanet_norm:I,fused_split_qknorm_kvstore:lr,gptq_matvec_4t_f16_sk:_r,gather_rows_bf16:cr,quantize_bf16_to_int4:wr,pack_f32_to_f16_pairs:br,gptq_matmul_b2:yr,gptq_matmul_b2_f16:Fr,gptq_matmul_b2_4t_f16:Nr,add_rmsnorm_b2:vr,add_rmsnorm_ro_b2:mr,three_way_add_rmsnorm_b2:kr,fused_gate_up_silu_b2_f16:hr,fused_silu_gptq_b2_f16:xr,fused_sigmoid_gptq_b2_f16:qr,vit_layernorm:Pr,vit_bf16_matvec_bias:Er,vit_gelu_tanh:Sr,vit_gelu:Rr,vit_rope:Kr,vit_attention:Br,vit_add:Dr,vit_scatter_embed:Ar},Mr=Object.freeze(Object.defineProperty({__proto__:null,SHADERS:zr,SHADER_FUSED_SPLIT_QKNORM_KVSTORE_FN:_},Symbol.toStringTag,{value:"Module"}));export{b as G,zr as S,Hr as _,_ as a,Mr as b,Tr as g};
assets/{main-p04e1WzX.js → main-Cji2l4fL.js} RENAMED
@@ -1,4 +1,4 @@
1
- import{G as Pd}from"./gpu-ops-BbLjsC0p.js";import{Qwen35Model as Sd}from"./qwen35-model-7KVn_FLm.js";import{loadConfig as Cd,loadQuantConfig as Fd,loadModelWeights as Ad}from"./safetensors-loader-CwGm5mJX.js";class Wn{}let si=class{static create(){throw new Error("ONNX not available")}};class ni{}const ri={},Ld={Tensor:Wn,InferenceSession:si,OrtEnv:ni,env:ri},Id=Object.freeze(Object.defineProperty({__proto__:null,InferenceSession:si,OrtEnv:ni,Tensor:Wn,default:Ld,env:ri},Symbol.toStringTag,{value:"Module"}));var kn={},Od=Object.defineProperty,Wt=(e,t)=>{for(var s in t)Od(e,s,{get:t[s],enumerable:!0})},Ie={},Ze={},Nd={},zd="4.0.0-next.6",Hn=typeof self<"u",Vt=!li(Ie),ai=!li(Ze),Rs=Hn&&"caches"in self,Dd=typeof globalThis.Deno<"u",en=Dd&&Rs&&!Vt,oi=typeof process<"u",ii=oi&&process?.release?.name==="node"&&!en,Qn=typeof window<"u"&&typeof window.document<"u",Xn=Hn&&["DedicatedWorkerGlobalScope","ServiceWorkerGlobalScope","SharedWorkerGlobalScope"].includes(self.constructor?.name),Bd=Qn||Xn||en,Vd=ii||typeof navigator<"u"&&"gpu"in navigator,Gd=typeof navigator<"u"&&"ml"in navigator,$d=typeof crypto<"u"&&typeof crypto.getRandomValues=="function",Rd=typeof chrome<"u"&&typeof chrome.runtime<"u"&&typeof chrome.runtime.id=="string",jd=typeof ServiceWorkerGlobalScope<"u"&&Hn&&self instanceof ServiceWorkerGlobalScope,qd=()=>{if(typeof navigator>"u")return!1;const e=navigator.userAgent,s=(navigator.vendor||"").indexOf("Apple")>-1,n=!e.match(/CriOS|FxiOS|EdgiOS|OPiOS|mercury|brave/i)&&!e.includes("Chrome")&&!e.includes("Android");return s&&n},Ud=qd(),K=Object.freeze({IS_BROWSER_ENV:Qn,IS_WEBWORKER_ENV:Xn,IS_WEB_ENV:Bd,IS_SERVICE_WORKER_ENV:jd,IS_DENO_WEB_RUNTIME:en,IS_WEB_CACHE_AVAILABLE:Rs,IS_WEBGPU_AVAILABLE:Vd,IS_WEBNN_AVAILABLE:Gd,IS_SAFARI:Ud,IS_PROCESS_AVAILABLE:oi,IS_NODE_ENV:ii,IS_FS_AVAILABLE:Vt,IS_PATH_AVAILABLE:ai,IS_CRYPTO_AVAILABLE:$d,IS_CHROME_AVAILABLE:Rd}),Kn=Vt&&ai,js="./";if(Kn){const 
e=Object(import.meta).url;e?js=Ze.dirname(Ze.dirname(Nd.fileURLToPath(e))):typeof __dirname<"u"&&(js=Ze.dirname(__dirname))}var Wd=Kn?Ze.join(js,"/.cache/"):null,co="/models/",Hd=Kn?Ze.join(js,co):co,Qd=typeof globalThis.fetch=="function"?globalThis.fetch.bind(globalThis):void 0,$e=Object.freeze({DEBUG:10,INFO:20,WARNING:30,ERROR:40,NONE:50}),uo=$e.WARNING,te={version:zd,backends:{onnx:{}},get logLevel(){return uo},set logLevel(e){uo=e,te.backends.onnx?.setLogLevel?.(e)},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!(Qn||Xn||en),localModelPath:Hd,useFS:Vt,useBrowserCache:Rs,useFSCache:Vt,cacheDir:Wd,useCustomCache:!1,customCache:null,useWasmCache:Rs||Vt,cacheKey:"transformers-cache",fetch:Qd};function li(e){return Object.keys(e).length===0}function Dt(e,t){e&&e(t)}function Xd(e){return Number.isInteger(e)||typeof e=="bigint"}function _o(e){return e==null||e===-1}function ho(e){const t=[];let s=e;for(;Array.isArray(s);)t.push(s.length),s=s[0];return t}function et(...e){return Array.prototype.concat.apply([],e)}function qs(e,t){return Math.abs((e+t)%(2*t)-t)}function Ce(e,t){return Object.assign({},...t.map(s=>{if(e[s]!==void 0)return{[s]:e[s]}}))}function Kd(e,t){let s=0;for(const n of e)n===t&&++s;return s}var J={error(...e){te.logLevel<=$e.ERROR&&console.error(...e)},warn(...e){te.logLevel<=$e.WARNING&&console.warn(...e)},info(...e){te.logLevel<=$e.INFO&&console.log(...e)},debug(...e){te.logLevel<=$e.DEBUG&&console.log(...e)},log(...e){this.info(...e)}},Jd=class{constructor(e){this.trie=this._build_trie(e)}_build_trie(e){const t=Object.create(null);for(const s of e){let n=t;for(let r=0;r<s.length;++r){const a=s[r];n=n[a]??=Object.create(null)}n.end=s}return t}split(e){const t=[],s=e.length;let n=0,r=0;for(;r<s;){let a=this.trie,o=null,i=r;for(;i<s&&(a=a[e[i]]);)a.end&&(o=a.end),++i;o?(r>n&&t.push(e.slice(n,r)),t.push(o),r+=o.length,n=r):++r}return 
n<s&&t.push(e.slice(n)),t}},po=Jd,Yd=class{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??!this.special}},Zd=Yd,ci=(()=>{const e=[...Array.from({length:94},(r,a)=>a+33),...Array.from({length:12},(r,a)=>a+161),...Array.from({length:82},(r,a)=>a+174)],t=e.slice();let s=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+s),s+=1);const n=t.map(r=>String.fromCharCode(r));return Object.fromEntries(e.map((r,a)=>[r,n[a]]))})(),eu=e=>Object.fromEntries(Object.entries(e).map(([t,s])=>[s,t])),tu=eu(ci),fo=".,!?…。,、।۔،",su=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],["(?i:[sdmt]|ll|ve|re)","(?:[sS]|[dD]|[mM]|[tT]|[lL][lL]|[vV][eE]|[rR][eE])"],["[^\\r\\n\\p{L}\\p{N}]?+","[^\\r\\n\\p{L}\\p{N}]?"],["[^\\s\\p{L}\\p{N}]++","[^\\s\\p{L}\\p{N}]+"],["(?>\\p{Nd}{510})","(?:\\p{Nd}{510})"],["\\p{Nd}{3}+","(?:\\p{Nd}{3})+"],["\\G",""],[` ?[^(\\s|[${fo}])]+`,` ?[^\\s${fo}]+`]]),Us="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",Jn=e=>e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n't/g,"n't").replace(/ 'm/g,"'m").replace(/ 's/g,"'s").replace(/ 've/g,"'ve").replace(/ 're/g,"'re"),tn=(e,t=!0)=>{if(e.Regex!==void 0){let s=e.Regex.replace(/\\([#&~])/g,"$1");s=s.replace(/\\A/g,"^").replace(/\\z/g,"$").replace(/\\Z/g,"(?=\\r?\\n?$)");for(const[n,r]of su)s=s.replaceAll(n,r);try{return new RegExp(s,"gu")}catch(n){if(!(n instanceof SyntaxError)||!n.message.toLowerCase().includes("invalid property name"))throw n;let r=!1;const a=s.replace(/(\\[pP])\{([^}=]+)\}/g,(o,i,l)=>{try{return new RegExp(`\\p{${l}}`,"u"),`${i}{${l}}`}catch{return r=!0,`${i}{Script=${l}}`}});if(!r)throw n;try{return new RegExp(a,"gu")}catch{throw n}}}else if(e.String!==void 0){const s=nu(e.String);return new 
RegExp(t?s:`(${s})`,"gu")}else return console.warn("Unknown pattern type:",e),null},nu=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),ru=(e,t,s)=>{const n=[];let r=0;for(;r<e.length;){if(n.push(e[r]),(t.get(e[r])??s)!==s){++r;continue}for(;++r<e.length&&(t.get(e[r])??s)===s;)t.get(n.at(-1))!==s&&(n[n.length-1]+=e[r])}return n},au=e=>e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103,ou=e=>Number.isInteger(e)||typeof e=="bigint",iu=e=>{let t=0;for(const s of e)++t;return t},lu=e=>di(e.toLowerCase()),Re=(...e)=>Array.prototype.concat.apply([],e),Yn=e=>new Map(Object.entries(e)),cu=(e,t)=>{const s=[];let n=0;for(const r of e.matchAll(t)){const a=r[0];n<r.index&&s.push(e.slice(n,r.index)),a.length>0&&s.push(a),n=r.index+a.length}return n<e.length&&s.push(e.slice(n)),s},di=e=>e.replace(new RegExp("\\p{M}","gu"),""),mo=(e,t,s=[])=>{if(!e||Array.isArray(e)||typeof e!="object")return`${t} must be a valid object`;for(const n of s)if(!(n in e))return`${t} must contain a "${n}" property`;return null},du=e=>e.match(/\S+/g)||[],uu=class{constructor(){const e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}},ws=uu,_u=class extends ws{constructor(e){super(),this.config=e}_call(e){return this.normalize(e)}},it=_u,hu=class extends it{tokenize_chinese_chars(e){const t=[];for(let s=0;s<e.length;++s){const n=e[s],r=n.charCodeAt(0);au(r)?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}strip_accents(e){return e.normalize("NFD").replace(new RegExp("\\p{Mn}","gu"),"")}is_control(e){switch(e){case" ":case`
2
  `:case"\r":return!1;default:return new RegExp("^\\p{Cc}|\\p{Cf}|\\p{Co}|\\p{Cs}$","u").test(e)}}clean_text(e){const t=[];for(const s of e){const n=s.charCodeAt(0);n===0||n===65533||this.is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this.clean_text(e)),this.config.handle_chinese_chars&&(e=this.tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),this.config.strip_accents!==!1&&(e=this.strip_accents(e))):this.config.strip_accents&&(e=this.strip_accents(e)),e}},pu=hu,fu=class extends it{constructor(e){super(e),this.charsmap=e.precompiled_charsmap??null}normalize(e){return e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,""),e=e.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," "),e.includes("~")?e=e.split("~").map(s=>s.normalize("NFKC")).join("~"):e=e.normalize("NFKC"),e}},mu=fu,gu=class extends it{constructor(e){super(e),this.normalizers=(e.normalizers??[]).map(t=>ui(t))}normalize(e){return this.normalizers.reduce((t,s)=>s?s.normalize(t):t,e)}},wu=gu,vu=class extends it{normalize(e){const t=tn(this.config.pattern??{});return t===null?e:e.replaceAll(t,this.config.content??"")}},Mu=vu,xu=class extends it{constructor(){super(...arguments),this.form="NFC"}normalize(e){return e=e.normalize(this.form),e}},sn=xu,yu=class extends sn{constructor(){super(...arguments),this.form="NFC"}},bu=yu,ku=class extends sn{constructor(){super(...arguments),this.form="NFD"}},Tu=ku,Eu=class extends sn{constructor(){super(...arguments),this.form="NFKC"}},Pu=Eu,Su=class extends sn{constructor(){super(...arguments),this.form="NFKD"}},Cu=Su,Fu=class extends it{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}},Au=Fu,Lu=class extends it{normalize(e){return di(e)}},Iu=Lu,Ou=class extends it{normalize(e){return 
e.toLowerCase()}},Nu=Ou,zu=class extends it{normalize(e){return e=this.config.prepend+e,e}},Du=zu;function Bu(e){if(e===null)return null;switch(e.type){case"BertNormalizer":return new pu(e);case"Precompiled":return new mu(e);case"Sequence":return new wu(e);case"Replace":return new Mu(e);case"NFC":return new bu(e);case"NFD":return new Tu(e);case"NFKC":return new Pu(e);case"NFKD":return new Cu(e);case"Strip":return new Au(e);case"StripAccents":return new Iu(e);case"Lowercase":return new Nu(e);case"Prepend":return new Du(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}var ui=Bu,Vu=class extends ws{pre_tokenize(e,t){return(Array.isArray(e)?e.map(s=>this.pre_tokenize_text(s,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}},qe=Vu,Gu=class extends qe{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space??!1,this.trim_offsets=this.config.trim_offsets??!1,this.use_regex=this.config.use_regex??!0,this.pattern=new RegExp("'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+","gu"),this.byte_encoder=ci,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(n=>Array.from(this.text_encoder.encode(n),r=>this.byte_encoder[r]).join(""))}},$u=Gu,Ru=class extends qe{pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}},ju=Ru,qu=class extends qe{constructor(e){super(),this.replacement=e.replacement??"▁",this.str_rep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,t){const{section_index:s=void 0}=t??{};let n=e.replaceAll(" ",this.str_rep);return!n.startsWith(this.replacement)&&(this.prepend_scheme==="always"||this.prepend_scheme==="first"&&s===0)&&(n=this.str_rep+n),[n]}},Uu=qu,Wu=class extends 
qe{constructor(e){super(),this.config=e,this.pattern=tn(this.config.pattern??{},this.config.invert??!0)}pre_tokenize_text(e){return this.pattern===null?[]:this.config.invert?e.match(this.pattern)||[]:this.config.behavior?.toLowerCase()==="removed"?e.split(this.pattern).filter(t=>t):cu(e,this.pattern)}},Hu=Wu,Qu=class extends qe{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${Us}]+|[${Us}]+`,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Xu=Qu,Ku=class extends qe{constructor(e){super(),this.config=e;const t=`[^\\d]+|\\d${this.config.individual_digits?"":"+"}`;this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Ju=Ku,Yu=class extends qe{constructor(){super(),this.pattern=new RegExp(`[^\\s${Us}]+|[${Us}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}},Zu=Yu,e_=class extends qe{constructor(e){super(),this.config=e,this.pattern=tn(this.config.pattern??{}),this.content=this.config.content??""}pre_tokenize_text(e){return this.pattern===null?[e]:[e.replaceAll(this.pattern,this.config.content??"")]}},t_=e_,s_=class extends qe{constructor(e){super(),this.tokenizers=(e.pretokenizers??[]).map(t=>_i(t))}pre_tokenize_text(e,t){return this.tokenizers.reduce((s,n)=>n?n.pre_tokenize(s,t):s,[e])}},n_=s_,r_=class extends qe{pre_tokenize_text(e){return du(e)}},a_=r_,o_=class extends qe{constructor(e){super(),this.config=e,this._length=e.length}pre_tokenize_text(e){const t=[];for(let s=0;s<e.length;s+=this._length)t.push(e.slice(s,s+this._length));return t}},i_=o_;function l_(e){if(e===null)return null;switch(e.type){case"BertPreTokenizer":return new Zu;case"Sequence":return new n_(e);case"Whitespace":return new ju;case"WhitespaceSplit":return new a_;case"Metaspace":return new Uu(e);case"ByteLevel":return new $u(e);case"Split":return new Hu(e);case"Punctuation":return new Xu(e);case"Digits":return new Ju(e);case"Replace":return new t_(e);case"FixedLength":return new i_(e);default:throw new 
Error(`Unknown PreTokenizer type: ${e.type}`)}}var _i=l_,c_=class extends ws{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=ru(t,this.tokens_to_ids,this.unk_token_id)),t}},nn=c_,d_=class extends nn{constructor(e){super(e),this.max_input_chars_per_word=100,this.tokens_to_ids=Yn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[t,s]of this.tokens_to_ids)this.vocab[s]=t}encode(e){const t=[];for(const s of e){const n=[...s];if(n.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,a=0;const o=[];for(;a<n.length;){let i=n.length,l=null;for(;a<i;){let c=n.slice(a,i).join("");if(a>0&&(c=this.config.continuing_subword_prefix+c),this.tokens_to_ids.has(c)){l=c;break}--i}if(l===null){r=!0;break}o.push(l),a=i}r?t.push(this.unk_token):t.push(...o)}return t}},go=d_,wo=class hi{constructor(t,s){this.is_leaf=t,this.children=s}static default(){return new hi(!1,new Map)}},u_=class{constructor(){this.root=wo.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const s of e){let n=t.children.get(s);n===void 0&&(n=wo.default(),t.children.set(s,n)),t=n}t.is_leaf=!0}*common_prefix_search(e){let t=this.root;if(t===void 0)return;let s="";for(const n of e){if(s+=n,t=t.children.get(n),t===void 0)return;t.is_leaf&&(yield s)}}},__=u_,Tn=class pi{constructor(t,s,n,r,a){this.token_id=t,this.node_id=s,this.pos=n,this.length=r,this.score=a,this.prev=null,this.backtrace_score=0}clone(){const t=new pi(this.token_id,this.node_id,this.pos,this.length,this.score);return 
t.prev=this.prev,t.backtrace_score=this.backtrace_score,t}},h_=class{constructor(e,t,s){this.chars=Array.from(e),this.len=this.chars.length,this.bos_token_id=t,this.eos_token_id=s,this.nodes=[],this.begin_nodes=Array.from({length:this.len+1},()=>[]),this.end_nodes=Array.from({length:this.len+1},()=>[]);const n=new Tn(this.bos_token_id??0,0,0,0,0),r=new Tn(this.eos_token_id??0,1,this.len,0,0);this.nodes.push(n.clone()),this.nodes.push(r.clone()),this.begin_nodes[this.len].push(r),this.end_nodes[0].push(n)}insert(e,t,s,n){const r=this.nodes.length,a=new Tn(n,r,e,t,s);this.begin_nodes[e].push(a),this.end_nodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(this.begin_nodes[t].length==0)return[];for(let o of this.begin_nodes[t]){o.prev=null;let i=0,l=null;for(let c of this.end_nodes[t]){const d=c.backtrace_score+o.score;(l===null||d>i)&&(l=c.clone(),i=d)}if(l!==null)o.prev=l,o.backtrace_score=i;else return[]}++t}const s=[],r=this.begin_nodes[e][0].prev;if(r===null)return[];let a=r.clone();for(;a.prev!==null;)s.push(a.clone()),a=a.clone().prev.clone();return s.reverse(),s}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map(t=>this.piece(t))}token_ids(){return this.viterbi().map(t=>t.token_id)}},p_=h_;function f_(e){if(e.length===0)throw new Error("Array must not be empty");let t=e[0],s=0;for(let n=1;n<e.length;++n)e[n]<t&&(t=e[n],s=n);return[t,s]}var m_=class extends nn{constructor(e,t){super(e);const s=e.vocab.length;this.vocab=new Array(s),this.scores=new Array(s);for(let n=0;n<s;++n)[this.vocab[n],this.scores[n]]=e.vocab[n];this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map((n,r)=>[n,r])),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.min_score=f_(this.scores)[0],this.unk_score=this.min_score-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new __,this.trie.extend(this.vocab),this.fuse_unk=!0}populate_nodes(e){const t=e.chars,s=1;let n=0;for(;n<t.length;){let r=!1;const a=t.slice(n).join(""),o=this.trie.common_prefix_search(a);for(const i of o){const l=this.tokens_to_ids.get(i),c=this.scores[l],d=iu(i);e.insert(n,d,c,l),!r&&d===s&&(r=!0)}r||e.insert(n,s,this.unk_score,this.unk_token_id),n+=s}}tokenize(e){const t=new p_(e,this.bos_token_id,this.eos_token_id);return this.populate_nodes(t),t.tokens()}encode(e){const t=[];for(const s of e){const n=this.tokenize(s);t.push(...n)}return t}},vo=m_,g_=class{constructor(e=(s,n)=>s>n,t=1/0){this._heap=[],this._comparator=e,this._max_size=t}get size(){return this._heap.length}is_empty(){return this.size===0}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._max_size)this._heap.push(t),this._sift_up();else{const s=this._smallest();this._comparator(t,this._heap[s])&&(this._heap[s]=t,this._sift_up_from(s))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._sift_down(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._sift_down(),t}_parent(e){return(e+1>>>1)-1}_left(e){return(e<<1)+1}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const s=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=s}_sift_up(){this._sift_up_from(this.size-1)}_sift_up_from(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_sift_down(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const 
t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}},w_=g_,v_=class{constructor(e){this.capacity=e,this.cache=new Map}get(e){if(!this.cache.has(e))return;const t=this.cache.get(e);return this.cache.delete(e),this.cache.set(e,t),t}put(e,t){this.cache.has(e)&&this.cache.delete(e),this.cache.set(e,t),this.cache.size>this.capacity&&this.cache.delete(this.cache.keys().next().value)}clear(){this.cache.clear()}},M_=v_,x_=class extends nn{constructor(e){super(e),this.tokens_to_ids=Yn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[s,n]of this.tokens_to_ids)this.vocab[n]=s;const t=Array.isArray(e.merges[0]);this.merges=t?e.merges:e.merges.map(s=>s.split(" ",2)),this.bpe_ranks=new Map(this.merges.map((s,n)=>[JSON.stringify(s),n])),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.max_length_to_cache=256,this.cache_capacity=1e4,this.cache=new M_(this.cache_capacity)}clear_cache(){this.cache.clear()}bpe(e){if(e.length===0)return[];const t=this.cache.get(e);if(t!==void 0)return t;const s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){const r=new w_((i,l)=>i.score<l.score);let a={token:s[0],bias:0,prev:null,next:null},o=a;for(let i=1;i<s.length;++i){const l={bias:i/s.length,token:s[i],prev:o,next:null};o.next=l,this.add_node(r,o),o=l}for(;!r.is_empty();){const i=r.pop();if(i.deleted||!i.next||i.next.deleted)continue;if(i.deleted=!0,i.next.deleted=!0,i.prev){const c={...i.prev};i.prev.deleted=!0,i.prev=c,c.prev?c.prev.next=c:a=c}const 
l={token:i.token+i.next.token,bias:i.bias,prev:i.prev,next:i.next.next};l.prev?(l.prev.next=l,this.add_node(r,l.prev)):a=l,l.next&&(l.next.prev=l,this.add_node(r,l))}for(let i=a;i!==null;i=i.next)n.push(i.token)}else n=s;if(this.continuing_subword_suffix)for(let r=0;r<n.length-1;++r)n[r]+=this.continuing_subword_suffix;return e.length<this.max_length_to_cache&&this.cache.put(e,n),n}add_node(e,t){const s=this.bpe_ranks.get(JSON.stringify([t.token,t.next.token]));s!==void 0&&(t.score=s+t.bias,e.push(t))}encode(e){const t=[];for(const s of e){if(this.ignore_merges&&this.tokens_to_ids.has(s)){t.push(s);continue}const n=this.bpe(s);for(const r of n)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const a=Array.from(this.text_encoder.encode(r)).map(o=>`<0x${o.toString(16).toUpperCase().padStart(2,"0")}>`);a.every(o=>this.tokens_to_ids.has(o))?t.push(...a):t.push(this.unk_token)}else t.push(this.unk_token)}return t}},Mo=x_,y_=class extends nn{constructor(e,t){super(e);const s=e.vocab;this.tokens_to_ids=Yn(t.target_lang?s[t.target_lang]:s),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[n,r]of this.tokens_to_ids)this.vocab[r]=n}encode(e){return e}},b_=y_;function k_(e,t){switch(e.type){case"WordPiece":return new go(e);case"Unigram":return new vo(e,t.eos_token);case"BPE":return new Mo(e);default:if(e.vocab)return Array.isArray(e.vocab)?new vo(e,t.eos_token):Object.hasOwn(e,"continuing_subword_prefix")&&Object.hasOwn(e,"unk_token")?Object.hasOwn(e,"merges")?new Mo(e):new go(e):new b_(e,{target_lang:t.target_lang,bos_token:t.bos_token,eos_token:t.eos_token,pad_token:t.pad_token,unk_token:t.unk_token});throw new 
Error(`Unknown TokenizerModel type: ${e?.type}`)}}var T_=k_,E_=class extends ws{constructor(e){super(),this.config=e}_call(e,...t){return this.post_process(e,...t)}},vs=E_,P_=class extends vs{post_process(e,t=null,s=!0){const n=t===null?this.config.single:this.config.pair;let r=[],a=[];for(const o of n)"SpecialToken"in o?s&&(r.push(o.SpecialToken.id),a.push(o.SpecialToken.type_id)):"Sequence"in o&&(o.Sequence.id==="A"?(r=Re(r,e),a=Re(a,new Array(e.length).fill(o.Sequence.type_id))):o.Sequence.id==="B"&&(r=Re(r,t),a=Re(a,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:r,token_type_ids:a}}},S_=P_,C_=class extends vs{post_process(e,t=null){return{tokens:e,tokens_pair:t}}},F_=C_,A_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t=null,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},L_=A_,I_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=s?[this.sep[0]]:[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},O_=I_,N_=class extends vs{constructor(e){super(e),this.processors=(e.processors??[]).map(t=>fi(t))}post_process(e,t=null,s=!0){let n={tokens:e,tokens_pair:t};for(const r of this.processors)n=r.post_process(n.tokens,n.tokens_pair,s);return n}},z_=N_;function D_(e){if(e===null)return null;switch(e.type){case"TemplateProcessing":return new S_(e);case"ByteLevel":return new F_(e);case"BertProcessing":return new L_(e);case"RobertaProcessing":return new O_(e);case"Sequence":return new z_(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}var fi=D_,B_=class extends 
ws{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets="trim_offsets"in e?e.trim_offsets:!1}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}},Ue=B_,V_=class extends Ue{constructor(e){super(e),this.byte_decoder=tu,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),s=new Uint8Array([...t].map(n=>this.byte_decoder[n]));return this.text_decoder.decode(s)}decode_chain(e){const t=[];let s=[];for(const n of e)this.added_tokens.find(r=>r.content===n)!==void 0?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}},G_=V_,$_=class extends Ue{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map((t,s)=>{if(s!==0){const n=this.config.prefix;n&&t.startsWith(n)?t=t.replace(n,""):t=" "+t}return this.cleanup&&(t=Jn(t)),t})}},R_=$_,j_=class extends Ue{constructor(e){super(e),this.replacement=e.replacement??"▁"}decode_chain(e){const t=[];for(let s=0;s<e.length;++s){let n=e[s].replaceAll(this.replacement," ");s==0&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}},q_=j_,U_=class extends Ue{constructor(e){super(e),this.suffix=e.suffix??""}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}},W_=U_,H_=class extends Ue{constructor(e){super(e),this.pad_token=e.pad_token??"",this.word_delimiter_token=e.word_delimiter_token??"",this.cleanup=e.cleanup}convert_tokens_to_string(e){if(e.length===0)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let n=t.filter(r=>r!==this.pad_token).join("");return this.cleanup&&(n=Jn(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}},Q_=H_,X_=class extends 
Ue{constructor(e){super(e),this.decoders=(e.decoders??[]).map(t=>mi(t))}decode_chain(e){return this.decoders.reduce((t,s)=>s.decode_chain(t),e)}},K_=X_,J_=class extends Ue{decode_chain(e){const t=tn(this.config.pattern),s=this.config.content??"";return t===null?e:e.map(n=>n.replaceAll(t,s))}},Y_=J_,Z_=class extends Ue{decode_chain(e){return[e.join("")]}},eh=Z_,th=class extends Ue{constructor(e){super(e),this.content=e.content??"",this.start=e.start??0,this.stop=e.stop??0}decode_chain(e){return e.map(t=>{let s=0;for(let r=0;r<this.start&&t[r]===this.content;++r){s=r+1;continue}let n=t.length;for(let r=0;r<this.stop;++r){const a=t.length-r-1;if(t[a]===this.content){n=a;continue}else break}return t.slice(s,n)})}},sh=th,nh=class extends Ue{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let s=[];for(const n of e){let r=null;if(n.length===6&&n.startsWith("<0x")&&n.endsWith(">")){const a=parseInt(n.slice(3,5),16);isNaN(a)||(r=a)}if(r!==null)s.push(r);else{if(s.length>0){const a=this.text_decoder.decode(Uint8Array.from(s));t.push(a),s=[]}t.push(n)}}if(s.length>0){const n=this.text_decoder.decode(Uint8Array.from(s));t.push(n),s=[]}return t}},rh=nh;function ah(e){if(e===null)return null;switch(e.type){case"ByteLevel":return new G_(e);case"WordPiece":return new R_(e);case"Metaspace":return new q_(e);case"BPEDecoder":return new W_(e);case"CTC":return new Q_(e);case"Sequence":return new K_(e);case"Replace":return new Y_(e);case"Fuse":return new eh(e);case"Strip":return new sh(e);case"ByteFallback":return new rh(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}var mi=ah,oh=class{constructor(e,t){const s=mo(e,"Tokenizer",["model","decoder","post_processor","pre_tokenizer","normalizer"]);if(s)throw new Error(s);const n=mo(t,"Config");if(n)throw new 
Error(n);this.tokenizer=e,this.config=t,this.normalizer=ui(this.tokenizer.normalizer),this.pre_tokenizer=_i(this.tokenizer.pre_tokenizer),this.model=T_(this.tokenizer.model,this.config),this.post_processor=fi(this.tokenizer.post_processor),this.decoder=mi(this.tokenizer.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];const r=[],a=[];this.added_tokens_map=new Map;for(const o of this.tokenizer.added_tokens){const i=new Zd(o);if(this.added_tokens.push(i),this.model.tokens_to_ids.set(i.content,i.id),this.model.vocab[i.id]=i.content,i.special&&(this.special_tokens.push(i.content),this.all_special_ids.push(i.id)),this.added_tokens_map.set(i.content,i),i.normalized&&this.normalizer!==null){const l=this.normalizer(i.content);a.push(l),this.added_tokens_map.set(l,i)}else r.push(i.content)}(this.config.additional_special_tokens??[]).forEach(o=>{this.special_tokens.includes(o)||this.special_tokens.push(o)}),this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.splitter_unnormalized=new po(r),this.splitter_normalized=new po(a),this.remove_space=this.config.remove_space,this.clean_up_tokenization_spaces=this.config.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=this.config.do_lowercase_and_remove_accent??!1}encode(e,{text_pair:t=null,add_special_tokens:s=!0,return_token_type_ids:n=null}={}){const{tokens:r,token_type_ids:a}=this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}),o=r.map(l=>this.added_tokens_map.get(l)?.id??this.model.tokens_to_ids.get(l)??this.model.unk_token_id),i={ids:o,tokens:r,attention_mask:new Array(o.length).fill(1)};return n&&a&&(i.token_type_ids=a),i}decode(e,t={}){if(!Array.isArray(e)||e.length===0||!ou(e[0]))throw Error("token_ids must be a non-empty array of integers.");let s=e.map(r=>this.model.vocab[Number(r)]??this.model.unk_token);t.skip_special_tokens&&(s=s.filter(r=>!this.special_tokens.includes(r)));let 
n=this.decoder?this.decoder(s):s.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(n=n.replaceAll(this.decoder.end_of_word_suffix," "),t.skip_special_tokens&&(n=n.trim())),(t.clean_up_tokenization_spaces??this.clean_up_tokenization_spaces)&&(n=Jn(n)),n}tokenize(e,{text_pair:t=null,add_special_tokens:s=!1}={}){return this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}).tokens}encode_text(e){if(e===null)return null;const t=this.splitter_unnormalized.split(e);return t.forEach((s,n)=>{const r=this.added_tokens_map.get(s);r&&(r.lstrip&&n>0&&(t[n-1]=t[n-1].trimEnd()),r.rstrip&&n<t.length-1&&(t[n+1]=t[n+1].trimStart()))}),t.flatMap((s,n)=>{if(s.length===0)return[];if(this.added_tokens_map.has(s))return[s];if(this.remove_space===!0&&(s=s.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(s=lu(s)),this.normalizer!==null&&(s=this.normalizer(s)),s.length===0)return[];const r=this.splitter_normalized.split(s);return r.forEach((a,o)=>{const i=this.added_tokens_map.get(a);i&&(i.lstrip&&o>0&&(r[o-1]=r[o-1].trimEnd()),i.rstrip&&o<r.length-1&&(r[o+1]=r[o+1].trimStart()))}),r.flatMap(a=>{if(a.length===0)return[];if(this.added_tokens_map.has(a))return[a];const o=this.pre_tokenizer!==null?this.pre_tokenizer(a,{section_index:n}):[a];return this.model(o)})})}tokenize_helper(e,{text_pair:t=null,add_special_tokens:s=!0}){const n=this.encode_text(e),r=this.encode_text(t||null);return this.post_processor?this.post_processor(n,r,s):{tokens:Re(n??[],r??[])}}token_to_id(e){return this.model.tokens_to_ids.get(e)}id_to_token(e){return this.model.vocab[e]}get_added_tokens_decoder(){const e=new Map;for(const t of this.added_tokens)e.set(t.id,t);return e}get_vocab(e=!0){const t=new Map;for(let s=0;s<this.model.vocab.length;++s){const n=this.model.vocab[s];(e||!this.added_tokens_map.has(n))&&t.set(n,s)}return 
t}},ih=oh,k=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Comment:"Comment"}),Ne=class{constructor(e,t){this.value=e,this.type=t}};function xo(e){return/\w/.test(e)}function us(e){return/[0-9]/.test(e)}function yo(e){return/\s/.test(e)}var lh=[["{%",k.OpenStatement],["%}",k.CloseStatement],["{{",k.OpenExpression],["}}",k.CloseExpression],["(",k.OpenParen],[")",k.CloseParen],["{",k.OpenCurlyBracket],["}",k.CloseCurlyBracket],["[",k.OpenSquareBracket],["]",k.CloseSquareBracket],[",",k.Comma],[".",k.Dot],[":",k.Colon],["|",k.Pipe],["<=",k.ComparisonBinaryOperator],[">=",k.ComparisonBinaryOperator],["==",k.ComparisonBinaryOperator],["!=",k.ComparisonBinaryOperator],["<",k.ComparisonBinaryOperator],[">",k.ComparisonBinaryOperator],["+",k.AdditiveBinaryOperator],["-",k.AdditiveBinaryOperator],["~",k.AdditiveBinaryOperator],["*",k.MultiplicativeBinaryOperator],["/",k.MultiplicativeBinaryOperator],["%",k.MultiplicativeBinaryOperator],["=",k.Equals]],ch=new Map([["n",`
3
  `],["t"," "],["r","\r"],["b","\b"],["f","\f"],["v","\v"],["'","'"],['"','"'],["\\","\\"]]);function dh(e,t={}){return e.endsWith(`
4
  `)&&(e=e.slice(0,-1)),t.lstrip_blocks&&(e=e.replace(/^[ \t]*({[#%-])/gm,"$1")),t.trim_blocks&&(e=e.replace(/([#%-]})\n/g,"$1")),e.replace(/{%\s*(end)?generation\s*%}/gs,"")}function uh(e,t={}){const s=[],n=dh(e,t);let r=0,a=0;const o=c=>{let d="";for(;c(n[r]);){if(n[r]==="\\"){if(++r,r>=n.length)throw new SyntaxError("Unexpected end of input");const u=n[r++],_=ch.get(u);if(_===void 0)throw new SyntaxError(`Unexpected escaped character: ${u}`);d+=_;continue}if(d+=n[r++],r>=n.length)throw new SyntaxError("Unexpected end of input")}return d},i=()=>{const c=s.at(-1);c&&c.type===k.Text&&(c.value=c.value.trimEnd(),c.value===""&&s.pop())},l=()=>{for(;r<n.length&&yo(n[r]);)++r};e:for(;r<n.length;){const c=s.at(-1)?.type;if(c===void 0||c===k.CloseStatement||c===k.CloseExpression||c===k.Comment){let u="";for(;r<n.length&&!(n[r]==="{"&&(n[r+1]==="%"||n[r+1]==="{"||n[r+1]==="#"));)u+=n[r++];if(u.length>0){s.push(new Ne(u,k.Text));continue}}if(n[r]==="{"&&n[r+1]==="#"){r+=2;const u=n[r]==="-";u&&++r;let _="";for(;n[r]!=="#"||n[r+1]!=="}";){if(r+2>=n.length)throw new SyntaxError("Missing end of comment tag");_+=n[r++]}const h=_.endsWith("-");h&&(_=_.slice(0,-1)),u&&i(),s.push(new Ne(_,k.Comment)),r+=2,h&&l();continue}if(n.slice(r,r+3)==="{%-"){i(),s.push(new Ne("{%",k.OpenStatement)),r+=3;continue}if(n.slice(r,r+3)==="{{-"){i(),s.push(new Ne("{{",k.OpenExpression)),a=0,r+=3;continue}if(o(yo),n.slice(r,r+3)==="-%}"){s.push(new Ne("%}",k.CloseStatement)),r+=3,l();continue}if(n.slice(r,r+3)==="-}}"){s.push(new Ne("}}",k.CloseExpression)),r+=3,l();continue}const d=n[r];if(d==="-"||d==="+"){const u=s.at(-1)?.type;if(u===k.Text||u===void 0)throw new SyntaxError(`Unexpected character: ${d}`);switch(u){case k.Identifier:case k.NumericLiteral:case k.StringLiteral:case k.CloseParen:case k.CloseSquareBracket:break;default:{++r;const _=o(us);s.push(new Ne(`${d}${_}`,_.length>0?k.NumericLiteral:k.UnaryOperator));continue}}}for(const[u,_]of 
lh){if(u==="}}"&&a>0)continue;if(n.slice(r,r+u.length)===u){s.push(new Ne(u,_)),_===k.OpenExpression?a=0:_===k.OpenCurlyBracket?++a:_===k.CloseCurlyBracket&&--a,r+=u.length;continue e}}if(d==="'"||d==='"'){++r;const u=o(_=>_!==d);s.push(new Ne(u,k.StringLiteral)),++r;continue}if(us(d)){let u=o(us);if(n[r]==="."&&us(n[r+1])){++r;const _=o(us);u=`${u}.${_}`}s.push(new Ne(u,k.NumericLiteral));continue}if(xo(d)){const u=o(xo);s.push(new Ne(u,k.Identifier));continue}throw new SyntaxError(`Unexpected character: ${d}`)}return s}var We=class{type="Statement"},_h=class extends We{constructor(e){super(),this.body=e}type="Program"},hh=class extends We{constructor(e,t,s){super(),this.test=e,this.body=t,this.alternate=s}type="If"},ph=class extends We{constructor(e,t,s,n){super(),this.loopvar=e,this.iterable=t,this.body=s,this.defaultBlock=n}type="For"},fh=class extends We{type="Break"},mh=class extends We{type="Continue"},gh=class extends We{constructor(e,t,s){super(),this.assignee=e,this.value=t,this.body=s}type="Set"},wh=class extends We{constructor(e,t,s){super(),this.name=e,this.args=t,this.body=s}type="Macro"},vh=class extends We{constructor(e){super(),this.value=e}type="Comment"},Oe=class extends We{type="Expression"},Mh=class extends Oe{constructor(e,t,s){super(),this.object=e,this.property=t,this.computed=s}type="MemberExpression"},bo=class extends Oe{constructor(e,t){super(),this.callee=e,this.args=t}type="CallExpression"},Lt=class extends Oe{constructor(e){super(),this.value=e}type="Identifier"},Ht=class extends Oe{constructor(e){super(),this.value=e}type="Literal"},xh=class extends Ht{type="IntegerLiteral"},yh=class extends Ht{type="FloatLiteral"},ko=class extends Ht{type="StringLiteral"},bh=class extends Ht{type="ArrayLiteral"},To=class extends Ht{type="TupleLiteral"},kh=class extends Ht{type="ObjectLiteral"},_s=class extends Oe{constructor(e,t,s){super(),this.operator=e,this.left=t,this.right=s}type="BinaryExpression"},Th=class extends 
Oe{constructor(e,t){super(),this.operand=e,this.filter=t}type="FilterExpression"},Eh=class extends We{constructor(e,t){super(),this.filter=e,this.body=t}type="FilterStatement"},Ph=class extends Oe{constructor(e,t){super(),this.lhs=e,this.test=t}type="SelectExpression"},Sh=class extends Oe{constructor(e,t,s){super(),this.operand=e,this.negate=t,this.test=s}type="TestExpression"},Ch=class extends Oe{constructor(e,t){super(),this.operator=e,this.argument=t}type="UnaryExpression"},Fh=class extends Oe{constructor(e=void 0,t=void 0,s=void 0){super(),this.start=e,this.stop=t,this.step=s}type="SliceExpression"},Ah=class extends Oe{constructor(e,t){super(),this.key=e,this.value=t}type="KeywordArgumentExpression"},Lh=class extends Oe{constructor(e){super(),this.argument=e}type="SpreadExpression"},Ih=class extends We{constructor(e,t,s){super(),this.call=e,this.callerArgs=t,this.body=s}type="CallStatement"},Oh=class extends Oe{constructor(e,t,s){super(),this.condition=e,this.trueExpr=t,this.falseExpr=s}type="Ternary"};function Nh(e){const t=new _h([]);let s=0;function n(M,y){const E=e[s++];if(!E||E.type!==M)throw new Error(`Parser Error: ${y}. 
${E.type} !== ${M}.`);return E}function r(M){if(!l(M))throw new SyntaxError(`Expected ${M}`);++s}function a(){switch(e[s].type){case k.Comment:return new vh(e[s++].value);case k.Text:return c();case k.OpenStatement:return d();case k.OpenExpression:return u();default:throw new SyntaxError(`Unexpected token type: ${e[s].type}`)}}function o(...M){return s+M.length<=e.length&&M.every((y,E)=>y===e[s+E].type)}function i(...M){return e[s]?.type===k.OpenStatement&&e[s+1]?.type===k.Identifier&&M.includes(e[s+1]?.value)}function l(...M){return s+M.length<=e.length&&M.every((y,E)=>e[s+E].type==="Identifier"&&y===e[s+E].value)}function c(){return new ko(n(k.Text,"Expected text token").value)}function d(){if(n(k.OpenStatement,"Expected opening statement token"),e[s].type!==k.Identifier)throw new SyntaxError(`Unknown statement, got ${e[s].type}`);const M=e[s].value;let y;switch(M){case"set":++s,y=_();break;case"if":++s,y=h(),n(k.OpenStatement,"Expected {% token"),r("endif"),n(k.CloseStatement,"Expected %} token");break;case"macro":++s,y=p(),n(k.OpenStatement,"Expected {% token"),r("endmacro"),n(k.CloseStatement,"Expected %} token");break;case"for":++s,y=m(),n(k.OpenStatement,"Expected {% token"),r("endfor"),n(k.CloseStatement,"Expected %} token");break;case"call":{++s;let E=null;o(k.OpenParen)&&(E=B());const U=O();if(U.type!=="Identifier")throw new SyntaxError("Expected identifier following call statement");const Q=B();n(k.CloseStatement,"Expected closing statement token");const pe=[];for(;!i("endcall");)pe.push(a());n(k.OpenStatement,"Expected '{%'"),r("endcall"),n(k.CloseStatement,"Expected closing statement token");const me=new bo(U,Q);y=new Ih(me,E,pe);break}case"break":++s,n(k.CloseStatement,"Expected closing statement token"),y=new fh;break;case"continue":++s,n(k.CloseStatement,"Expected closing statement token"),y=new mh;break;case"filter":{++s;let E=O();E instanceof Lt&&o(k.OpenParen)&&(E=I(E)),n(k.CloseStatement,"Expected closing statement token");const 
U=[];for(;!i("endfilter");)U.push(a());n(k.OpenStatement,"Expected '{%'"),r("endfilter"),n(k.CloseStatement,"Expected '%}'"),y=new Eh(E,U);break}default:throw new SyntaxError(`Unknown statement type: ${M}`)}return y}function u(){n(k.OpenExpression,"Expected opening expression token");const M=g();return n(k.CloseExpression,"Expected closing expression token"),M}function _(){const M=f();let y=null;const E=[];if(o(k.Equals))++s,y=f();else{for(n(k.CloseStatement,"Expected %} token");!i("endset");)E.push(a());n(k.OpenStatement,"Expected {% token"),r("endset")}return n(k.CloseStatement,"Expected closing statement token"),new gh(M,y,E)}function h(){const M=g();n(k.CloseStatement,"Expected closing statement token");const y=[],E=[];for(;!i("elif","else","endif");)y.push(a());if(i("elif")){++s,++s;const U=h();E.push(U)}else if(i("else"))for(++s,++s,n(k.CloseStatement,"Expected closing statement token");!i("endif");)E.push(a());return new hh(M,y,E)}function p(){const M=O();if(M.type!=="Identifier")throw new SyntaxError("Expected identifier following macro statement");const y=B();n(k.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endmacro");)E.push(a());return new wh(M,y,E)}function f(M=!1){const y=M?O:g,E=[y()],U=o(k.Comma);for(;U&&(++s,E.push(y()),!!o(k.Comma)););return U?new To(E):E[0]}function m(){const M=f(!0);if(!(M instanceof Lt||M instanceof To))throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${M.type} instead`);if(!l("in"))throw new SyntaxError("Expected `in` keyword following loop variable");++s;const y=g();n(k.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endfor","else");)E.push(a());const U=[];if(i("else"))for(++s,++s,n(k.CloseStatement,"Expected closing statement token");!i("endfor");)U.push(a());return new ph(M,y,E,U)}function g(){return v()}function v(){const M=x();if(l("if")){++s;const y=x();if(l("else")){++s;const E=v();return new Oh(y,M,E)}else return new Ph(M,y)}return M}function 
x(){let M=b();for(;l("or");){const y=e[s];++s;const E=b();M=new _s(y,M,E)}return M}function b(){let M=T();for(;l("and");){const y=e[s];++s;const E=T();M=new _s(y,M,E)}return M}function T(){let M;for(;l("not");){const y=e[s];++s;const E=T();M=new Ch(y,E)}return M??S()}function S(){let M=A();for(;;){let y;if(l("not","in"))y=new Ne("not in",k.Identifier),s+=2;else if(l("in"))y=e[s++];else if(o(k.ComparisonBinaryOperator))y=e[s++];else break;const E=A();M=new _s(y,M,E)}return M}function A(){let M=j();for(;o(k.AdditiveBinaryOperator);){const y=e[s];++s;const E=j();M=new _s(y,M,E)}return M}function N(){const M=z(O());return o(k.OpenParen)?I(M):M}function I(M){let y=new bo(M,B());return y=z(y),o(k.OpenParen)&&(y=I(y)),y}function B(){n(k.OpenParen,"Expected opening parenthesis for arguments list");const M=G();return n(k.CloseParen,"Expected closing parenthesis for arguments list"),M}function G(){const M=[];for(;!o(k.CloseParen);){let y;if(e[s].type===k.MultiplicativeBinaryOperator&&e[s].value==="*"){++s;const E=g();y=new Lh(E)}else if(y=g(),o(k.Equals)){if(++s,!(y instanceof Lt))throw new SyntaxError("Expected identifier for keyword argument");const E=g();y=new Ah(y,E)}M.push(y),o(k.Comma)&&++s}return M}function D(){const M=[];let y=!1;for(;!o(k.CloseSquareBracket);)o(k.Colon)?(M.push(void 0),++s,y=!0):(M.push(g()),o(k.Colon)&&(++s,y=!0));if(M.length===0)throw new SyntaxError("Expected at least one argument for member/slice expression");if(y){if(M.length>3)throw new SyntaxError("Expected 0-3 arguments for slice expression");return new Fh(...M)}return M[0]}function z(M){for(;o(k.Dot)||o(k.OpenSquareBracket);){const y=e[s];++s;let E;const U=y.type===k.OpenSquareBracket;if(U)E=D(),n(k.CloseSquareBracket,"Expected closing square bracket");else if(E=O(),E.type!=="Identifier")throw new SyntaxError("Expected identifier following dot operator");M=new Mh(M,E,U)}return M}function j(){let M=q();for(;o(k.MultiplicativeBinaryOperator);){const y=e[s++],E=q();M=new _s(y,M,E)}return 
M}function q(){let M=ee();for(;l("is");){++s;const y=l("not");y&&++s;const E=O();if(!(E instanceof Lt))throw new SyntaxError("Expected identifier for the test");M=new Sh(M,y,E)}return M}function ee(){let M=N();for(;o(k.Pipe);){++s;let y=O();if(!(y instanceof Lt))throw new SyntaxError("Expected identifier for the filter");o(k.OpenParen)&&(y=I(y)),M=new Th(M,y)}return M}function O(){const M=e[s++];switch(M.type){case k.NumericLiteral:{const y=M.value;return y.includes(".")?new yh(Number(y)):new xh(Number(y))}case k.StringLiteral:{let y=M.value;for(;o(k.StringLiteral);)y+=e[s++].value;return new ko(y)}case k.Identifier:return new Lt(M.value);case k.OpenParen:{const y=f();return n(k.CloseParen,"Expected closing parenthesis, got ${tokens[current].type} instead."),y}case k.OpenSquareBracket:{const y=[];for(;!o(k.CloseSquareBracket);)y.push(g()),o(k.Comma)&&++s;return++s,new bh(y)}case k.OpenCurlyBracket:{const y=new Map;for(;!o(k.CloseCurlyBracket);){const E=g();n(k.Colon,"Expected colon between key and value in object literal");const U=g();y.set(E,U),o(k.Comma)&&++s}return++s,new kh(y)}default:throw new SyntaxError(`Unexpected token: ${M.type}`)}}for(;s<e.length;)t.body.push(a());return t}function zh(e,t,s=1){if(t===void 0&&(t=e,e=0),s===0)throw new Error("range() step must not be zero");const n=[];if(s>0)for(let r=e;r<t;r+=s)n.push(r);else for(let r=e;r>t;r+=s)n.push(r);return n}function Eo(e,t,s,n=1){const r=Math.sign(n);r>=0?(t=(t??=0)<0?Math.max(e.length+t,0):Math.min(t,e.length),s=(s??=e.length)<0?Math.max(e.length+s,0):Math.min(s,e.length)):(t=(t??=e.length-1)<0?Math.max(e.length+t,-1):Math.min(t,e.length-1),s=(s??=-1)<-1?Math.max(e.length+s,-1):Math.min(s,e.length-1));const a=[];for(let o=t;r*o<r*s;o+=n)a.push(e[o]);return a}function Dh(e){return e.replace(/\b\w/g,t=>t.toUpperCase())}function Bh(e){return Vh(new Date,e)}function Vh(e,t){const s=new Intl.DateTimeFormat(void 0,{month:"long"}),n=new Intl.DateTimeFormat(void 
0,{month:"short"}),r=a=>a<10?"0"+a:a.toString();return t.replace(/%[YmdbBHM%]/g,a=>{switch(a){case"%Y":return e.getFullYear().toString();case"%m":return r(e.getMonth()+1);case"%d":return r(e.getDate());case"%b":return n.format(e);case"%B":return s.format(e);case"%H":return r(e.getHours());case"%M":return r(e.getMinutes());case"%%":return"%";default:return a}})}function Gh(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}function $h(e,t,s,n){if(n===0)return e;let r=n==null||n<0?1/0:n;const a=t.length===0?new RegExp("(?=)","gu"):new RegExp(Gh(t),"gu");return e.replaceAll(a,o=>r>0?(--r,s):o)}var Po=class extends Error{},So=class extends Error{},tt=class{type="RuntimeValue";value;builtins=new Map;constructor(e=void 0){this.value=e}__bool__(){return new V(!!this.value)}toString(){return String(this.value)}},W=class extends tt{type="IntegerValue"},he=class extends tt{type="FloatValue";toString(){return this.value%1===0?this.value.toFixed(1):this.value.toString()}},L=class extends tt{type="StringValue";builtins=new Map([["upper",new ie(()=>new L(this.value.toUpperCase()))],["lower",new ie(()=>new L(this.value.toLowerCase()))],["strip",new ie(()=>new L(this.value.trim()))],["title",new ie(()=>new L(Dh(this.value)))],["capitalize",new ie(()=>new L(this.value.charAt(0).toUpperCase()+this.value.slice(1)))],["length",new W(this.value.length)],["rstrip",new ie(()=>new L(this.value.trimEnd()))],["lstrip",new ie(()=>new L(this.value.trimStart()))],["startswith",new ie(e=>{if(e.length===0)throw new Error("startswith() requires at least one argument");const t=e[0];if(t instanceof L)return new V(this.value.startsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("startswith() tuple elements must be strings");if(this.value.startsWith(s.value))return new V(!0)}return new V(!1)}throw new Error("startswith() argument must be a string or tuple of strings")})],["endswith",new ie(e=>{if(e.length===0)throw new Error("endswith() requires at least 
one argument");const t=e[0];if(t instanceof L)return new V(this.value.endsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("endswith() tuple elements must be strings");if(this.value.endsWith(s.value))return new V(!0)}return new V(!1)}throw new Error("endswith() argument must be a string or tuple of strings")})],["split",new ie(e=>{const t=e[0]??new ce;if(!(t instanceof L||t instanceof ce))throw new Error("sep argument must be a string or null");const s=e[1]??new W(-1);if(!(s instanceof W))throw new Error("maxsplit argument must be a number");let n=[];if(t instanceof ce){const r=this.value.trimStart();for(const{0:a,index:o}of r.matchAll(/\S+/g)){if(s.value!==-1&&n.length>=s.value&&o!==void 0){n.push(a+r.slice(o+a.length));break}n.push(a)}}else{if(t.value==="")throw new Error("empty separator");n=this.value.split(t.value),s.value!==-1&&n.length>s.value&&n.push(n.splice(s.value).join(t.value))}return new Y(n.map(r=>new L(r)))})],["replace",new ie(e=>{if(e.length<2)throw new Error("replace() requires at least two arguments");const t=e[0],s=e[1];if(!(t instanceof L&&s instanceof L))throw new Error("replace() arguments must be strings");let n;if(e.length>2?e[2].type==="KeywordArgumentsValue"?n=e[2].value.get("count")??new ce:n=e[2]:n=new ce,!(n instanceof W||n instanceof ce))throw new Error("replace() count argument must be a number or null");return new L($h(this.value,t.value,s.value,n.value))})]])},V=class extends tt{type="BooleanValue"},Rh=/[\x7f-\uffff]/g;function Co(e){return e.replace(Rh,t=>"\\u"+t.charCodeAt(0).toString(16).padStart(4,"0"))}function Et(e,t={},s=0,n=!0){const{indent:r=null,ensureAscii:a=!1,separators:o=null,sortKeys:i=!1}=t;let l,c;switch(o?[l,c]=o:r?(l=",",c=": "):(l=", ",c=": "),e.type){case"NullValue":return"null";case"UndefinedValue":return n?"null":"undefined";case"IntegerValue":case"FloatValue":case"BooleanValue":return JSON.stringify(e.value);case"StringValue":{let 
d=JSON.stringify(e.value);return a&&(d=Co(d)),d}case"ArrayValue":case"ObjectValue":{const d=r?" ".repeat(r):"",u=`
 
1
+ import{G as Pd}from"./gpu-ops-flxI8RuZ.js";import{Qwen35Model as Sd}from"./qwen35-model-BJNcT5Rw.js";import{loadConfig as Cd,loadQuantConfig as Fd,loadModelWeights as Ad}from"./safetensors-loader-CwGm5mJX.js";class Wn{}let si=class{static create(){throw new Error("ONNX not available")}};class ni{}const ri={},Ld={Tensor:Wn,InferenceSession:si,OrtEnv:ni,env:ri},Id=Object.freeze(Object.defineProperty({__proto__:null,InferenceSession:si,OrtEnv:ni,Tensor:Wn,default:Ld,env:ri},Symbol.toStringTag,{value:"Module"}));var kn={},Od=Object.defineProperty,Wt=(e,t)=>{for(var s in t)Od(e,s,{get:t[s],enumerable:!0})},Ie={},Ze={},Nd={},zd="4.0.0-next.6",Hn=typeof self<"u",Vt=!li(Ie),ai=!li(Ze),Rs=Hn&&"caches"in self,Dd=typeof globalThis.Deno<"u",en=Dd&&Rs&&!Vt,oi=typeof process<"u",ii=oi&&process?.release?.name==="node"&&!en,Qn=typeof window<"u"&&typeof window.document<"u",Xn=Hn&&["DedicatedWorkerGlobalScope","ServiceWorkerGlobalScope","SharedWorkerGlobalScope"].includes(self.constructor?.name),Bd=Qn||Xn||en,Vd=ii||typeof navigator<"u"&&"gpu"in navigator,Gd=typeof navigator<"u"&&"ml"in navigator,$d=typeof crypto<"u"&&typeof crypto.getRandomValues=="function",Rd=typeof chrome<"u"&&typeof chrome.runtime<"u"&&typeof chrome.runtime.id=="string",jd=typeof ServiceWorkerGlobalScope<"u"&&Hn&&self instanceof ServiceWorkerGlobalScope,qd=()=>{if(typeof navigator>"u")return!1;const e=navigator.userAgent,s=(navigator.vendor||"").indexOf("Apple")>-1,n=!e.match(/CriOS|FxiOS|EdgiOS|OPiOS|mercury|brave/i)&&!e.includes("Chrome")&&!e.includes("Android");return s&&n},Ud=qd(),K=Object.freeze({IS_BROWSER_ENV:Qn,IS_WEBWORKER_ENV:Xn,IS_WEB_ENV:Bd,IS_SERVICE_WORKER_ENV:jd,IS_DENO_WEB_RUNTIME:en,IS_WEB_CACHE_AVAILABLE:Rs,IS_WEBGPU_AVAILABLE:Vd,IS_WEBNN_AVAILABLE:Gd,IS_SAFARI:Ud,IS_PROCESS_AVAILABLE:oi,IS_NODE_ENV:ii,IS_FS_AVAILABLE:Vt,IS_PATH_AVAILABLE:ai,IS_CRYPTO_AVAILABLE:$d,IS_CHROME_AVAILABLE:Rd}),Kn=Vt&&ai,js="./";if(Kn){const 
e=Object(import.meta).url;e?js=Ze.dirname(Ze.dirname(Nd.fileURLToPath(e))):typeof __dirname<"u"&&(js=Ze.dirname(__dirname))}var Wd=Kn?Ze.join(js,"/.cache/"):null,co="/models/",Hd=Kn?Ze.join(js,co):co,Qd=typeof globalThis.fetch=="function"?globalThis.fetch.bind(globalThis):void 0,$e=Object.freeze({DEBUG:10,INFO:20,WARNING:30,ERROR:40,NONE:50}),uo=$e.WARNING,te={version:zd,backends:{onnx:{}},get logLevel(){return uo},set logLevel(e){uo=e,te.backends.onnx?.setLogLevel?.(e)},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!(Qn||Xn||en),localModelPath:Hd,useFS:Vt,useBrowserCache:Rs,useFSCache:Vt,cacheDir:Wd,useCustomCache:!1,customCache:null,useWasmCache:Rs||Vt,cacheKey:"transformers-cache",fetch:Qd};function li(e){return Object.keys(e).length===0}function Dt(e,t){e&&e(t)}function Xd(e){return Number.isInteger(e)||typeof e=="bigint"}function _o(e){return e==null||e===-1}function ho(e){const t=[];let s=e;for(;Array.isArray(s);)t.push(s.length),s=s[0];return t}function et(...e){return Array.prototype.concat.apply([],e)}function qs(e,t){return Math.abs((e+t)%(2*t)-t)}function Ce(e,t){return Object.assign({},...t.map(s=>{if(e[s]!==void 0)return{[s]:e[s]}}))}function Kd(e,t){let s=0;for(const n of e)n===t&&++s;return s}var J={error(...e){te.logLevel<=$e.ERROR&&console.error(...e)},warn(...e){te.logLevel<=$e.WARNING&&console.warn(...e)},info(...e){te.logLevel<=$e.INFO&&console.log(...e)},debug(...e){te.logLevel<=$e.DEBUG&&console.log(...e)},log(...e){this.info(...e)}},Jd=class{constructor(e){this.trie=this._build_trie(e)}_build_trie(e){const t=Object.create(null);for(const s of e){let n=t;for(let r=0;r<s.length;++r){const a=s[r];n=n[a]??=Object.create(null)}n.end=s}return t}split(e){const t=[],s=e.length;let n=0,r=0;for(;r<s;){let a=this.trie,o=null,i=r;for(;i<s&&(a=a[e[i]]);)a.end&&(o=a.end),++i;o?(r>n&&t.push(e.slice(n,r)),t.push(o),r+=o.length,n=r):++r}return 
n<s&&t.push(e.slice(n)),t}},po=Jd,Yd=class{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??!this.special}},Zd=Yd,ci=(()=>{const e=[...Array.from({length:94},(r,a)=>a+33),...Array.from({length:12},(r,a)=>a+161),...Array.from({length:82},(r,a)=>a+174)],t=e.slice();let s=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+s),s+=1);const n=t.map(r=>String.fromCharCode(r));return Object.fromEntries(e.map((r,a)=>[r,n[a]]))})(),eu=e=>Object.fromEntries(Object.entries(e).map(([t,s])=>[s,t])),tu=eu(ci),fo=".,!?…。,、।۔،",su=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],["(?i:[sdmt]|ll|ve|re)","(?:[sS]|[dD]|[mM]|[tT]|[lL][lL]|[vV][eE]|[rR][eE])"],["[^\\r\\n\\p{L}\\p{N}]?+","[^\\r\\n\\p{L}\\p{N}]?"],["[^\\s\\p{L}\\p{N}]++","[^\\s\\p{L}\\p{N}]+"],["(?>\\p{Nd}{510})","(?:\\p{Nd}{510})"],["\\p{Nd}{3}+","(?:\\p{Nd}{3})+"],["\\G",""],[` ?[^(\\s|[${fo}])]+`,` ?[^\\s${fo}]+`]]),Us="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",Jn=e=>e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n't/g,"n't").replace(/ 'm/g,"'m").replace(/ 's/g,"'s").replace(/ 've/g,"'ve").replace(/ 're/g,"'re"),tn=(e,t=!0)=>{if(e.Regex!==void 0){let s=e.Regex.replace(/\\([#&~])/g,"$1");s=s.replace(/\\A/g,"^").replace(/\\z/g,"$").replace(/\\Z/g,"(?=\\r?\\n?$)");for(const[n,r]of su)s=s.replaceAll(n,r);try{return new RegExp(s,"gu")}catch(n){if(!(n instanceof SyntaxError)||!n.message.toLowerCase().includes("invalid property name"))throw n;let r=!1;const a=s.replace(/(\\[pP])\{([^}=]+)\}/g,(o,i,l)=>{try{return new RegExp(`\\p{${l}}`,"u"),`${i}{${l}}`}catch{return r=!0,`${i}{Script=${l}}`}});if(!r)throw n;try{return new RegExp(a,"gu")}catch{throw n}}}else if(e.String!==void 0){const s=nu(e.String);return new 
RegExp(t?s:`(${s})`,"gu")}else return console.warn("Unknown pattern type:",e),null},nu=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),ru=(e,t,s)=>{const n=[];let r=0;for(;r<e.length;){if(n.push(e[r]),(t.get(e[r])??s)!==s){++r;continue}for(;++r<e.length&&(t.get(e[r])??s)===s;)t.get(n.at(-1))!==s&&(n[n.length-1]+=e[r])}return n},au=e=>e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103,ou=e=>Number.isInteger(e)||typeof e=="bigint",iu=e=>{let t=0;for(const s of e)++t;return t},lu=e=>di(e.toLowerCase()),Re=(...e)=>Array.prototype.concat.apply([],e),Yn=e=>new Map(Object.entries(e)),cu=(e,t)=>{const s=[];let n=0;for(const r of e.matchAll(t)){const a=r[0];n<r.index&&s.push(e.slice(n,r.index)),a.length>0&&s.push(a),n=r.index+a.length}return n<e.length&&s.push(e.slice(n)),s},di=e=>e.replace(new RegExp("\\p{M}","gu"),""),mo=(e,t,s=[])=>{if(!e||Array.isArray(e)||typeof e!="object")return`${t} must be a valid object`;for(const n of s)if(!(n in e))return`${t} must contain a "${n}" property`;return null},du=e=>e.match(/\S+/g)||[],uu=class{constructor(){const e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}},ws=uu,_u=class extends ws{constructor(e){super(),this.config=e}_call(e){return this.normalize(e)}},it=_u,hu=class extends it{tokenize_chinese_chars(e){const t=[];for(let s=0;s<e.length;++s){const n=e[s],r=n.charCodeAt(0);au(r)?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}strip_accents(e){return e.normalize("NFD").replace(new RegExp("\\p{Mn}","gu"),"")}is_control(e){switch(e){case" ":case`
2
  `:case"\r":return!1;default:return new RegExp("^\\p{Cc}|\\p{Cf}|\\p{Co}|\\p{Cs}$","u").test(e)}}clean_text(e){const t=[];for(const s of e){const n=s.charCodeAt(0);n===0||n===65533||this.is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this.clean_text(e)),this.config.handle_chinese_chars&&(e=this.tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),this.config.strip_accents!==!1&&(e=this.strip_accents(e))):this.config.strip_accents&&(e=this.strip_accents(e)),e}},pu=hu,fu=class extends it{constructor(e){super(e),this.charsmap=e.precompiled_charsmap??null}normalize(e){return e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,""),e=e.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," "),e.includes("~")?e=e.split("~").map(s=>s.normalize("NFKC")).join("~"):e=e.normalize("NFKC"),e}},mu=fu,gu=class extends it{constructor(e){super(e),this.normalizers=(e.normalizers??[]).map(t=>ui(t))}normalize(e){return this.normalizers.reduce((t,s)=>s?s.normalize(t):t,e)}},wu=gu,vu=class extends it{normalize(e){const t=tn(this.config.pattern??{});return t===null?e:e.replaceAll(t,this.config.content??"")}},Mu=vu,xu=class extends it{constructor(){super(...arguments),this.form="NFC"}normalize(e){return e=e.normalize(this.form),e}},sn=xu,yu=class extends sn{constructor(){super(...arguments),this.form="NFC"}},bu=yu,ku=class extends sn{constructor(){super(...arguments),this.form="NFD"}},Tu=ku,Eu=class extends sn{constructor(){super(...arguments),this.form="NFKC"}},Pu=Eu,Su=class extends sn{constructor(){super(...arguments),this.form="NFKD"}},Cu=Su,Fu=class extends it{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}},Au=Fu,Lu=class extends it{normalize(e){return di(e)}},Iu=Lu,Ou=class extends it{normalize(e){return 
e.toLowerCase()}},Nu=Ou,zu=class extends it{normalize(e){return e=this.config.prepend+e,e}},Du=zu;function Bu(e){if(e===null)return null;switch(e.type){case"BertNormalizer":return new pu(e);case"Precompiled":return new mu(e);case"Sequence":return new wu(e);case"Replace":return new Mu(e);case"NFC":return new bu(e);case"NFD":return new Tu(e);case"NFKC":return new Pu(e);case"NFKD":return new Cu(e);case"Strip":return new Au(e);case"StripAccents":return new Iu(e);case"Lowercase":return new Nu(e);case"Prepend":return new Du(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}var ui=Bu,Vu=class extends ws{pre_tokenize(e,t){return(Array.isArray(e)?e.map(s=>this.pre_tokenize_text(s,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}},qe=Vu,Gu=class extends qe{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space??!1,this.trim_offsets=this.config.trim_offsets??!1,this.use_regex=this.config.use_regex??!0,this.pattern=new RegExp("'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+","gu"),this.byte_encoder=ci,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(n=>Array.from(this.text_encoder.encode(n),r=>this.byte_encoder[r]).join(""))}},$u=Gu,Ru=class extends qe{pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}},ju=Ru,qu=class extends qe{constructor(e){super(),this.replacement=e.replacement??"▁",this.str_rep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,t){const{section_index:s=void 0}=t??{};let n=e.replaceAll(" ",this.str_rep);return!n.startsWith(this.replacement)&&(this.prepend_scheme==="always"||this.prepend_scheme==="first"&&s===0)&&(n=this.str_rep+n),[n]}},Uu=qu,Wu=class extends 
qe{constructor(e){super(),this.config=e,this.pattern=tn(this.config.pattern??{},this.config.invert??!0)}pre_tokenize_text(e){return this.pattern===null?[]:this.config.invert?e.match(this.pattern)||[]:this.config.behavior?.toLowerCase()==="removed"?e.split(this.pattern).filter(t=>t):cu(e,this.pattern)}},Hu=Wu,Qu=class extends qe{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${Us}]+|[${Us}]+`,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Xu=Qu,Ku=class extends qe{constructor(e){super(),this.config=e;const t=`[^\\d]+|\\d${this.config.individual_digits?"":"+"}`;this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e){return e.match(this.pattern)||[]}},Ju=Ku,Yu=class extends qe{constructor(){super(),this.pattern=new RegExp(`[^\\s${Us}]+|[${Us}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}},Zu=Yu,e_=class extends qe{constructor(e){super(),this.config=e,this.pattern=tn(this.config.pattern??{}),this.content=this.config.content??""}pre_tokenize_text(e){return this.pattern===null?[e]:[e.replaceAll(this.pattern,this.config.content??"")]}},t_=e_,s_=class extends qe{constructor(e){super(),this.tokenizers=(e.pretokenizers??[]).map(t=>_i(t))}pre_tokenize_text(e,t){return this.tokenizers.reduce((s,n)=>n?n.pre_tokenize(s,t):s,[e])}},n_=s_,r_=class extends qe{pre_tokenize_text(e){return du(e)}},a_=r_,o_=class extends qe{constructor(e){super(),this.config=e,this._length=e.length}pre_tokenize_text(e){const t=[];for(let s=0;s<e.length;s+=this._length)t.push(e.slice(s,s+this._length));return t}},i_=o_;function l_(e){if(e===null)return null;switch(e.type){case"BertPreTokenizer":return new Zu;case"Sequence":return new n_(e);case"Whitespace":return new ju;case"WhitespaceSplit":return new a_;case"Metaspace":return new Uu(e);case"ByteLevel":return new $u(e);case"Split":return new Hu(e);case"Punctuation":return new Xu(e);case"Digits":return new Ju(e);case"Replace":return new t_(e);case"FixedLength":return new i_(e);default:throw new 
Error(`Unknown PreTokenizer type: ${e.type}`)}}var _i=l_,c_=class extends ws{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=ru(t,this.tokens_to_ids,this.unk_token_id)),t}},nn=c_,d_=class extends nn{constructor(e){super(e),this.max_input_chars_per_word=100,this.tokens_to_ids=Yn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[t,s]of this.tokens_to_ids)this.vocab[s]=t}encode(e){const t=[];for(const s of e){const n=[...s];if(n.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,a=0;const o=[];for(;a<n.length;){let i=n.length,l=null;for(;a<i;){let c=n.slice(a,i).join("");if(a>0&&(c=this.config.continuing_subword_prefix+c),this.tokens_to_ids.has(c)){l=c;break}--i}if(l===null){r=!0;break}o.push(l),a=i}r?t.push(this.unk_token):t.push(...o)}return t}},go=d_,wo=class hi{constructor(t,s){this.is_leaf=t,this.children=s}static default(){return new hi(!1,new Map)}},u_=class{constructor(){this.root=wo.default()}extend(e){for(const t of e)this.push(t)}push(e){let t=this.root;for(const s of e){let n=t.children.get(s);n===void 0&&(n=wo.default(),t.children.set(s,n)),t=n}t.is_leaf=!0}*common_prefix_search(e){let t=this.root;if(t===void 0)return;let s="";for(const n of e){if(s+=n,t=t.children.get(n),t===void 0)return;t.is_leaf&&(yield s)}}},__=u_,Tn=class pi{constructor(t,s,n,r,a){this.token_id=t,this.node_id=s,this.pos=n,this.length=r,this.score=a,this.prev=null,this.backtrace_score=0}clone(){const t=new pi(this.token_id,this.node_id,this.pos,this.length,this.score);return 
t.prev=this.prev,t.backtrace_score=this.backtrace_score,t}},h_=class{constructor(e,t,s){this.chars=Array.from(e),this.len=this.chars.length,this.bos_token_id=t,this.eos_token_id=s,this.nodes=[],this.begin_nodes=Array.from({length:this.len+1},()=>[]),this.end_nodes=Array.from({length:this.len+1},()=>[]);const n=new Tn(this.bos_token_id??0,0,0,0,0),r=new Tn(this.eos_token_id??0,1,this.len,0,0);this.nodes.push(n.clone()),this.nodes.push(r.clone()),this.begin_nodes[this.len].push(r),this.end_nodes[0].push(n)}insert(e,t,s,n){const r=this.nodes.length,a=new Tn(n,r,e,t,s);this.begin_nodes[e].push(a),this.end_nodes[e+t].push(a),this.nodes.push(a)}viterbi(){const e=this.len;let t=0;for(;t<=e;){if(this.begin_nodes[t].length==0)return[];for(let o of this.begin_nodes[t]){o.prev=null;let i=0,l=null;for(let c of this.end_nodes[t]){const d=c.backtrace_score+o.score;(l===null||d>i)&&(l=c.clone(),i=d)}if(l!==null)o.prev=l,o.backtrace_score=i;else return[]}++t}const s=[],r=this.begin_nodes[e][0].prev;if(r===null)return[];let a=r.clone();for(;a.prev!==null;)s.push(a.clone()),a=a.clone().prev.clone();return s.reverse(),s}piece(e){return this.chars.slice(e.pos,e.pos+e.length).join("")}tokens(){return this.viterbi().map(t=>this.piece(t))}token_ids(){return this.viterbi().map(t=>t.token_id)}},p_=h_;function f_(e){if(e.length===0)throw new Error("Array must not be empty");let t=e[0],s=0;for(let n=1;n<e.length;++n)e[n]<t&&(t=e[n],s=n);return[t,s]}var m_=class extends nn{constructor(e,t){super(e);const s=e.vocab.length;this.vocab=new Array(s),this.scores=new Array(s);for(let n=0;n<s;++n)[this.vocab[n],this.scores[n]]=e.vocab[n];this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map((n,r)=>[n,r])),this.bos_token=" 
",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.min_score=f_(this.scores)[0],this.unk_score=this.min_score-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new __,this.trie.extend(this.vocab),this.fuse_unk=!0}populate_nodes(e){const t=e.chars,s=1;let n=0;for(;n<t.length;){let r=!1;const a=t.slice(n).join(""),o=this.trie.common_prefix_search(a);for(const i of o){const l=this.tokens_to_ids.get(i),c=this.scores[l],d=iu(i);e.insert(n,d,c,l),!r&&d===s&&(r=!0)}r||e.insert(n,s,this.unk_score,this.unk_token_id),n+=s}}tokenize(e){const t=new p_(e,this.bos_token_id,this.eos_token_id);return this.populate_nodes(t),t.tokens()}encode(e){const t=[];for(const s of e){const n=this.tokenize(s);t.push(...n)}return t}},vo=m_,g_=class{constructor(e=(s,n)=>s>n,t=1/0){this._heap=[],this._comparator=e,this._max_size=t}get size(){return this._heap.length}is_empty(){return this.size===0}peek(){return this._heap[0]}push(...e){return this.extend(e)}extend(e){for(const t of e)if(this.size<this._max_size)this._heap.push(t),this._sift_up();else{const s=this._smallest();this._comparator(t,this._heap[s])&&(this._heap[s]=t,this._sift_up_from(s))}return this.size}pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._sift_down(),e}replace(e){const t=this.peek();return this._heap[0]=e,this._sift_down(),t}_parent(e){return(e+1>>>1)-1}_left(e){return(e<<1)+1}_right(e){return e+1<<1}_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}_swap(e,t){const s=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=s}_sift_up(){this._sift_up_from(this.size-1)}_sift_up_from(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}_sift_down(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const 
t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}_smallest(){return 2**Math.floor(Math.log2(this.size))-1}},w_=g_,v_=class{constructor(e){this.capacity=e,this.cache=new Map}get(e){if(!this.cache.has(e))return;const t=this.cache.get(e);return this.cache.delete(e),this.cache.set(e,t),t}put(e,t){this.cache.has(e)&&this.cache.delete(e),this.cache.set(e,t),this.cache.size>this.capacity&&this.cache.delete(this.cache.keys().next().value)}clear(){this.cache.clear()}},M_=v_,x_=class extends nn{constructor(e){super(e),this.tokens_to_ids=Yn(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[s,n]of this.tokens_to_ids)this.vocab[n]=s;const t=Array.isArray(e.merges[0]);this.merges=t?e.merges:e.merges.map(s=>s.split(" ",2)),this.bpe_ranks=new Map(this.merges.map((s,n)=>[JSON.stringify(s),n])),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.max_length_to_cache=256,this.cache_capacity=1e4,this.cache=new M_(this.cache_capacity)}clear_cache(){this.cache.clear()}bpe(e){if(e.length===0)return[];const t=this.cache.get(e);if(t!==void 0)return t;const s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){const r=new w_((i,l)=>i.score<l.score);let a={token:s[0],bias:0,prev:null,next:null},o=a;for(let i=1;i<s.length;++i){const l={bias:i/s.length,token:s[i],prev:o,next:null};o.next=l,this.add_node(r,o),o=l}for(;!r.is_empty();){const i=r.pop();if(i.deleted||!i.next||i.next.deleted)continue;if(i.deleted=!0,i.next.deleted=!0,i.prev){const c={...i.prev};i.prev.deleted=!0,i.prev=c,c.prev?c.prev.next=c:a=c}const 
l={token:i.token+i.next.token,bias:i.bias,prev:i.prev,next:i.next.next};l.prev?(l.prev.next=l,this.add_node(r,l.prev)):a=l,l.next&&(l.next.prev=l,this.add_node(r,l))}for(let i=a;i!==null;i=i.next)n.push(i.token)}else n=s;if(this.continuing_subword_suffix)for(let r=0;r<n.length-1;++r)n[r]+=this.continuing_subword_suffix;return e.length<this.max_length_to_cache&&this.cache.put(e,n),n}add_node(e,t){const s=this.bpe_ranks.get(JSON.stringify([t.token,t.next.token]));s!==void 0&&(t.score=s+t.bias,e.push(t))}encode(e){const t=[];for(const s of e){if(this.ignore_merges&&this.tokens_to_ids.has(s)){t.push(s);continue}const n=this.bpe(s);for(const r of n)if(this.tokens_to_ids.has(r))t.push(r);else if(this.byte_fallback){const a=Array.from(this.text_encoder.encode(r)).map(o=>`<0x${o.toString(16).toUpperCase().padStart(2,"0")}>`);a.every(o=>this.tokens_to_ids.has(o))?t.push(...a):t.push(this.unk_token)}else t.push(this.unk_token)}return t}},Mo=x_,y_=class extends nn{constructor(e,t){super(e);const s=e.vocab;this.tokens_to_ids=Yn(t.target_lang?s[t.target_lang]:s),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[n,r]of this.tokens_to_ids)this.vocab[r]=n}encode(e){return e}},b_=y_;function k_(e,t){switch(e.type){case"WordPiece":return new go(e);case"Unigram":return new vo(e,t.eos_token);case"BPE":return new Mo(e);default:if(e.vocab)return Array.isArray(e.vocab)?new vo(e,t.eos_token):Object.hasOwn(e,"continuing_subword_prefix")&&Object.hasOwn(e,"unk_token")?Object.hasOwn(e,"merges")?new Mo(e):new go(e):new b_(e,{target_lang:t.target_lang,bos_token:t.bos_token,eos_token:t.eos_token,pad_token:t.pad_token,unk_token:t.unk_token});throw new 
Error(`Unknown TokenizerModel type: ${e?.type}`)}}var T_=k_,E_=class extends ws{constructor(e){super(),this.config=e}_call(e,...t){return this.post_process(e,...t)}},vs=E_,P_=class extends vs{post_process(e,t=null,s=!0){const n=t===null?this.config.single:this.config.pair;let r=[],a=[];for(const o of n)"SpecialToken"in o?s&&(r.push(o.SpecialToken.id),a.push(o.SpecialToken.type_id)):"Sequence"in o&&(o.Sequence.id==="A"?(r=Re(r,e),a=Re(a,new Array(e.length).fill(o.Sequence.type_id))):o.Sequence.id==="B"&&(r=Re(r,t),a=Re(a,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:r,token_type_ids:a}}},S_=P_,C_=class extends vs{post_process(e,t=null){return{tokens:e,tokens_pair:t}}},F_=C_,A_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t=null,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},L_=A_,I_=class extends vs{constructor(e){super(e),this.sep=e.sep,this.cls=e.cls}post_process(e,t,s=!0){s&&(e=Re([this.cls[0]],e,[this.sep[0]]));let n=new Array(e.length).fill(0);if(t){const r=s?[this.sep[0]]:[],a=s?[this.sep[0]]:[];e=Re(e,r,t,a),n=Re(n,new Array(t.length+r.length+a.length).fill(1))}return{tokens:e,token_type_ids:n}}},O_=I_,N_=class extends vs{constructor(e){super(e),this.processors=(e.processors??[]).map(t=>fi(t))}post_process(e,t=null,s=!0){let n={tokens:e,tokens_pair:t};for(const r of this.processors)n=r.post_process(n.tokens,n.tokens_pair,s);return n}},z_=N_;function D_(e){if(e===null)return null;switch(e.type){case"TemplateProcessing":return new S_(e);case"ByteLevel":return new F_(e);case"BertProcessing":return new L_(e);case"RobertaProcessing":return new O_(e);case"Sequence":return new z_(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}var fi=D_,B_=class extends 
ws{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets="trim_offsets"in e?e.trim_offsets:!1}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}},Ue=B_,V_=class extends Ue{constructor(e){super(e),this.byte_decoder=tu,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),s=new Uint8Array([...t].map(n=>this.byte_decoder[n]));return this.text_decoder.decode(s)}decode_chain(e){const t=[];let s=[];for(const n of e)this.added_tokens.find(r=>r.content===n)!==void 0?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}},G_=V_,$_=class extends Ue{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map((t,s)=>{if(s!==0){const n=this.config.prefix;n&&t.startsWith(n)?t=t.replace(n,""):t=" "+t}return this.cleanup&&(t=Jn(t)),t})}},R_=$_,j_=class extends Ue{constructor(e){super(e),this.replacement=e.replacement??"▁"}decode_chain(e){const t=[];for(let s=0;s<e.length;++s){let n=e[s].replaceAll(this.replacement," ");s==0&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}},q_=j_,U_=class extends Ue{constructor(e){super(e),this.suffix=e.suffix??""}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}},W_=U_,H_=class extends Ue{constructor(e){super(e),this.pad_token=e.pad_token??"",this.word_delimiter_token=e.word_delimiter_token??"",this.cleanup=e.cleanup}convert_tokens_to_string(e){if(e.length===0)return"";const t=[e[0]];for(let r=1;r<e.length;++r)e[r]!==t.at(-1)&&t.push(e[r]);let n=t.filter(r=>r!==this.pad_token).join("");return this.cleanup&&(n=Jn(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}},Q_=H_,X_=class extends 
Ue{constructor(e){super(e),this.decoders=(e.decoders??[]).map(t=>mi(t))}decode_chain(e){return this.decoders.reduce((t,s)=>s.decode_chain(t),e)}},K_=X_,J_=class extends Ue{decode_chain(e){const t=tn(this.config.pattern),s=this.config.content??"";return t===null?e:e.map(n=>n.replaceAll(t,s))}},Y_=J_,Z_=class extends Ue{decode_chain(e){return[e.join("")]}},eh=Z_,th=class extends Ue{constructor(e){super(e),this.content=e.content??"",this.start=e.start??0,this.stop=e.stop??0}decode_chain(e){return e.map(t=>{let s=0;for(let r=0;r<this.start&&t[r]===this.content;++r){s=r+1;continue}let n=t.length;for(let r=0;r<this.stop;++r){const a=t.length-r-1;if(t[a]===this.content){n=a;continue}else break}return t.slice(s,n)})}},sh=th,nh=class extends Ue{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let s=[];for(const n of e){let r=null;if(n.length===6&&n.startsWith("<0x")&&n.endsWith(">")){const a=parseInt(n.slice(3,5),16);isNaN(a)||(r=a)}if(r!==null)s.push(r);else{if(s.length>0){const a=this.text_decoder.decode(Uint8Array.from(s));t.push(a),s=[]}t.push(n)}}if(s.length>0){const n=this.text_decoder.decode(Uint8Array.from(s));t.push(n),s=[]}return t}},rh=nh;function ah(e){if(e===null)return null;switch(e.type){case"ByteLevel":return new G_(e);case"WordPiece":return new R_(e);case"Metaspace":return new q_(e);case"BPEDecoder":return new W_(e);case"CTC":return new Q_(e);case"Sequence":return new K_(e);case"Replace":return new Y_(e);case"Fuse":return new eh(e);case"Strip":return new sh(e);case"ByteFallback":return new rh(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}var mi=ah,oh=class{constructor(e,t){const s=mo(e,"Tokenizer",["model","decoder","post_processor","pre_tokenizer","normalizer"]);if(s)throw new Error(s);const n=mo(t,"Config");if(n)throw new 
Error(n);this.tokenizer=e,this.config=t,this.normalizer=ui(this.tokenizer.normalizer),this.pre_tokenizer=_i(this.tokenizer.pre_tokenizer),this.model=T_(this.tokenizer.model,this.config),this.post_processor=fi(this.tokenizer.post_processor),this.decoder=mi(this.tokenizer.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];const r=[],a=[];this.added_tokens_map=new Map;for(const o of this.tokenizer.added_tokens){const i=new Zd(o);if(this.added_tokens.push(i),this.model.tokens_to_ids.set(i.content,i.id),this.model.vocab[i.id]=i.content,i.special&&(this.special_tokens.push(i.content),this.all_special_ids.push(i.id)),this.added_tokens_map.set(i.content,i),i.normalized&&this.normalizer!==null){const l=this.normalizer(i.content);a.push(l),this.added_tokens_map.set(l,i)}else r.push(i.content)}(this.config.additional_special_tokens??[]).forEach(o=>{this.special_tokens.includes(o)||this.special_tokens.push(o)}),this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.splitter_unnormalized=new po(r),this.splitter_normalized=new po(a),this.remove_space=this.config.remove_space,this.clean_up_tokenization_spaces=this.config.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=this.config.do_lowercase_and_remove_accent??!1}encode(e,{text_pair:t=null,add_special_tokens:s=!0,return_token_type_ids:n=null}={}){const{tokens:r,token_type_ids:a}=this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}),o=r.map(l=>this.added_tokens_map.get(l)?.id??this.model.tokens_to_ids.get(l)??this.model.unk_token_id),i={ids:o,tokens:r,attention_mask:new Array(o.length).fill(1)};return n&&a&&(i.token_type_ids=a),i}decode(e,t={}){if(!Array.isArray(e)||e.length===0||!ou(e[0]))throw Error("token_ids must be a non-empty array of integers.");let s=e.map(r=>this.model.vocab[Number(r)]??this.model.unk_token);t.skip_special_tokens&&(s=s.filter(r=>!this.special_tokens.includes(r)));let 
n=this.decoder?this.decoder(s):s.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(n=n.replaceAll(this.decoder.end_of_word_suffix," "),t.skip_special_tokens&&(n=n.trim())),(t.clean_up_tokenization_spaces??this.clean_up_tokenization_spaces)&&(n=Jn(n)),n}tokenize(e,{text_pair:t=null,add_special_tokens:s=!1}={}){return this.tokenize_helper(e,{text_pair:t,add_special_tokens:s}).tokens}encode_text(e){if(e===null)return null;const t=this.splitter_unnormalized.split(e);return t.forEach((s,n)=>{const r=this.added_tokens_map.get(s);r&&(r.lstrip&&n>0&&(t[n-1]=t[n-1].trimEnd()),r.rstrip&&n<t.length-1&&(t[n+1]=t[n+1].trimStart()))}),t.flatMap((s,n)=>{if(s.length===0)return[];if(this.added_tokens_map.has(s))return[s];if(this.remove_space===!0&&(s=s.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(s=lu(s)),this.normalizer!==null&&(s=this.normalizer(s)),s.length===0)return[];const r=this.splitter_normalized.split(s);return r.forEach((a,o)=>{const i=this.added_tokens_map.get(a);i&&(i.lstrip&&o>0&&(r[o-1]=r[o-1].trimEnd()),i.rstrip&&o<r.length-1&&(r[o+1]=r[o+1].trimStart()))}),r.flatMap(a=>{if(a.length===0)return[];if(this.added_tokens_map.has(a))return[a];const o=this.pre_tokenizer!==null?this.pre_tokenizer(a,{section_index:n}):[a];return this.model(o)})})}tokenize_helper(e,{text_pair:t=null,add_special_tokens:s=!0}){const n=this.encode_text(e),r=this.encode_text(t||null);return this.post_processor?this.post_processor(n,r,s):{tokens:Re(n??[],r??[])}}token_to_id(e){return this.model.tokens_to_ids.get(e)}id_to_token(e){return this.model.vocab[e]}get_added_tokens_decoder(){const e=new Map;for(const t of this.added_tokens)e.set(t.id,t);return e}get_vocab(e=!0){const t=new Map;for(let s=0;s<this.model.vocab.length;++s){const n=this.model.vocab[s];(e||!this.added_tokens_map.has(n))&&t.set(n,s)}return 
t}},ih=oh,k=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Comment:"Comment"}),Ne=class{constructor(e,t){this.value=e,this.type=t}};function xo(e){return/\w/.test(e)}function us(e){return/[0-9]/.test(e)}function yo(e){return/\s/.test(e)}var lh=[["{%",k.OpenStatement],["%}",k.CloseStatement],["{{",k.OpenExpression],["}}",k.CloseExpression],["(",k.OpenParen],[")",k.CloseParen],["{",k.OpenCurlyBracket],["}",k.CloseCurlyBracket],["[",k.OpenSquareBracket],["]",k.CloseSquareBracket],[",",k.Comma],[".",k.Dot],[":",k.Colon],["|",k.Pipe],["<=",k.ComparisonBinaryOperator],[">=",k.ComparisonBinaryOperator],["==",k.ComparisonBinaryOperator],["!=",k.ComparisonBinaryOperator],["<",k.ComparisonBinaryOperator],[">",k.ComparisonBinaryOperator],["+",k.AdditiveBinaryOperator],["-",k.AdditiveBinaryOperator],["~",k.AdditiveBinaryOperator],["*",k.MultiplicativeBinaryOperator],["/",k.MultiplicativeBinaryOperator],["%",k.MultiplicativeBinaryOperator],["=",k.Equals]],ch=new Map([["n",`
3
  `],["t"," "],["r","\r"],["b","\b"],["f","\f"],["v","\v"],["'","'"],['"','"'],["\\","\\"]]);function dh(e,t={}){return e.endsWith(`
4
  `)&&(e=e.slice(0,-1)),t.lstrip_blocks&&(e=e.replace(/^[ \t]*({[#%-])/gm,"$1")),t.trim_blocks&&(e=e.replace(/([#%-]})\n/g,"$1")),e.replace(/{%\s*(end)?generation\s*%}/gs,"")}function uh(e,t={}){const s=[],n=dh(e,t);let r=0,a=0;const o=c=>{let d="";for(;c(n[r]);){if(n[r]==="\\"){if(++r,r>=n.length)throw new SyntaxError("Unexpected end of input");const u=n[r++],_=ch.get(u);if(_===void 0)throw new SyntaxError(`Unexpected escaped character: ${u}`);d+=_;continue}if(d+=n[r++],r>=n.length)throw new SyntaxError("Unexpected end of input")}return d},i=()=>{const c=s.at(-1);c&&c.type===k.Text&&(c.value=c.value.trimEnd(),c.value===""&&s.pop())},l=()=>{for(;r<n.length&&yo(n[r]);)++r};e:for(;r<n.length;){const c=s.at(-1)?.type;if(c===void 0||c===k.CloseStatement||c===k.CloseExpression||c===k.Comment){let u="";for(;r<n.length&&!(n[r]==="{"&&(n[r+1]==="%"||n[r+1]==="{"||n[r+1]==="#"));)u+=n[r++];if(u.length>0){s.push(new Ne(u,k.Text));continue}}if(n[r]==="{"&&n[r+1]==="#"){r+=2;const u=n[r]==="-";u&&++r;let _="";for(;n[r]!=="#"||n[r+1]!=="}";){if(r+2>=n.length)throw new SyntaxError("Missing end of comment tag");_+=n[r++]}const h=_.endsWith("-");h&&(_=_.slice(0,-1)),u&&i(),s.push(new Ne(_,k.Comment)),r+=2,h&&l();continue}if(n.slice(r,r+3)==="{%-"){i(),s.push(new Ne("{%",k.OpenStatement)),r+=3;continue}if(n.slice(r,r+3)==="{{-"){i(),s.push(new Ne("{{",k.OpenExpression)),a=0,r+=3;continue}if(o(yo),n.slice(r,r+3)==="-%}"){s.push(new Ne("%}",k.CloseStatement)),r+=3,l();continue}if(n.slice(r,r+3)==="-}}"){s.push(new Ne("}}",k.CloseExpression)),r+=3,l();continue}const d=n[r];if(d==="-"||d==="+"){const u=s.at(-1)?.type;if(u===k.Text||u===void 0)throw new SyntaxError(`Unexpected character: ${d}`);switch(u){case k.Identifier:case k.NumericLiteral:case k.StringLiteral:case k.CloseParen:case k.CloseSquareBracket:break;default:{++r;const _=o(us);s.push(new Ne(`${d}${_}`,_.length>0?k.NumericLiteral:k.UnaryOperator));continue}}}for(const[u,_]of 
lh){if(u==="}}"&&a>0)continue;if(n.slice(r,r+u.length)===u){s.push(new Ne(u,_)),_===k.OpenExpression?a=0:_===k.OpenCurlyBracket?++a:_===k.CloseCurlyBracket&&--a,r+=u.length;continue e}}if(d==="'"||d==='"'){++r;const u=o(_=>_!==d);s.push(new Ne(u,k.StringLiteral)),++r;continue}if(us(d)){let u=o(us);if(n[r]==="."&&us(n[r+1])){++r;const _=o(us);u=`${u}.${_}`}s.push(new Ne(u,k.NumericLiteral));continue}if(xo(d)){const u=o(xo);s.push(new Ne(u,k.Identifier));continue}throw new SyntaxError(`Unexpected character: ${d}`)}return s}var We=class{type="Statement"},_h=class extends We{constructor(e){super(),this.body=e}type="Program"},hh=class extends We{constructor(e,t,s){super(),this.test=e,this.body=t,this.alternate=s}type="If"},ph=class extends We{constructor(e,t,s,n){super(),this.loopvar=e,this.iterable=t,this.body=s,this.defaultBlock=n}type="For"},fh=class extends We{type="Break"},mh=class extends We{type="Continue"},gh=class extends We{constructor(e,t,s){super(),this.assignee=e,this.value=t,this.body=s}type="Set"},wh=class extends We{constructor(e,t,s){super(),this.name=e,this.args=t,this.body=s}type="Macro"},vh=class extends We{constructor(e){super(),this.value=e}type="Comment"},Oe=class extends We{type="Expression"},Mh=class extends Oe{constructor(e,t,s){super(),this.object=e,this.property=t,this.computed=s}type="MemberExpression"},bo=class extends Oe{constructor(e,t){super(),this.callee=e,this.args=t}type="CallExpression"},Lt=class extends Oe{constructor(e){super(),this.value=e}type="Identifier"},Ht=class extends Oe{constructor(e){super(),this.value=e}type="Literal"},xh=class extends Ht{type="IntegerLiteral"},yh=class extends Ht{type="FloatLiteral"},ko=class extends Ht{type="StringLiteral"},bh=class extends Ht{type="ArrayLiteral"},To=class extends Ht{type="TupleLiteral"},kh=class extends Ht{type="ObjectLiteral"},_s=class extends Oe{constructor(e,t,s){super(),this.operator=e,this.left=t,this.right=s}type="BinaryExpression"},Th=class extends 
Oe{constructor(e,t){super(),this.operand=e,this.filter=t}type="FilterExpression"},Eh=class extends We{constructor(e,t){super(),this.filter=e,this.body=t}type="FilterStatement"},Ph=class extends Oe{constructor(e,t){super(),this.lhs=e,this.test=t}type="SelectExpression"},Sh=class extends Oe{constructor(e,t,s){super(),this.operand=e,this.negate=t,this.test=s}type="TestExpression"},Ch=class extends Oe{constructor(e,t){super(),this.operator=e,this.argument=t}type="UnaryExpression"},Fh=class extends Oe{constructor(e=void 0,t=void 0,s=void 0){super(),this.start=e,this.stop=t,this.step=s}type="SliceExpression"},Ah=class extends Oe{constructor(e,t){super(),this.key=e,this.value=t}type="KeywordArgumentExpression"},Lh=class extends Oe{constructor(e){super(),this.argument=e}type="SpreadExpression"},Ih=class extends We{constructor(e,t,s){super(),this.call=e,this.callerArgs=t,this.body=s}type="CallStatement"},Oh=class extends Oe{constructor(e,t,s){super(),this.condition=e,this.trueExpr=t,this.falseExpr=s}type="Ternary"};function Nh(e){const t=new _h([]);let s=0;function n(M,y){const E=e[s++];if(!E||E.type!==M)throw new Error(`Parser Error: ${y}. 
${E.type} !== ${M}.`);return E}function r(M){if(!l(M))throw new SyntaxError(`Expected ${M}`);++s}function a(){switch(e[s].type){case k.Comment:return new vh(e[s++].value);case k.Text:return c();case k.OpenStatement:return d();case k.OpenExpression:return u();default:throw new SyntaxError(`Unexpected token type: ${e[s].type}`)}}function o(...M){return s+M.length<=e.length&&M.every((y,E)=>y===e[s+E].type)}function i(...M){return e[s]?.type===k.OpenStatement&&e[s+1]?.type===k.Identifier&&M.includes(e[s+1]?.value)}function l(...M){return s+M.length<=e.length&&M.every((y,E)=>e[s+E].type==="Identifier"&&y===e[s+E].value)}function c(){return new ko(n(k.Text,"Expected text token").value)}function d(){if(n(k.OpenStatement,"Expected opening statement token"),e[s].type!==k.Identifier)throw new SyntaxError(`Unknown statement, got ${e[s].type}`);const M=e[s].value;let y;switch(M){case"set":++s,y=_();break;case"if":++s,y=h(),n(k.OpenStatement,"Expected {% token"),r("endif"),n(k.CloseStatement,"Expected %} token");break;case"macro":++s,y=p(),n(k.OpenStatement,"Expected {% token"),r("endmacro"),n(k.CloseStatement,"Expected %} token");break;case"for":++s,y=m(),n(k.OpenStatement,"Expected {% token"),r("endfor"),n(k.CloseStatement,"Expected %} token");break;case"call":{++s;let E=null;o(k.OpenParen)&&(E=B());const U=O();if(U.type!=="Identifier")throw new SyntaxError("Expected identifier following call statement");const Q=B();n(k.CloseStatement,"Expected closing statement token");const pe=[];for(;!i("endcall");)pe.push(a());n(k.OpenStatement,"Expected '{%'"),r("endcall"),n(k.CloseStatement,"Expected closing statement token");const me=new bo(U,Q);y=new Ih(me,E,pe);break}case"break":++s,n(k.CloseStatement,"Expected closing statement token"),y=new fh;break;case"continue":++s,n(k.CloseStatement,"Expected closing statement token"),y=new mh;break;case"filter":{++s;let E=O();E instanceof Lt&&o(k.OpenParen)&&(E=I(E)),n(k.CloseStatement,"Expected closing statement token");const 
U=[];for(;!i("endfilter");)U.push(a());n(k.OpenStatement,"Expected '{%'"),r("endfilter"),n(k.CloseStatement,"Expected '%}'"),y=new Eh(E,U);break}default:throw new SyntaxError(`Unknown statement type: ${M}`)}return y}function u(){n(k.OpenExpression,"Expected opening expression token");const M=g();return n(k.CloseExpression,"Expected closing expression token"),M}function _(){const M=f();let y=null;const E=[];if(o(k.Equals))++s,y=f();else{for(n(k.CloseStatement,"Expected %} token");!i("endset");)E.push(a());n(k.OpenStatement,"Expected {% token"),r("endset")}return n(k.CloseStatement,"Expected closing statement token"),new gh(M,y,E)}function h(){const M=g();n(k.CloseStatement,"Expected closing statement token");const y=[],E=[];for(;!i("elif","else","endif");)y.push(a());if(i("elif")){++s,++s;const U=h();E.push(U)}else if(i("else"))for(++s,++s,n(k.CloseStatement,"Expected closing statement token");!i("endif");)E.push(a());return new hh(M,y,E)}function p(){const M=O();if(M.type!=="Identifier")throw new SyntaxError("Expected identifier following macro statement");const y=B();n(k.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endmacro");)E.push(a());return new wh(M,y,E)}function f(M=!1){const y=M?O:g,E=[y()],U=o(k.Comma);for(;U&&(++s,E.push(y()),!!o(k.Comma)););return U?new To(E):E[0]}function m(){const M=f(!0);if(!(M instanceof Lt||M instanceof To))throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${M.type} instead`);if(!l("in"))throw new SyntaxError("Expected `in` keyword following loop variable");++s;const y=g();n(k.CloseStatement,"Expected closing statement token");const E=[];for(;!i("endfor","else");)E.push(a());const U=[];if(i("else"))for(++s,++s,n(k.CloseStatement,"Expected closing statement token");!i("endfor");)U.push(a());return new ph(M,y,E,U)}function g(){return v()}function v(){const M=x();if(l("if")){++s;const y=x();if(l("else")){++s;const E=v();return new Oh(y,M,E)}else return new Ph(M,y)}return M}function 
x(){let M=b();for(;l("or");){const y=e[s];++s;const E=b();M=new _s(y,M,E)}return M}function b(){let M=T();for(;l("and");){const y=e[s];++s;const E=T();M=new _s(y,M,E)}return M}function T(){let M;for(;l("not");){const y=e[s];++s;const E=T();M=new Ch(y,E)}return M??S()}function S(){let M=A();for(;;){let y;if(l("not","in"))y=new Ne("not in",k.Identifier),s+=2;else if(l("in"))y=e[s++];else if(o(k.ComparisonBinaryOperator))y=e[s++];else break;const E=A();M=new _s(y,M,E)}return M}function A(){let M=j();for(;o(k.AdditiveBinaryOperator);){const y=e[s];++s;const E=j();M=new _s(y,M,E)}return M}function N(){const M=z(O());return o(k.OpenParen)?I(M):M}function I(M){let y=new bo(M,B());return y=z(y),o(k.OpenParen)&&(y=I(y)),y}function B(){n(k.OpenParen,"Expected opening parenthesis for arguments list");const M=G();return n(k.CloseParen,"Expected closing parenthesis for arguments list"),M}function G(){const M=[];for(;!o(k.CloseParen);){let y;if(e[s].type===k.MultiplicativeBinaryOperator&&e[s].value==="*"){++s;const E=g();y=new Lh(E)}else if(y=g(),o(k.Equals)){if(++s,!(y instanceof Lt))throw new SyntaxError("Expected identifier for keyword argument");const E=g();y=new Ah(y,E)}M.push(y),o(k.Comma)&&++s}return M}function D(){const M=[];let y=!1;for(;!o(k.CloseSquareBracket);)o(k.Colon)?(M.push(void 0),++s,y=!0):(M.push(g()),o(k.Colon)&&(++s,y=!0));if(M.length===0)throw new SyntaxError("Expected at least one argument for member/slice expression");if(y){if(M.length>3)throw new SyntaxError("Expected 0-3 arguments for slice expression");return new Fh(...M)}return M[0]}function z(M){for(;o(k.Dot)||o(k.OpenSquareBracket);){const y=e[s];++s;let E;const U=y.type===k.OpenSquareBracket;if(U)E=D(),n(k.CloseSquareBracket,"Expected closing square bracket");else if(E=O(),E.type!=="Identifier")throw new SyntaxError("Expected identifier following dot operator");M=new Mh(M,E,U)}return M}function j(){let M=q();for(;o(k.MultiplicativeBinaryOperator);){const y=e[s++],E=q();M=new _s(y,M,E)}return 
M}function q(){let M=ee();for(;l("is");){++s;const y=l("not");y&&++s;const E=O();if(!(E instanceof Lt))throw new SyntaxError("Expected identifier for the test");M=new Sh(M,y,E)}return M}function ee(){let M=N();for(;o(k.Pipe);){++s;let y=O();if(!(y instanceof Lt))throw new SyntaxError("Expected identifier for the filter");o(k.OpenParen)&&(y=I(y)),M=new Th(M,y)}return M}function O(){const M=e[s++];switch(M.type){case k.NumericLiteral:{const y=M.value;return y.includes(".")?new yh(Number(y)):new xh(Number(y))}case k.StringLiteral:{let y=M.value;for(;o(k.StringLiteral);)y+=e[s++].value;return new ko(y)}case k.Identifier:return new Lt(M.value);case k.OpenParen:{const y=f();return n(k.CloseParen,"Expected closing parenthesis, got ${tokens[current].type} instead."),y}case k.OpenSquareBracket:{const y=[];for(;!o(k.CloseSquareBracket);)y.push(g()),o(k.Comma)&&++s;return++s,new bh(y)}case k.OpenCurlyBracket:{const y=new Map;for(;!o(k.CloseCurlyBracket);){const E=g();n(k.Colon,"Expected colon between key and value in object literal");const U=g();y.set(E,U),o(k.Comma)&&++s}return++s,new kh(y)}default:throw new SyntaxError(`Unexpected token: ${M.type}`)}}for(;s<e.length;)t.body.push(a());return t}function zh(e,t,s=1){if(t===void 0&&(t=e,e=0),s===0)throw new Error("range() step must not be zero");const n=[];if(s>0)for(let r=e;r<t;r+=s)n.push(r);else for(let r=e;r>t;r+=s)n.push(r);return n}function Eo(e,t,s,n=1){const r=Math.sign(n);r>=0?(t=(t??=0)<0?Math.max(e.length+t,0):Math.min(t,e.length),s=(s??=e.length)<0?Math.max(e.length+s,0):Math.min(s,e.length)):(t=(t??=e.length-1)<0?Math.max(e.length+t,-1):Math.min(t,e.length-1),s=(s??=-1)<-1?Math.max(e.length+s,-1):Math.min(s,e.length-1));const a=[];for(let o=t;r*o<r*s;o+=n)a.push(e[o]);return a}function Dh(e){return e.replace(/\b\w/g,t=>t.toUpperCase())}function Bh(e){return Vh(new Date,e)}function Vh(e,t){const s=new Intl.DateTimeFormat(void 0,{month:"long"}),n=new Intl.DateTimeFormat(void 
0,{month:"short"}),r=a=>a<10?"0"+a:a.toString();return t.replace(/%[YmdbBHM%]/g,a=>{switch(a){case"%Y":return e.getFullYear().toString();case"%m":return r(e.getMonth()+1);case"%d":return r(e.getDate());case"%b":return n.format(e);case"%B":return s.format(e);case"%H":return r(e.getHours());case"%M":return r(e.getMinutes());case"%%":return"%";default:return a}})}/* Gh(e): escapes regular-expression metacharacters in e by prefixing each one with a backslash, so the result can be used as a literal pattern. */function Gh(e){return e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&")}/* $h(e, t, s, n): replaces occurrences of t in e with s, at most n times. n === 0 returns e unchanged; a null, undefined or negative n removes the limit. The search text is escaped with Gh and matched with the g and u flags; an empty search text is replaced by an empty lookahead pattern so that it matches at empty positions. Matches beyond the limit are left as they are. */function $h(e,t,s,n){if(n===0)return e;let r=n==null||n<0?1/0:n;const a=t.length===0?new RegExp("(?=)","gu"):new RegExp(Gh(t),"gu");return e.replaceAll(a,o=>r>0?(--r,s):o)}/* Po and So: Error subclasses that add nothing of their own; where they are thrown and caught is outside this view. */var Po=class extends Error{},So=class extends Error{},/* tt: base class for runtime values. Holds the wrapped value, a type tag and a builtins Map. __bool__ wraps the JavaScript truthiness of the value in a V; toString delegates to String. */tt=class{type="RuntimeValue";value;builtins=new Map;constructor(e=void 0){this.value=e}__bool__(){return new V(!!this.value)}toString(){return String(this.value)}},/* W: integer value; only overrides the type tag. */W=class extends tt{type="IntegerValue"},/* he: float value; toString renders whole numbers with exactly one decimal place and everything else with the default number formatting. */he=class extends tt{type="FloatValue";toString(){return this.value%1===0?this.value.toFixed(1):this.value.toString()}},/* L: string value. Its builtins Map exposes upper, lower, strip, title, capitalize, length, rstrip, lstrip, startswith, endswith, split and replace. Every entry except length is wrapped in ie (defined outside this view) and returns a new runtime value; length is a W computed when the instance is created. */L=class extends tt{type="StringValue";builtins=new Map([["upper",new ie(()=>new L(this.value.toUpperCase()))],["lower",new ie(()=>new L(this.value.toLowerCase()))],["strip",new ie(()=>new L(this.value.trim()))],["title",new ie(()=>new L(Dh(this.value)))],["capitalize",new ie(()=>new L(this.value.charAt(0).toUpperCase()+this.value.slice(1)))],["length",new W(this.value.length)],["rstrip",new ie(()=>new L(this.value.trimEnd()))],["lstrip",new ie(()=>new L(this.value.trimStart()))],/* startswith and endswith take one argument that is either an L or a Y whose elements are L values; they return a V and throw Error for a missing argument or any other argument type. */["startswith",new ie(e=>{if(e.length===0)throw new Error("startswith() requires at least one argument");const t=e[0];if(t instanceof L)return new V(this.value.startsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("startswith() tuple elements must be strings");if(this.value.startsWith(s.value))return new V(!0)}return new V(!1)}throw new Error("startswith() argument must be a string or tuple of strings")})],["endswith",new ie(e=>{if(e.length===0)throw new Error("endswith() requires at least 
one argument");const t=e[0];if(t instanceof L)return new V(this.value.endsWith(t.value));if(t instanceof Y){for(const s of t.value){if(!(s instanceof L))throw new Error("endswith() tuple elements must be strings");if(this.value.endsWith(s.value))return new V(!0)}return new V(!1)}throw new Error("endswith() argument must be a string or tuple of strings")})],/* split(sep, maxsplit): sep must be an L or a ce (the default when omitted) and maxsplit a W (default -1, meaning no limit). With a ce separator the text is split on runs of non-whitespace after trimming leading whitespace; once maxsplit pieces are collected, the next piece and the rest of the text are kept together as the last element. With a string separator an empty separator throws Error, and pieces beyond maxsplit are re-joined with the separator into the last element. Returns a Y of L values. */["split",new ie(e=>{const t=e[0]??new ce;if(!(t instanceof L||t instanceof ce))throw new Error("sep argument must be a string or null");const s=e[1]??new W(-1);if(!(s instanceof W))throw new Error("maxsplit argument must be a number");let n=[];if(t instanceof ce){const r=this.value.trimStart();for(const{0:a,index:o}of r.matchAll(/\S+/g)){if(s.value!==-1&&n.length>=s.value&&o!==void 0){n.push(a+r.slice(o+a.length));break}n.push(a)}}else{if(t.value==="")throw new Error("empty separator");n=this.value.split(t.value),s.value!==-1&&n.length>s.value&&n.push(n.splice(s.value).join(t.value))}return new Y(n.map(r=>new L(r)))})],/* replace(old, new, count): old and new must be L values. count is taken from the third argument, either directly or from the count entry when that argument has type KeywordArgumentsValue, and must be a W or a ce. The actual replacement is delegated to $h. */["replace",new ie(e=>{if(e.length<2)throw new Error("replace() requires at least two arguments");const t=e[0],s=e[1];if(!(t instanceof L&&s instanceof L))throw new Error("replace() arguments must be strings");let n;if(e.length>2?e[2].type==="KeywordArgumentsValue"?n=e[2].value.get("count")??new ce:n=e[2]:n=new ce,!(n instanceof W||n instanceof ce))throw new Error("replace() count argument must be a number or null");return new L($h(this.value,t.value,s.value,n.value))})]])},/* V: boolean value; only overrides the type tag. */V=class extends tt{type="BooleanValue"},/* Rh matches every UTF-16 code unit from U+007F to U+FFFF. */Rh=/[\x7f-\uffff]/g;/* Co(e): replaces every code unit matched by Rh with a backslash-u escape made of four lower-case hexadecimal digits. */function Co(e){return e.replace(Rh,t=>"\\u"+t.charCodeAt(0).toString(16).padStart(4,"0"))}/* Et(e, t = {}, s = 0, n = true): serialises runtime value e to JSON-like text. Options read from t are indent, ensureAscii, separators and sortKeys. When separators is not supplied, the item separator is a bare comma if indent is set and comma-space otherwise, and the key separator is colon-space. In the cases visible here, NullValue yields null, UndefinedValue yields null when n is true and undefined otherwise, and integer, float and boolean values go through JSON.stringify. The remainder of the function lies outside this view. */function Et(e,t={},s=0,n=!0){const{indent:r=null,ensureAscii:a=!1,separators:o=null,sortKeys:i=!1}=t;let l,c;switch(o?[l,c]=o:r?(l=",",c=": "):(l=", ",c=": "),e.type){case"NullValue":return"null";case"UndefinedValue":return n?"null":"undefined";case"IntegerValue":case"FloatValue":case"BooleanValue":return JSON.stringify(e.value);case"StringValue":{let 
d=JSON.stringify(e.value);return a&&(d=Co(d)),d}case"ArrayValue":case"ObjectValue":{const d=r?" ".repeat(r):"",u=`
assets/qwen35-model-7KVn_FLm.js DELETED
@@ -1 +0,0 @@
1
- import{S as V,a as W,_ as H}from"./gpu-ops-BbLjsC0p.js";function K(E){const a=E<<16,e=new ArrayBuffer(4);new Uint32Array(e)[0]=a,new Float32Array(e)[0];const t=a>>>31&1,s=a>>>23&255,i=a&8388607;if(s===0)return t<<15;if(s===255)return t<<15|31744|(i?512:0);const n=s-127+15;return n>=31?t<<15|31744:n<=0?t<<15:t<<15|n<<10|i>>>13}class Y{constructor(a,e,t){if(this.gpu=a,this.config=e,this.textCfg=e.text_config,this.quantConfig=t,this.hiddenSize=this.textCfg.hidden_size,this.intermediateSize=this.textCfg.intermediate_size,this.numLayers=this.textCfg.num_hidden_layers,this.numHeads=this.textCfg.num_attention_heads,this.numKVHeads=this.textCfg.num_key_value_heads,this.headDim=this.textCfg.head_dim,this.vocabSize=this.textCfg.vocab_size,this.rmsEps=this.textCfg.rms_norm_eps,this.ropeTheta=this.textCfg.rope_parameters?.rope_theta||1e7,this.partialRotary=this.textCfg.partial_rotary_factor,this.partialDim=Math.floor(this.headDim*this.partialRotary),this.mropeSection=this.textCfg.rope_parameters?.mrope_section||[11,11,10],this.layerTypes=this.textCfg.layer_types,this.groupSize=t?.group_size||t?.config_groups?.group_0?.weights?.group_size||128,this.fp16Layers=new Set,t?.extra_config)for(const u of Object.keys(t.extra_config))this.fp16Layers.add(u);this.weights={},this.kvCache={},this.seqLen=0,this.pipelines={},this.singlePassMode=!0,this._replayFlat=null,this._forceMinSplits=0,this._ropeDelta=0;const s=this.numHeads*this.headDim,i=this.numKVHeads*this.headDim,n=(s+i)/2,o=Math.ceil(n/4);this._splitQKNormShaderKey=`fused_split_qknorm_kvstore_${o}`,V[this._splitQKNormShaderKey]||(V[this._splitQKNormShaderKey]=W(o))}compilePipelines(){const a=this.gpu;for(const[e,t]of Object.entries(V))this.pipelines[e]=a.getOrCreatePipeline(e,t)}uploadTensors(a){this.quantConfig?.quant_method==="compressed-tensors"&&(a=this._convertCompressedTensors(a));for(const[t,s]of Object.entries(a)){let 
i=s.data;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")){const n=new Float32Array(i.buffer,i.byteOffset,i.byteLength/4),o=new Uint16Array(n.length);for(let u=0;u<n.length;u++){const r=new Uint32Array(n.buffer,n.byteOffset+u*4,1)[0];o[u]=r>>>16}i=new Uint8Array(o.buffer)}if(s._partial){let{offset:n,totalSize:o}=s._partial;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")&&(n/=2,o/=2),n===0){const r=this.gpu.createBuffer(t,o,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC);this.gpu.device.queue.writeBuffer(r,0,i),this.weights[t]=r}else{const r=this.weights[t];r&&this.gpu.device.queue.writeBuffer(r,n,i)}}else this.weights[t]=this.gpu.createBufferFromData(t,i);(t.includes("post_attention_layernorm.weight")||t.includes("input_layernorm.weight")||t.includes("q_norm.weight")||t.includes("k_norm.weight")||t==="model.language_model.norm.weight")&&(this._normWeightRaw||(this._normWeightRaw={}),this._normWeightRaw[t]=new Uint32Array(i.buffer.slice(i.byteOffset,i.byteOffset+i.byteLength))),this._uploadedCount=(this._uploadedCount||0)+1}}_convertCompressedTensors(a){const e={};for(const[t,s]of Object.entries(a))if(!t.endsWith(".weight_shape")){if(t.endsWith(".weight_packed")){const i=t.slice(0,-14),n=s.shape,o=n[0],u=n[1],r=new Int32Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/4),h=new Int32Array(o*u);for(let c=0;c<o;c++)for(let m=0;m<u;m++)h[m*o+c]=r[c*u+m];e[`${i}.qweight`]={dtype:"I32",shape:[u,o],data:new Uint8Array(h.buffer)};continue}if(t.endsWith(".weight_scale")){const i=t.slice(0,-13),n=s.shape,o=n[0],u=n[1],r=new Uint16Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/2),h=Math.ceil(o/2),c=new Uint32Array(u*h);for(let m=0;m<u;m++)for(let f=0;f<o;f+=2){const p=r[f*u+m],w=f+1<o?r[(f+1)*u+m]:0,g=K(p),B=K(w);c[m*h+(f>>1)]=g|B<<16}e[`${i}.scales`]={dtype:"I32",shape:[u,h],data:new Uint8Array(c.buffer)};continue}e[t]=s}return e}async 
postProcessWeights(){const a=this.textCfg.linear_num_key_heads||0,e=this.hiddenSize,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=this.layerTypes.indexOf("linear_attention"),i=s>=0?`model.language_model.layers.${s}.linear_attn`:"";if(this.abQuantized=i&&!!this.weights[`${i}.in_proj_a.qweight`],this.abQuantized){const v=this.textCfg.linear_num_value_heads??a;this.linAlpha=this.gpu.createBuffer("lin_alpha",v*4,t),this.linBeta=this.gpu.createBuffer("lin_beta",v*4,t)}else{this.linABWeight={};const v=this.textCfg.linear_num_value_heads??a,q=e*2,C=v*q,d=2*C;for(let P=0;P<this.numLayers;P++)if(this.layerTypes[P]==="linear_attention"){const k=`model.language_model.layers.${P}.linear_attn`,D=this.weights[`${k}.in_proj_a.weight`],S=this.weights[`${k}.in_proj_b.weight`];if(D&&S){const $=this.gpu.createBuffer(`ab_merged_${P}`,d,t),M=this.gpu.device.createCommandEncoder();M.copyBufferToBuffer(D,0,$,0,C),M.copyBufferToBuffer(S,0,$,C,C),this.gpu.device.queue.submit([M.finish()]),this.linABWeight[P]=$}}}{const v=[];for(let d=0;d<this.numLayers;d++){if(this.layerTypes[d]==="linear_attention"){const P=`model.language_model.layers.${d}.linear_attn`,k=this.textCfg.linear_num_key_heads||0,D=this.textCfg.linear_key_head_dim||128,S=this.textCfg.linear_value_head_dim||128,$=this.textCfg.linear_num_value_heads??k,M=$*S,z=$/k*S,O=k*(D+D+z);v.push({prefix:`${P}.in_proj_qkv`,K:e,N:O}),v.push({prefix:`${P}.in_proj_z`,K:e,N:M}),v.push({prefix:`${P}.out_proj`,K:M,N:e})}else{const 
P=`model.language_model.layers.${d}.self_attn`,k=this.numHeads*this.headDim*2,D=this.numKVHeads*this.headDim;v.push({prefix:`${P}.q_proj`,K:e,N:k}),v.push({prefix:`${P}.k_proj`,K:e,N:D}),v.push({prefix:`${P}.v_proj`,K:e,N:D}),v.push({prefix:`${P}.o_proj`,K:this.numHeads*this.headDim,N:e})}v.push({prefix:`model.language_model.layers.${d}.mlp.gate_proj`,K:e,N:this.intermediateSize}),v.push({prefix:`model.language_model.layers.${d}.mlp.up_proj`,K:e,N:this.intermediateSize}),v.push({prefix:`model.language_model.layers.${d}.mlp.down_proj`,K:this.intermediateSize,N:e})}let q=0;const C=performance.now();for(const{prefix:d,K:P,N:k}of v)if(!this.weights[`${d}.qweight`]&&this.weights[`${d}.weight`]){const{qweight:D,scales:S}=await this._quantizeBF16ToINT4(this.weights[`${d}.weight`],P,k,this.groupSize,d.replace(/\./g,"_"));this.weights[`${d}.qweight`]=D,this.weights[`${d}.scales`]=S,q++}q>0&&console.log(`[QUANT] GPU-quantized ${q} BF16 projections to INT4 in ${(performance.now()-C).toFixed(0)}ms`)}this._mergedGateUp={};const n=this.intermediateSize,o=e/8,u=o*n*4,h=o/(this.groupSize/8)*n*2;for(let v=0;v<this.numLayers;v++){const q=`model.language_model.layers.${v}.mlp`,C=this.getQWeight(`${q}.gate_proj`),d=this.getQWeight(`${q}.up_proj`);if(C.qweight&&d.qweight){const P=this.gpu.createBuffer(`merged_qw_${v}`,u*2,t),k=this.gpu.createBuffer(`merged_sc_${v}`,h*2,t),D=this.gpu.device.createCommandEncoder();D.copyBufferToBuffer(C.qweight,0,P,0,u),D.copyBufferToBuffer(d.qweight,0,P,u,u),D.copyBufferToBuffer(C.scales,0,k,0,h),D.copyBufferToBuffer(d.scales,0,k,h,h),this.gpu.device.queue.submit([D.finish()]),this._mergedGateUp[v]={qweight:P,scales:k}}}this._fusedMLPParams={};const c=16+512*16;for(let v=0;v<this.numLayers;v++){const q=`model.language_model.layers.${v}.post_attention_layernorm.weight`,C=this._normWeightRaw?.[q];if(!C||!this._mergedGateUp[v])continue;const d=new ArrayBuffer(c),P=new Uint32Array(d),k=new 
Float32Array(d);P[0]=e,P[1]=n,P[2]=this.groupSize,k[3]=this.rmsEps;for(let D=0;D<C.length;D++)P[4+D]=C[D];this._fusedMLPParams[v]=this.gpu.createBufferFromData(`fused_mlp_params_${v}`,new Uint32Array(d),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}this._fusedSQKParams={};const m=this.headDim,f=this.numHeads,p=this.numKVHeads,w=f*m,g=p*m,B=(w+g)/2,_=Math.ceil(B/4),l=32+_*16,b=this.mropeSection[1]*3,U=this.mropeSection[2]*3,y=`fused_split_qknorm_kvstore_${_}`;V[y]||(V[y]=W(_,this.ropeTheta,b,U,this.partialDim)),this.pipelines[y]||(this.pipelines[y]=this.gpu.getOrCreatePipeline(y,V[y])),this._splitQKNormShaderKey=y;for(let v=0;v<this.numLayers;v++){if(this.layerTypes[v]!=="full_attention")continue;const q=`model.language_model.layers.${v}.self_attn`,C=`${q}.q_norm.weight`,d=`${q}.k_norm.weight`,P=this._normWeightRaw?.[C],k=this._normWeightRaw?.[d],D=new ArrayBuffer(l),S=new DataView(D);if(S.setUint32(0,f,!0),S.setUint32(4,p,!0),S.setUint32(8,m,!0),S.setFloat32(12,this.rmsEps,!0),S.setUint32(16,0,!0),S.setUint32(20,0,!0),S.setUint32(24,0,!0),S.setUint32(28,0,!0),P)for(let M=0;M<w/2;M++){const z=Math.floor(M/4),O=M%4;S.setUint32(32+z*16+O*4,P[M],!0)}if(k){const M=w/2;for(let z=0;z<g/2;z++){const O=M+z,G=Math.floor(O/4),R=O%4;S.setUint32(32+G*16+R*4,k[z],!0)}}const $=this.gpu.device.createBuffer({size:l,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:`fused_sqk_${v}`});this.gpu.device.queue.writeBuffer($,0,new Uint8Array(D)),this._fusedSQKParams[v]=$}await this._quantizeLmHead()}async _quantizeLmHead(){const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight";if(!this.weights[e])return;const t=this.hiddenSize,s=this.vocabSize,i=this.groupSize,n=t/8,o=t/i,u=performance.now(),r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,h=this.weights[e];this._lmHeadQWeight=this.gpu.createBuffer("lmhead_qweight",n*s*4,r);const 
c=this.gpu.createBuffer("lmhead_scales_f32",o*s*4,r),m=Math.ceil(o*s/2)*4;this._lmHeadScales=this.gpu.createBuffer("lmhead_scales",m,r);const f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:D}=await import("./gpu-ops-BbLjsC0p.js").then(S=>S.b);return{SHADERS:D}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData("quant_params",new Uint32Array([t,s,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[h,this._lmHeadQWeight,c,p]),g=65535,B=Math.min(s,g),_=Math.ceil(s/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(B,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:D}=await import("./gpu-ops-BbLjsC0p.js").then(S=>S.b);return{SHADERS:D}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(o*s/2),v=this.gpu.createBufferFromData("pack_params",new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[c,this._lmHeadScales,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),c.destroy(),p.destroy(),v.destroy();const P=(n*s*4/1e6).toFixed(0),k=(m/1e6).toFixed(0);console.log(`[QUANT] lm_head INT4 (GPU): ${(performance.now()-u).toFixed(0)}ms, ${P}MB qw + ${k}MB sc`)}async _quantizeBF16ToINT4(a,e,t,s,i){const n=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,o=e/8,u=e/s,r=this.gpu.createBuffer(`${i}_qweight`,o*t*4,n),h=this.gpu.createBuffer(`${i}_scales_f32`,u*t*4,n),c=Math.ceil(u*t/2)*4,m=this.gpu.createBuffer(`${i}_scales`,c,n),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:P}=await 
import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:P}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${i}_qp`,new Uint32Array([e,t,s]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,r,h,p]),g=65535,B=Math.min(t,g),_=Math.ceil(t/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(B,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:P}=await import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:P}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(u*t/2),v=this.gpu.createBufferFromData(`${i}_pp`,new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[h,m,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();return d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),v.destroy(),{qweight:r,scales:m}}initBuffers(a){a||(a=this.textCfg.max_position_embeddings||4096),this.maxSeqLen=a;const 
e=this.gpu,t=this.hiddenSize,s=this.intermediateSize,i=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.hidden=e.createBuffer("hidden_a",t*4,i),this.hiddenB=e.createBuffer("hidden_b",t*4,i),this.normed=e.createBuffer("normed",t*4,i),this.normedB=e.createBuffer("normed_b",t*4,i),this.mlpIntermediate=e.createBuffer("mlp_inter",s*4,i),this.mlpOut=e.createBuffer("mlp_out",t*4,i),this.logits=e.createBuffer("logits",this.vocabSize*4,i),this.argmaxResult=e.createBuffer("argmax_result",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this._argmaxReadback=e.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"argmax_readback"}),this._topkResult=e.createBuffer("topk_result",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._repMaxTokens=64,this._recentTokens=new Uint32Array(this._repMaxTokens),this._recentTokenCount=0,this._gpuRecentTokens=e.createBuffer("recent_tokens",this._repMaxTokens*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST);for(let B=0;B<this.numLayers;B++)this.layerTypes[B]==="full_attention"&&(this.kvCache[B]={keys:e.createBuffer(`kv_k_${B}`,a*this.numKVHeads*this.headDim*4),values:e.createBuffer(`kv_v_${B}`,a*this.numKVHeads*this.headDim*4)});this.qProjFull=e.createBuffer("q_proj_full",this.numHeads*this.headDim*2*4,i),this.qProj=e.createBuffer("q_proj_out",this.numHeads*this.headDim*4,i),this.qGate=e.createBuffer("q_gate",this.numHeads*this.headDim*4,i),this.kProj=e.createBuffer("k_proj_out",this.numKVHeads*this.headDim*4,i),this.vProj=e.createBuffer("v_proj_out",this.numKVHeads*this.headDim*4,i);const n=this.textCfg.linear_num_key_heads,o=this.textCfg.linear_key_head_dim,u=this.textCfg.linear_value_head_dim,r=this.textCfg.linear_num_value_heads??n,c=r/n*u,m=n*(o+o+c),f=r*u;this.linValueDim=f,this.linValueHeads=r,this.linQKV=e.createBuffer("lin_qkv",m*4,i),this.linZ=e.createBuffer("lin_z",f*4,i),this.linOut=e.createBuffer("lin_out",f*4,i);const 
p=Math.max(t,this.numHeads*this.headDim,f)*4;this.attnOut=e.createBuffer("attn_out",p,i),this._maxGqaSplits=64;const w=this.numHeads*this._maxGqaSplits*(this.headDim+2)*4;this._gqaPartials=e.createBuffer("gqa_partials",w,i),this.linState={},this.linConvHist={};for(let B=0;B<this.numLayers;B++)this.layerTypes[B]==="linear_attention"&&(this.linState[B]=e.createBuffer(`lin_state_${B}`,n*o*c*4,i),this.linConvHist[B]=e.createBuffer(`lin_conv_hist_${B}`,3*m*4,i));this.zeroBuf=e.createBuffer("zero_buf",t*4,i),this.useSplitK=!1,this.splitKSplits=1;const g=Math.max(t,s);this.splitKPartials=e.createBuffer("splitk_partials",g*this.splitKSplits*4,i),this.paramBufs={},this._gqaParamBuf=e.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_params"}),this._kvStoreParamBuf=e.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"kv_store_params"}),this._gqaData=new Uint8Array(32),this._gqaDv=new DataView(this._gqaData.buffer),this._kvData=new Uint8Array(16),this._kvDv=new DataView(this._kvData.buffer)}initVision(){const a=this.config.vision_config;if(!a){this.vision=null;return}const 
e=this.gpu,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=a.hidden_size,i=a.intermediate_size,n=a.out_hidden_size,o=a.depth,u=a.num_heads,r=s/u,h=a.patch_size,c=a.temporal_patch_size,m=a.spatial_merge_size,f=3*c*h*h,p=4096,w=s*m*m;this.vision={V:s,Vi:i,Vo:n,depth:o,heads:u,headDim:r,patchSize:h,temporalPatchSize:c,mergeSize:m,patchInputDim:f,maxVitTokens:p,mergedHidden:w,numPosEmbeddings:a.num_position_embeddings,numGridPerSide:Math.round(Math.sqrt(a.num_position_embeddings)),hidden:e.createBuffer("vit_hidden",p*s*4,t),hiddenB:e.createBuffer("vit_hidden_b",p*s*4,t),normed:e.createBuffer("vit_normed",p*s*4,t),q:e.createBuffer("vit_q",p*s*4,t),k:e.createBuffer("vit_k",p*s*4,t),v:e.createBuffer("vit_v",p*s*4,t),attnOut:e.createBuffer("vit_attn_out",p*s*4,t),mlpInter:e.createBuffer("vit_mlp_inter",p*i*4,t),mlpOut:e.createBuffer("vit_mlp_out",p*s*4,t),cos:e.createBuffer("vit_cos",p*r*4,t),sin:e.createBuffer("vit_sin",p*r*4,t),posEmbed:e.createBuffer("vit_pos_embed",p*s*4,t),merged:e.createBuffer("vit_merged",p/4*n*4,t),mergerNormed:e.createBuffer("vit_merger_normed",p*s*4,t),mergerInter:e.createBuffer("vit_merger_inter",p/4*w*4,t),patchInput:e.createBuffer("vit_patch_input",p*f*4,t),qkv:e.createBuffer("vit_qkv",p*3*s*4,t)},this.imageTokenId=this.config.image_token_id,this.visionStartTokenId=this.config.vision_start_token_id,this.visionEndTokenId=this.config.vision_end_token_id,console.log(`[VISION] Initialized: depth=${o}, hidden=${s}, heads=${u}, headDim=${r}, out=${n}`),console.log(`[VISION] Buffer allocation: ~${((p*s*4*12+p*i*4)/1024/1024).toFixed(0)} MB transient`)}async _readVisionPosEmbed(){const a=this.vision,e=this.weights["model.visual.pos_embed.weight"];if(!e)throw new Error("Vision pos_embed weight not found");const t=a.numPosEmbeddings,s=a.V,i=t*s*2,n=this.gpu.createReadbackBuffer("vit_pos_embed_readback",i),o=this.gpu.device.createCommandEncoder();o.copyBufferToBuffer(e,0,n,0,i),this.gpu.device.queue.submit([o.finish()]),await 
n.mapAsync(GPUMapMode.READ);const u=new Uint16Array(n.getMappedRange().slice(0));n.unmap(),n.destroy();const r=new Float32Array(t*s);for(let h=0;h<u.length;h++){const c=u[h]<<16,m=new ArrayBuffer(4);new Uint32Array(m)[0]=c,r[h]=new Float32Array(m)[0]}this._vitPosEmbedF32=r,console.log(`[VISION] Read pos_embed: ${t}×${s} (${(i/1024).toFixed(0)} KB)`)}_interpolatePosEmbed(a,e){const t=this.vision,s=t.V,i=t.numGridPerSide,n=this._vitPosEmbedF32,o=t.mergeSize,u=a*e,r=new Float32Array(u*s);for(let p=0;p<a;p++){const w=a===1?0:p*(i-1)/(a-1),g=Math.min(Math.floor(w),i-1),B=Math.min(g+1,i-1),_=w-g;for(let l=0;l<e;l++){const b=e===1?0:l*(i-1)/(e-1),U=Math.min(Math.floor(b),i-1),y=Math.min(U+1,i-1),v=b-U,q=g*i+U,C=g*i+y,d=B*i+U,P=B*i+y,k=(1-_)*(1-v),D=(1-_)*v,S=_*(1-v),$=_*v,M=p*e+l;for(let z=0;z<s;z++)r[M*s+z]=k*n[q*s+z]+D*n[C*s+z]+S*n[d*s+z]+$*n[P*s+z]}}const h=a/o,c=e/o,m=new Float32Array(u*s);let f=0;for(let p=0;p<h;p++)for(let w=0;w<c;w++)for(let g=0;g<o;g++)for(let B=0;B<o;B++){const _=p*o+g,l=w*o+B,b=_*e+l;m.set(r.subarray(b*s,b*s+s),f*s),f++}return m}_computeVisionRoPE(a,e){const t=this.vision;t.headDim/2;const s=t.mergeSize,i=a/s,n=e/s,o=a*e,u=t.headDim/2,r=u/2,h=Math.max(a,e),c=new Float32Array(h*r);for(let w=0;w<h;w++)for(let g=0;g<r;g++){const B=1/Math.pow(1e4,2*g/u);c[w*r+g]=w*B}const m=new Float32Array(o*t.headDim),f=new Float32Array(o*t.headDim);let p=0;for(let w=0;w<i;w++)for(let g=0;g<n;g++)for(let B=0;B<s;B++)for(let _=0;_<s;_++){const l=w*s+B,b=g*s+_,U=p*t.headDim;for(let y=0;y<r;y++){const v=c[l*r+y],q=c[b*r+y];m[U+y]=Math.cos(v),m[U+r+y]=Math.cos(q),m[U+2*r+y]=Math.cos(v),m[U+3*r+y]=Math.cos(q),f[U+y]=Math.sin(v),f[U+r+y]=Math.sin(q),f[U+2*r+y]=Math.sin(v),f[U+3*r+y]=Math.sin(q)}p++}return{cos:m,sin:f}}async preprocessImage(a){const e=this.vision,t=e.patchSize,s=e.mergeSize,i=t*s,n=65536,o=Math.min(16777216,e.maxVitTokens*t*t),u=new Image;await new 
Promise((k,D)=>{u.onload=k,u.onerror=D,u.src=a});let{width:r,height:h}=u,c=Math.max(i,Math.round(h/i)*i),m=Math.max(i,Math.round(r/i)*i);if(c*m>o){const k=Math.sqrt(h*r/o);c=Math.max(i,Math.floor(h/k/i)*i),m=Math.max(i,Math.floor(r/k/i)*i)}else if(c*m<n){const k=Math.sqrt(n/(h*r));c=Math.ceil(h*k/i)*i,m=Math.ceil(r*k/i)*i}const p=new OffscreenCanvas(m,c).getContext("2d");p.drawImage(u,0,0,m,c);const g=p.getImageData(0,0,m,c).data,B=[.5,.5,.5],_=[.5,.5,.5],l=c/t,b=m/t,U=l*b,y=l/s,v=b/s,q=e.temporalPatchSize,C=3*q*t*t,d=new Float32Array(U*C);let P=0;for(let k=0;k<y;k++)for(let D=0;D<v;D++)for(let S=0;S<s;S++)for(let $=0;$<s;$++){const M=k*s+S,z=D*s+$,O=M*t,G=z*t,R=P*C;for(let F=0;F<q;F++)for(let T=0;T<3;T++)for(let x=0;x<t;x++)for(let A=0;A<t;A++){const j=((O+x)*m+(G+A))*4+T,L=(g[j]/255-B[T])/_[T],Q=((T*q+F)*t+x)*t+A;d[R+Q]=L}P++}return console.log(`[VISION] Preprocessed: ${r}x${h} → ${m}x${c}, ${U} patches (${l}x${b}), merge→${U/4} tokens`),{pixels:d,gridH:l,gridW:b,numPatches:U,imgW:m,imgH:c}}async visionForward(a){const e=this.vision,t=this.gpu,{pixels:s,gridH:i,gridW:n,numPatches:o}=a;this._vitPosEmbedF32||await this._readVisionPosEmbed(),t.device.queue.writeBuffer(e.patchInput,0,s);const u=this._interpolatePosEmbed(i,n);t.device.queue.writeBuffer(e.posEmbed,0,u);const{cos:r,sin:h}=this._computeVisionRoPE(i,n);t.device.queue.writeBuffer(e.cos,0,r),t.device.queue.writeBuffer(e.sin,0,h),t.beginBatch();const c=this.weights["model.visual.patch_embed.proj.weight"],m=this.weights["model.visual.patch_embed.proj.bias"],f=this.makeUniform("vit_patch_params",[e.patchInputDim,e.V]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.patchInput,c,m,e.hidden,f])],Math.ceil(e.V/32),o);const p=this.makeUniform("vit_add_len",[o*e.V]);t.dispatch(this.pipelines.vit_add,[t.createBindGroup(this.pipelines.vit_add,0,[e.hidden,e.posEmbed,p])],Math.ceil(o*e.V/256));for(let 
d=0;d<e.depth;d++)t.endBatch(),t.beginBatch(),this._vitBlock(d,o);const w=this.weights["model.visual.merger.norm.weight"],g=this.weights["model.visual.merger.norm.bias"],B=this.makeUniform("vit_merger_ln_params",[e.V,1e-6]);t.dispatch(this.pipelines.vit_layernorm,[t.createBindGroup(this.pipelines.vit_layernorm,0,[e.hidden,w,g,e.mergerNormed,B])],o);const _=o/4,l=this.weights["model.visual.merger.linear_fc1.weight"],b=this.weights["model.visual.merger.linear_fc1.bias"],U=this.makeUniform("vit_merger_fc1_params",[e.mergedHidden,e.mergedHidden]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerNormed,l,b,e.mergerInter,U])],Math.ceil(e.mergedHidden/32),_);const y=this.makeUniform("vit_gelu_len",[_*e.mergedHidden]);t.dispatch(this.pipelines.vit_gelu,[t.createBindGroup(this.pipelines.vit_gelu,0,[e.mergerInter,y])],Math.ceil(_*e.mergedHidden/256));const v=this.weights["model.visual.merger.linear_fc2.weight"],q=this.weights["model.visual.merger.linear_fc2.bias"],C=this.makeUniform("vit_merger_fc2_params",[e.mergedHidden,e.Vo]);return t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerInter,v,q,e.merged,C])],Math.ceil(e.Vo/32),_),t.endBatch(),await t.device.queue.onSubmittedWorkDone(),console.log(`[VISION] Forward done: ${o} patches → ${_} merged tokens (dim=${e.Vo})`),{numMergedTokens:_,embedBuffer:e.merged}}_vitBlock(a,e){const t=this.vision,s=this.gpu,i=`model.visual.blocks.${a}`,n=this.weights[`${i}.norm1.weight`],o=this.weights[`${i}.norm1.bias`],u=this.makeUniform(`vit_ln1_${a}`,[t.V,1e-6]);s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,n,o,t.normed,u])],e);const r=this.weights[`${i}.attn.qkv.weight`],h=this.weights[`${i}.attn.qkv.bias`],c=t.V,m=c*c*2,f=c*2,p=this.makeUniform(`vit_qkv_mv_${a}`,[c,c]);for(let M=0;M<3;M++){const 
z=[t.q,t.k,t.v][M],O=s.createBindGroupWithOffsets(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,{buffer:r,offset:M*m,size:m},{buffer:h,offset:M*f,size:f},z,p]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[O],Math.ceil(c/32),e)}const w=this.makeUniform(`vit_rope_${a}`,[e,t.heads,t.headDim]);s.dispatch(this.pipelines.vit_rope,[s.createBindGroup(this.pipelines.vit_rope,0,[t.q,t.k,t.attnOut,t.mlpOut,t.cos,t.sin,w])],Math.ceil(e*t.heads*t.headDim/256));const g=1/Math.sqrt(t.headDim),B=this.makeUniform(`vit_attn_${a}`,[e,t.heads,t.headDim,g]);s.dispatch(this.pipelines.vit_attention,[s.createBindGroup(this.pipelines.vit_attention,0,[t.attnOut,t.mlpOut,t.v,t.q,B])],e,t.heads);const _=this.weights[`${i}.attn.proj.weight`],l=this.weights[`${i}.attn.proj.bias`],b=this.makeUniform(`vit_proj_${a}`,[c,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.q,_,l,t.mlpOut,b])],Math.ceil(c/32),e);const U=this.makeUniform(`vit_res1_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,U])],Math.ceil(e*c/256));const y=this.weights[`${i}.norm2.weight`],v=this.weights[`${i}.norm2.bias`];s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,y,v,t.normed,u])],e);const q=this.weights[`${i}.mlp.linear_fc1.weight`],C=this.weights[`${i}.mlp.linear_fc1.bias`],d=this.makeUniform(`vit_mlp_fc1_${a}`,[c,t.Vi]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,q,C,t.mlpInter,d])],Math.ceil(t.Vi/32),e);const P=this.makeUniform(`vit_gelu_${a}`,[e*t.Vi]);s.dispatch(this.pipelines.vit_gelu_tanh,[s.createBindGroup(this.pipelines.vit_gelu_tanh,0,[t.mlpInter,P])],Math.ceil(e*t.Vi/256));const 
k=this.weights[`${i}.mlp.linear_fc2.weight`],D=this.weights[`${i}.mlp.linear_fc2.bias`],S=this.makeUniform(`vit_mlp_fc2_${a}`,[t.Vi,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.mlpInter,k,D,t.mlpOut,S])],Math.ceil(c/32),e);const $=this.makeUniform(`vit_res2_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,$])],Math.ceil(e*c/256))}computeMultimodalPositions(a,e,t,s){const i=this.vision.mergeSize,n=t/i,o=s/i,u=a.length,r=new Array(3);for(let f=0;f<3;f++)r[f]=new Int32Array(u);let h=0,c=0;for(let f=0;f<u;f++)if(a[f]===this.imageTokenId){const w=c,g=Math.floor(w/o),B=w%o;r[0][f]=h,r[1][f]=h+g,r[2][f]=h+B,c++,c===e&&(h+=Math.max(n,o))}else r[0][f]=h,r[1][f]=h,r[2][f]=h,h++;const m=h-u;return{positionIds3D:r,ropeDelta:m}}initB2Buffers(){const a=this.gpu,e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.b2={},this.b2.hidden=a.createBuffer("b2_hidden_a",2*e*4,s),this.b2.hiddenB=a.createBuffer("b2_hidden_b",2*e*4,s),this.b2.normed=a.createBuffer("b2_normed",2*e*4,s),this.b2.mlpIntermediate=a.createBuffer("b2_mlp_inter",2*t*4,s),this.b2.mlpOut=a.createBuffer("b2_mlp_out",2*e*4,s),this.b2.logits=a.createBuffer("b2_logits",2*this.vocabSize*4,s),this.b2.zeroBuf=a.createBuffer("b2_zero_buf",2*e*4,s),this.b2.qProjFull=a.createBuffer("b2_q_proj_full",2*this.numHeads*this.headDim*2*4,s),this.b2.qProj=a.createBuffer("b2_q_proj_out",2*this.numHeads*this.headDim*4,s),this.b2.qGate=a.createBuffer("b2_q_gate",2*this.numHeads*this.headDim*4,s),this.b2.kProj=a.createBuffer("b2_k_proj_out",2*this.numKVHeads*this.headDim*4,s),this.b2.vProj=a.createBuffer("b2_v_proj_out",2*this.numKVHeads*this.headDim*4,s);const 
i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,h=this.linValueHeads/i*o,c=i*(n+n+h),m=this.linValueDim;this.b2.linQKV=a.createBuffer("b2_lin_qkv",2*c*4,s),this.b2.linZ=a.createBuffer("b2_lin_z",2*m*4,s),this.b2.linOut=a.createBuffer("b2_lin_out",2*m*4,s);const f=Math.max(e,this.numHeads*this.headDim,m)*4;if(this.b2.attnOut=a.createBuffer("b2_attn_out",2*f,s),this.abQuantized){const p=this.textCfg.linear_num_value_heads??i;this.b2.linAlpha=a.createBuffer("b2_lin_alpha",2*p*4,s),this.b2.linBeta=a.createBuffer("b2_lin_beta",2*p*4,s)}this.b2.topkResult0=a.createBuffer("b2_topk0",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkResult1=a.createBuffer("b2_topk1",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkReadback0=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb0"}),this.b2.topkReadback1=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb1"}),this.b2.argmaxResult0=a.createBuffer("b2_argmax0",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxResult1=a.createBuffer("b2_argmax1",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxReadback0=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb0"}),this.b2.argmaxReadback1=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb1"}),this.b2._gqaParamBuf0=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params0"}),this.b2._gqaParamBuf1=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params1"}),this.b2._dims={H:e,I:t,linQKVDim:c,valueDim:m,linHeads:i,linKeyDim:n,linValDim:o,linEVD:h,qProjSize:this.numHeads*this.headDim,qProjFullSize:this.numHeads*this.headDim*2
,kProjSize:this.numKVHeads*this.headDim,vProjSize:this.numKVHeads*this.headDim,attnOutElems:f/4},console.log(`[B2] Allocated batch=2 buffers (${((2*(e+e+e+t+e+this.vocabSize)*4+2*f)/1e6).toFixed(1)} MB activations)`)}runWithOffsets(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroupWithOffsets(i,0,e);this.gpu.dispatch(i,[n],t,s)}makeUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let o=0;o<e.length;o++)Number.isInteger(e[o])?s.setUint32(o*4,e[o],!0):s.setFloat32(o*4,e[o],!0);const i=a+"_"+e.join("_");if(this.paramBufs[i])return this.paramBufs[i];const n=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(n,0,new Uint8Array(t)),this.paramBufs[i]=n,n}_makeMixedUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let n=0;n<e.length;n++){const o=e[n];o.u!==void 0?s.setUint32(n*4,o.u,!0):s.setFloat32(n*4,o.f,!0)}this._mixedUniformBufs||(this._mixedUniformBufs={});let i=this._mixedUniformBufs[a];return i||(i=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a}),this._mixedUniformBufs[a]=i),this.gpu.device.queue.writeBuffer(i,0,new Uint8Array(t)),i}makeUniformTyped(a,e,t){const s=e.length+t.length,i=new ArrayBuffer(Math.max(16,Math.ceil(s*4/16)*16)),n=new DataView(i);let o=0;for(const h of e)n.setUint32(o,h,!0),o+=4;for(const h of t)n.setFloat32(o,h,!0),o+=4;const u=a+"_"+[...e,...t].join("_");if(this.paramBufs[u])return this.paramBufs[u];const r=this.gpu.device.createBuffer({size:i.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(r,0,new Uint8Array(i)),this.paramBufs[u]=r,r}makeFusedNormMLPUniform(a){const e=`fused_anorm_mlp_${a}`;if(this.paramBufs[e])return this.paramBufs[e];const t=this.hiddenSize,s=this.intermediateSize,i=16+640*16,n=new 
ArrayBuffer(i),o=new DataView(n);o.setUint32(0,t,!0),o.setUint32(4,s,!0),o.setUint32(8,this.groupSize,!0),o.setFloat32(12,this.rmsEps,!0);const u=`model.language_model.layers.${a}.post_attention_layernorm.weight`,r=this._normWeightRaw?.[u];if(!r)throw new Error(`Norm weight not cached for layer ${a}`);for(let c=0;c<t/2;c++){const m=Math.floor(c/4),f=c%4;o.setUint32(16+m*16+f*4,r[c],!0)}const h=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:e});return this.gpu.device.queue.writeBuffer(h,0,new Uint8Array(n)),this.paramBufs[e]=h,h}fusedNormGptqOp(a,e,t,s,i,n,o,u){const r=this.getQWeight(s);if(!r.qweight)return null;const h=this.makeFusedNormGPTQUniform(i,o),c=u?"fused_norm_gptq":"fused_norm_gptq_noadd",m=u?"fnorm_gptq_":"fnorm_gptqna_",f=u?[a,e,r.qweight,r.scales,t,h]:[a,r.qweight,r.scales,t,h];return this.prepOpCached(`${m}${s}`,c,f,this.wg4(o))}makeFusedNormGPTQUniform(a,e){const t=`fused_norm_gptq_${a}_${e}`;if(this.paramBufs[t])return this.paramBufs[t];const s=this.hiddenSize,i=16+512*16,n=new ArrayBuffer(i),o=new DataView(n);o.setUint32(0,s,!0),o.setUint32(4,e,!0),o.setUint32(8,this.groupSize,!0),o.setFloat32(12,this.rmsEps,!0);const u=this._normWeightRaw?.[a];if(!u)throw new Error(`Norm weight not cached: ${a}`);for(let h=0;h<s/2;h++){const c=Math.floor(h/4),m=h%4;o.setUint32(16+c*16+m*4,u[h],!0)}const r=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:t});return this.gpu.device.queue.writeBuffer(r,0,new Uint8Array(n)),this.paramBufs[t]=r,r}run(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);this.gpu.dispatch(i,[n],t,s)}runCached(a,e,t,s,i=1){const n=this.pipelines[e],o=this.gpu.getCachedBindGroup(a,n,0,t);this.gpu.dispatch(n,[o],s,i)}prepOp(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);return{pipeline:i,bindGroups:[n],workgroupsX:t,workgroupsY:s}}prepOpCached(a,e,t,s,i=1){const 
n=this.pipelines[e],o=this.gpu.getCachedBindGroup(a,n,0,t);return{pipeline:n,bindGroups:[o],workgroupsX:s,workgroupsY:i}}wg(a){return Math.ceil(a/256)}wg4(a){return Math.ceil(a/32)}wg8(a){return Math.ceil(a/8)}getQWeight(a){return{qweight:this.weights[a+".qweight"],scales:this.weights[a+".scales"],qzeros:this.weights[a+".qzeros"]}}embedding(a){const e=this.weights["model.language_model.embed_tokens.weight"],t=this.makeUniform("emb_params",[a,this.hiddenSize]);this.run("embedding",[e,this.hidden,t],this.wg(this.hiddenSize))}embeddingFromVisionBuffer(a,e){const t=this.hiddenSize;this.gpu.copyBuffer(a,this.hidden,t*4,e*t*4,0)}embeddingFromArgmax(){const a=this.weights["model.language_model.embed_tokens.weight"];this._embedArgmaxParams||(this._embedArgmaxParams=this.makeUniform("emb_argmax_params",[this.hiddenSize])),this.runCached("embed_from_argmax","embed_from_argmax",[a,this.hidden,this.argmaxResult,this._embedArgmaxParams],this.wg(this.hiddenSize))}_dispatchLmHead(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,s=a/this.groupSize%4===0,i=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(s){const n=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg8(e))}else{const n=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg4(e))}}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.makeUniform("lmhead_params",[this.hiddenSize,this.vocabSize]);this.runCached("lm_head","bf16_matvec",[this.normed,t,this.logits,s],this.wg(this.vocabSize))}}rmsNorm(a,e,t,s){const i=this.makeUniform("rmsnorm_params",[this.hiddenSize,this.rmsEps]);s?this.runCached(s,"rmsnorm",[a,t,e,i],1):this.run("rmsnorm",[a,t,e,i],1)}gptqMatvec(a,e,t,s,i){const 
n=this.getQWeight(t);if(!n.qweight)return;const o=s/this.groupSize,u=o%4===0;if(this.useSplitK&&u){let r=this.splitKSplits;for(;r>1&&o%(r*4)!==0;)r>>=1;if(r>1){const h=this.makeUniform(`mv_sk_${s}_${i}_${r}`,[s,i,this.groupSize,r]);this.runCached(`sk4tf16_${t}`,"gptq_matvec_4t_f16_sk",[a,n.qweight,n.scales,this.splitKPartials,h],this.wg8(i),r);const c=this.makeUniform(`rsk_${i}_${r}`,[i,r]);this.runCached(`rsk_${t}`,"reduce_splitk",[this.splitKPartials,e,c],this.wg(i));return}}if(u){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg4(i))}}gptqMatvecOp(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;if(s/this.groupSize%4===0){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg4(i))}}bf16Matvec(a,e,t,s,i,n){const o=this.makeUniform(`bf16mv_${s}_${i}`,[s,i]);n?this.runCached(n,"bf16_matvec",[a,t,e,o],this.wg(i)):this.run("bf16_matvec",[a,t,e,o],this.wg(i))}siluMul(a,e,t,s,i){const n=this.makeUniform(`silu_${s}`,[s]);i?this.runCached(i,"silu_mul",[a,e,t,n],this.wg(s)):this.run("silu_mul",[a,e,t,n],this.wg(s))}addVectors(a,e,t,s){const 
i=this.makeUniform(`add_${t}`,[t]);s?this.runCached(s,"add",[a,e,i],this.wg(t)):this.run("add",[a,e,i],this.wg(t))}addAndRmsNorm(a,e,t,s,i){const n=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);i?this.runCached(i,"add_rmsnorm",[a,e,s,t,n],1):this.run("add_rmsnorm",[a,e,s,t,n],1)}mlp(a,e){const t=`model.language_model.layers.${e}.mlp`,s=this.hiddenSize,i=this.intermediateSize,n=this.getQWeight(`${t}.gate_proj`),o=this.getQWeight(`${t}.up_proj`),u=this.makeUniform(`fused_mv_${s}_${i}`,[s,i,this.groupSize]);if(s/this.groupSize%4===0){const c=this.gpu.hasF16?"fused_gate_up_silu_4t_f16":"fused_gate_up_silu_4t",m=this.gpu.hasF16?"fused_gus4t16_":"fused_gus4t_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,o.qweight,o.scales,this.mlpIntermediate,u],this.wg8(i))}else{const c=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",m=this.gpu.hasF16?"fused_gus16_":"fused_gus_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,o.qweight,o.scales,this.mlpIntermediate,u],this.wg4(i))}this.gptqMatvec(this.mlpIntermediate,this.mlpOut,`${t}.down_proj`,i,s)}fullAttentionFused(a,e,t,s,i,n,o,u){i=i||this.normed;const r=`model.language_model.layers.${t}.self_attn`,h=this.hiddenSize,c=this.headDim,m=this.numHeads,f=this.numKVHeads,p=m/f,w=t===0,g=`model.language_model.layers.${t}.input_layernorm.weight`,B=this.weights[g];if(w){const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"rmsnorm",[a,B,i,z],1)}else{const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"add_rmsnorm_ro",[a,e,B,i,z],1)}const _=this.gptqMatvecOp(i,this.qProjFull,`${r}.q_proj`,h,m*c*2),l=this.gptqMatvecOp(i,this.kProj,`${r}.k_proj`,h,f*c),b=this.gptqMatvecOp(i,this.vProj,`${r}.v_proj`,h,f*c);this.gpu.dispatchMulti([_,l,b].filter(Boolean));const 
U=this.kvCache[t],y=this._fusedSQKParams[t],v=u??s;this._gqaDv.setUint32(0,v,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,n??s,!0),this._gqaDv.setUint32(12,o??s,!0),this.gpu.device.queue.writeBuffer(y,16,this._gqaData,0,16),this.runCached(`fa_sqk_${t}`,this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,U.keys,U.values,y],m+f);const q=(u??s)+1,C=this._forceMinSplits||1,d=Math.max(C,Math.min(Math.max(1,Math.ceil(q/32)),this._maxGqaSplits)),P=d>1?this._gqaPartials:this.attnOut;if(this._gqaDv.setUint32(0,q,!0),this._gqaDv.setUint32(4,c,!0),this._gqaDv.setUint32(8,f,!0),this._gqaDv.setUint32(12,m,!0),this._gqaDv.setUint32(16,p,!0),this._gqaDv.setUint32(20,d,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.run("gqa_attention_head",[this.qProj,U.keys,U.values,P,this._gqaParamBuf],m,d),d>1){this._gqaReduceParams||(this._gqaReduceParams=this.gpu.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_reduce_params"}));const z=new Uint8Array(16),O=new DataView(z.buffer);O.setUint32(0,c,!0),O.setUint32(4,d,!0),O.setUint32(8,m,!0),this.gpu.device.queue.writeBuffer(this._gqaReduceParams,0,z),this.run("gqa_reduce",[this._gqaPartials,this.attnOut,this._gqaReduceParams],m)}const k=this.getQWeight(`${r}.o_proj`),D=m*c,$=D/this.groupSize%4===0,M=this.makeUniform(`fused_sig_mv_${D}_${h}`,[D,h,this.groupSize]);if($){const z=this.gpu.hasF16?"fused_sigmoid_gptq_4t_f16":"fused_sigmoid_gptq_4t",O=this.gpu.hasF16?"fused_sig4t16_oproj_":"fused_sig4t_oproj_";this.runCached(`${O}${t}`,z,[this.attnOut,this.qGate,k.qweight,k.scales,this.qProj,M],this.wg8(h))}else{const z=this.gpu.hasF16?"fused_sigmoid_gptq_f16":"fused_sigmoid_gptq",O=this.gpu.hasF16?"fused_sig16_oproj_":"fused_sig_oproj_";this.runCached(`${O}${t}`,z,[this.attnOut,this.qGate,k.qweight,k.scales,this.qProj,M],this.wg4(h))}}linearAttentionFused(a,e,t,s){s=s||this.normed;const 
i=`model.language_model.layers.${t}.linear_attn`,n=this.hiddenSize,o=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,h=this.linValueHeads,m=h/o*r,f=o*(u+u+m),p=this.linValueDim,w=t===0,g=`model.language_model.layers.${t}.input_layernorm.weight`,B=this.weights[g];if(w){const d=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"rmsnorm",[a,B,s,d],1)}else{const d=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"add_rmsnorm_ro",[a,e,B,s,d],1)}{const d=[this.gptqMatvecOp(s,this.linQKV,`${i}.in_proj_qkv`,n,f),this.gptqMatvecOp(s,this.linZ,`${i}.in_proj_z`,n,p)];this.abQuantized&&(d.push(this.gptqMatvecOp(s,this.linAlpha,`${i}.in_proj_a`,n,h)),d.push(this.gptqMatvecOp(s,this.linBeta,`${i}.in_proj_b`,n,h))),this.gpu.dispatchMulti(d.filter(Boolean))}const _=this.weights[`${i}.conv1d.weight`],l=this.weights[`${i}.A_log`],b=this.weights[`${i}.dt_bias`],U=this.weights[`${i}.norm.weight`];if(this.abQuantized){const d=`fused_cdn_q_${o}_${u}_${r}_${f}_${h}`;let P=this.paramBufs[d];if(!P){const k=new ArrayBuffer(32),D=new DataView(k);D.setUint32(0,o,!0),D.setUint32(4,u,!0),D.setUint32(8,r,!0),D.setUint32(12,f,!0),D.setFloat32(16,this.rmsEps,!0),D.setUint32(20,0,!0),D.setUint32(24,h,!0),P=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_q_params"}),this.gpu.device.queue.writeBuffer(P,0,new Uint8Array(k)),this.paramBufs[d]=P}this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,this.linAlpha,this.linBeta,l,b,U,P],o)}else{const d=`fused_cdn_ext_${o}_${u}_${r}_${f}_${n}_${h}`;let P=this.paramBufs[d];if(!P){const D=new ArrayBuffer(32),S=new 
DataView(D);S.setUint32(0,o,!0),S.setUint32(4,u,!0),S.setUint32(8,r,!0),S.setUint32(12,f,!0),S.setFloat32(16,this.rmsEps,!0),S.setUint32(20,n,!0),S.setUint32(24,h,!0),P=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_ext_params"}),this.gpu.device.queue.writeBuffer(P,0,new Uint8Array(D)),this.paramBufs[d]=P}const k=this.linABWeight[t];this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,s,k,l,b,U,P],o)}const y=this.getQWeight(`${i}.out_proj`),q=p/this.groupSize%4===0,C=this.makeUniform(`fused_silu_mv_${p}_${n}`,[p,n,this.groupSize]);if(q){const d=this.gpu.hasF16?"fused_silu_gptq_4t_f16":"fused_silu_gptq_4t",P=this.gpu.hasF16?"fused_silu4t16_oproj_":"fused_silu4t_oproj_";this.runCached(`${P}${t}`,d,[this.linZ,this.linOut,y.qweight,y.scales,this.attnOut,C],this.wg8(n))}else{const d=this.gpu.hasF16?"fused_silu_gptq_f16":"fused_silu_gptq",P=this.gpu.hasF16?"fused_silu16_oproj_":"fused_silu_oproj_";this.runCached(`${P}${t}`,d,[this.linZ,this.linOut,y.qweight,y.scales,this.attnOut,C],this.wg4(n))}}fusedNormMLP(a,e,t,s,i,n){n=n||this.normed;const o=this.hiddenSize,u=`model.language_model.layers.${i}.post_attention_layernorm.weight`,r=this.weights[u],h=this.makeUniform("add_rmsnorm_params",[o,this.rmsEps]),c=`mlp_norm_${i}_${a===this.hidden?"a":"b"}`;this.runCached(c,"three_way_add_rmsnorm",[a,t,s,r,e,n,h],1),this.mlp(n,i)}decoderLayer(a,e,t,s,i,n,o,u){let r;this.layerTypes[a]==="full_attention"?(this.fullAttentionFused(t,i,a,e,void 0,n,o,u),r=this.qProj):(this.linearAttentionFused(t,i,a),r=this.attnOut),this.fusedNormMLP(t,s,i,r,a)}async forward(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let r=0;r<this.numLayers;r++){this.decoderLayer(r,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const 
n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const o=this.temperature??.7;if(o>0){const r=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(r>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:r},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:o},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const r=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,r],1)}return this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this._readAndSample()}forwardSubmit(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let r=0;r<this.numLayers;r++){this.decoderLayer(r,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const o=this.temperature??.7;if(o>0){const 
r=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(r>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:r},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:o},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const r=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,r],1)}this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch()}_buildReplayFromRecording(){const a=this.gpu.stopRecording(),e=this.pipelines.gqa_attention_head,t=[];for(const s of a)if(s.multi)for(const i of s.ops)t.push({p:i.pipeline,bg:i.bindGroup,x:i.wgX,y:i.wgY,gqa:i.pipeline===e?1:0});else t.push({p:s.pipeline,bg:s.bindGroup,x:s.wgX,y:s.wgY,gqa:s.pipeline===e?1:0});this._replayFlat=t,console.log(`[PERF] Recorded ${t.length} flat replay ops for core forward`)}_replayCoreForward(a){const e=this.gpu,t=a-this._ropeDelta,s=t+1,i=Math.min(Math.max(1,Math.ceil(s/32)),this._maxGqaSplits);this._gqaDv.setUint32(0,t,!0),this._gqaDv.setUint32(4,a,!0),this._gqaDv.setUint32(8,a,!0),this._gqaDv.setUint32(12,a,!0);for(let 
r=0;r<this.numLayers;r++)this.layerTypes[r]==="full_attention"&&e.device.queue.writeBuffer(this._fusedSQKParams[r],16,this._gqaData,0,16);this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,i,!0),e.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData,0,24),this._gqaReduceParams&&(this._gqaDv.setUint32(0,this.headDim,!0),this._gqaDv.setUint32(4,i,!0),this._gqaDv.setUint32(8,this.numHeads,!0),this._gqaDv.setUint32(12,0,!0),e.device.queue.writeBuffer(this._gqaReduceParams,0,this._gqaData,0,16));const n=e._singlePass,o=this._replayFlat,u=o.length;for(let r=0;r<u;r++){const h=o[r];n.setPipeline(h.p),n.setBindGroup(0,h.bg),n.dispatchWorkgroups(h.x,h.gqa?i:h.y)}}embeddingB2(a,e){const t=this.hiddenSize,s=this.weights["model.language_model.embed_tokens.weight"],i=this.makeUniform("emb_params",[a,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:0,size:t*4},i],this.wg(t));const n=this.makeUniform(`emb_params_${e}`,[e,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:t*4,size:t*4},n],this.wg(t))}gptqMatvecB2(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return;const o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",r=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";this.runCached(`${r}${t}`,u,[a,n.qweight,n.scales,e,o],this.wg4(i))}gptqMatvecB2Op(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;const o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",r=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";return this.prepOpCached(`${r}${t}`,u,[a,n.qweight,n.scales,e,o],this.wg4(i))}fullAttentionB2(a,e,t,s){const 
i=`model.language_model.layers.${t}.self_attn`,n=this.hiddenSize,o=this.b2._dims,u=this.headDim,r=this.numHeads,h=this.numKVHeads,c=r/h,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:n*4},p,{buffer:this.b2.normed,offset:0,size:n*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:n*4,size:n*4},p,{buffer:this.b2.normed,offset:n*4,size:n*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);const g=this.gptqMatvecB2Op(this.b2.normed,this.b2.qProjFull,`${i}.q_proj`,n,r*u*2),B=this.gptqMatvecB2Op(this.b2.normed,this.b2.kProj,`${i}.k_proj`,n,h*u),_=this.gptqMatvecB2Op(this.b2.normed,this.b2.vProj,`${i}.v_proj`,n,h*u);this.gpu.dispatchMulti([g,B,_].filter(Boolean));const l=this.kvCache[t],b=this._fusedSQKParams[t],U=o.qProjFullSize*4,y=o.kProjSize*4,v=o.vProjSize*4,q=o.qProjSize*4;this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(b,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:0,size:U},{buffer:this.b2.kProj,offset:0,size:y},{buffer:this.b2.vProj,offset:0,size:v},{buffer:this.b2.qProj,offset:0,size:q},{buffer:this.b2.qGate,offset:0,size:q},l.keys,l.values,b],r+h),this._gqaDv.setUint32(0,s+1,!0),this._gqaDv.setUint32(4,s+1,!0),this._gqaDv.setUint32(8,s+1,!0),this._gqaDv.setUint32(12,s+1,!0),this.gpu.device.queue.writeBuffer(b,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:U,size:U},{buffer:this.b2.kProj,offset:y,size:y},{buffer:this.b2.vProj,offset:v,size:v},{buffer:this.b2.qProj,offset:q,size:q},{buffer:this.b2.qGate,offset:q,size:q},l.keys,l.values,b],r+h);const 
C=s+1,d=s+2;this._gqaDv.setUint32(0,C,!0),this._gqaDv.setUint32(4,u,!0),this._gqaDv.setUint32(8,h,!0),this._gqaDv.setUint32(12,r,!0),this._gqaDv.setUint32(16,c,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf0,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:0,size:q},l.keys,l.values,{buffer:this.b2.attnOut,offset:0,size:o.qProjSize*4},this.b2._gqaParamBuf0],r),this._gqaDv.setUint32(0,d,!0),this._gqaDv.setUint32(24,s+1,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf1,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:q,size:q},l.keys,l.values,{buffer:this.b2.attnOut,offset:o.qProjSize*4,size:o.qProjSize*4},this.b2._gqaParamBuf1],r);const P=this.getQWeight(`${i}.o_proj`),k=r*u,D=this.makeUniform(`fused_sig_mv_${k}_${n}`,[k,n,this.groupSize]);this.runCached(`b2_fused_sig_oproj_${t}`,"fused_sigmoid_gptq_b2_f16",[this.b2.attnOut,this.b2.qGate,P.qweight,P.scales,this.b2.qProj,D],this.wg4(n))}linearAttentionB2(a,e,t){const s=`model.language_model.layers.${t}.linear_attn`,i=this.hiddenSize,n=this.b2._dims,o=n.linHeads,u=n.linKeyDim,r=n.linValDim;n.linEVD;const h=n.linQKVDim,c=n.valueDim,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[i,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:i*4},p,{buffer:this.b2.normed,offset:0,size:i*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:i*4,size:i*4},p,{buffer:this.b2.normed,offset:i*4,size:i*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);{const 
q=[this.gptqMatvecB2Op(this.b2.normed,this.b2.linQKV,`${s}.in_proj_qkv`,i,h),this.gptqMatvecB2Op(this.b2.normed,this.b2.linZ,`${s}.in_proj_z`,i,c)];this.abQuantized&&(q.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linAlpha,`${s}.in_proj_a`,i,this.linValueHeads)),q.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linBeta,`${s}.in_proj_b`,i,this.linValueHeads))),this.gpu.dispatchMulti(q.filter(Boolean))}const g=this.weights[`${s}.conv1d.weight`],B=this.weights[`${s}.A_log`],_=this.weights[`${s}.dt_bias`],l=this.weights[`${s}.norm.weight`],b=h*4,U=c*4;if(this.abQuantized){const q=this.linValueHeads,C=q*4,d=`fused_cdn_q_${o}_${u}_${r}_${h}_${q}`,P=this.paramBufs[d];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:0,size:U},{buffer:this.b2.linAlpha,offset:0,size:C},{buffer:this.b2.linBeta,offset:0,size:C},B,_,l,P],o),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:b,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:U,size:U},{buffer:this.b2.linAlpha,offset:C,size:C},{buffer:this.b2.linBeta,offset:C,size:C},B,_,l,P],o)}else{const q=`fused_cdn_ext_${o}_${u}_${r}_${h}_${i}_${this.linValueHeads}`,C=this.paramBufs[q],d=this.linABWeight[t];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:0,size:U},{buffer:this.b2.normed,offset:0,size:i*4},d,B,_,l,C],o),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:b,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:U,size:U},{buffer:this.b2.normed,offset:i*4,size:i*4},d,B,_,l,C],o)}const 
y=this.getQWeight(`${s}.out_proj`),v=this.makeUniform(`fused_silu_mv_${c}_${i}`,[c,i,this.groupSize]);this.runCached(`b2_fused_silu_oproj_${t}`,"fused_silu_gptq_b2_f16",[this.b2.linZ,this.b2.linOut,y.qweight,y.scales,this.b2.attnOut,v],this.wg4(i))}fusedNormMLPB2(a,e,t,s,i){const n=this.hiddenSize,o=this.intermediateSize,u=`model.language_model.layers.${i}.mlp`,r=`model.language_model.layers.${i}.post_attention_layernorm.weight`,h=this.weights[r],c=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.run("three_way_add_rmsnorm_b2",[a,t,s,h,e,this.b2.normed,c],2);const m=this.getQWeight(`${u}.gate_proj`),f=this.getQWeight(`${u}.up_proj`),p=this.makeUniform(`fused_mv_${n}_${o}`,[n,o,this.groupSize]);this.runCached(`b2_fused_gus_${i}`,"fused_gate_up_silu_b2_f16",[this.b2.normed,m.qweight,m.scales,f.qweight,f.scales,this.b2.mlpIntermediate,p],this.wg4(o)),this.gptqMatvecB2(this.b2.mlpIntermediate,this.b2.mlpOut,`${u}.down_proj`,o,n)}decoderLayerB2(a,e,t,s,i){let n;this.layerTypes[a]==="full_attention"?(this.fullAttentionB2(t,i,a,e),n=this.b2.qProj):(this.linearAttentionB2(t,i,a),n=this.b2.attnOut),this.fusedNormMLPB2(t,s,i,n,a)}_dispatchLmHeadB2(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,t=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]),s=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2";this.runCached("b2_lm_head",s,[this.b2.normed,this._lmHeadQWeight,this._lmHeadScales,this.b2.logits,t],this.wg4(e))}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.hiddenSize,i=this.vocabSize,n=this.makeUniform("lmhead_params",[s,i]);this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:0,size:s*4},t,{buffer:this.b2.logits,offset:0,size:i*4},n],this.wg(i)),this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:s*4,size:s*4},t,{buffer:this.b2.logits,offset:i*4,size:i*4},n],this.wg(i))}}async 
forwardB2(a,e,t){this.b2||this.initB2Buffers(),this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embeddingB2(a,e);let s=this.b2.hidden,i=this.b2.hiddenB,n=this.b2.zeroBuf;for(let f=0;f<this.numLayers;f++){this.decoderLayerB2(f,t,s,i,n),n=this.b2.mlpOut;const p=s;s=i,i=p}if(this.mtp){const f=this.hiddenSize;this.gpu.copyBuffer(s,this.mtp.savedHidden,f*4,f*4,0),this.gpu.copyBuffer(this.b2.mlpOut,this.mtp.savedMlpOut,f*4,f*4,0)}const o=this.weights["model.language_model.norm.weight"],u=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);this.run("add_rmsnorm_b2",[s,this.b2.mlpOut,o,this.b2.normed,u],2),this._dispatchLmHeadB2();const h=(this.temperature??.7)>0,c=this.vocabSize,m=this.makeUniform("argmax_params",[c]);return h?(this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.topkResult0,m],1),this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.topkResult1,m],1)):(this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.argmaxResult0,m],1),this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.argmaxResult1,m],1)),this.gpu._passCount!==void 0&&!this._b2PassCountLogged&&(console.log(`[PERF] B2: ${this.gpu._passCount} compute passes per 2-token step`),this._b2PassCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),h?(this.gpu.copyBuffer(this.b2.topkResult0,this.b2.topkReadback0,2048),this.gpu.copyBuffer(this.b2.topkResult1,this.b2.topkReadback1,2048)):(this.gpu.copyBuffer(this.b2.argmaxResult0,this.b2.argmaxReadback0,8),this.gpu.copyBuffer(this.b2.argmaxResult1,this.b2.argmaxReadback1,8)),this.gpu.endBatch(),this._readAndSampleB2(h)}async _readAndSampleB2(a){const e=[0,0];a?(await 
Promise.all([this.b2.topkReadback0.mapAsync(GPUMapMode.READ),this.b2.topkReadback1.mapAsync(GPUMapMode.READ)]),e[0]=this._sampleFromTopkReadback(this.b2.topkReadback0),e[1]=this._sampleFromTopkReadback(this.b2.topkReadback1)):(await Promise.all([this.b2.argmaxReadback0.mapAsync(GPUMapMode.READ),this.b2.argmaxReadback1.mapAsync(GPUMapMode.READ)]),e[0]=new Uint32Array(this.b2.argmaxReadback0.getMappedRange().slice(0))[0],e[1]=new Uint32Array(this.b2.argmaxReadback1.getMappedRange().slice(0))[0],this.b2.argmaxReadback0.unmap(),this.b2.argmaxReadback1.unmap());for(const t of e)this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=t:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=t);return e}_sampleFromTopkReadback(a){const e=a.getMappedRange(),t=new Uint32Array(e),s=new Float32Array(e),i=new Uint32Array(256),n=new Float32Array(256);for(let r=0;r<256;r++)i[r]=t[r*2],n[r]=s[r*2+1];a.unmap();const o=this.presencePenalty??0,u=this.repetitionPenalty??1;if((o>0||u>1)&&this._recentTokenCount>0){const r=new Set;for(let h=0;h<this._recentTokenCount;h++)r.add(this._recentTokens[h]);for(let h=0;h<256;h++)r.has(i[h])&&(o>0&&(n[h]-=o),u>1&&(n[h]=n[h]>0?n[h]/u:n[h]*u))}return this._sampleFromArrays(i,n,256)}async _readAndSample(){await this._argmaxReadback.mapAsync(GPUMapMode.READ);const a=new Uint32Array(this._argmaxReadback.getMappedRange().slice(0));this._argmaxReadback.unmap();const e=a[0];return this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=e:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=e),e}_sampleFromArrays(a,e,t){const s=this.temperature??.7,i=this.topP??.8,n=this.topK??20,o=Math.min(n,t),u=this._selBuf||(this._selBuf=new Uint32Array(64)),r=this._selValBuf||(this._selValBuf=new Float32Array(64)),h=this._usedBuf||(this._usedBuf=new Uint8Array(256));h.fill(0);for(let l=0;l<o;l++){let b=-1,U=-1/0;for(let 
y=0;y<t;y++)!h[y]&&e[y]>U&&(U=e[y],b=y);if(b<0)break;u[l]=a[b],r[l]=U,h[b]=1}const c=r[0],m=this._probBuf||(this._probBuf=new Float32Array(64));let f=0;for(let l=0;l<o;l++)m[l]=Math.exp((r[l]-c)/s),f+=m[l];for(let l=0;l<o;l++)m[l]/=f;let p=0,w=o;for(let l=0;l<o;l++)if(p+=m[l],p>=i){w=l+1;break}let g=0;for(let l=0;l<w;l++)g+=m[l];const B=Math.random()*g;let _=0;for(let l=0;l<w;l++)if(_+=m[l],_>=B)return u[l];return u[0]}_sample(a){const e=this.temperature??.7,t=this.topP??.8,s=this.topK??20,i=a.length;if(e<=0){let _=0,l=a[0];for(let b=1;b<i;b++)a[b]>l&&(l=a[b],_=b);return _}const n=Math.max(s,64),o=new Int32Array(n).fill(-1),u=new Float32Array(n).fill(-1/0);let r=-1/0;for(let _=0;_<i;_++){const l=a[_];if(l>r&&(r=l),l>u[n-1]){let b=n-1;for(;b>0&&l>u[b-1];)u[b]=u[b-1],o[b]=o[b-1],b--;u[b]=l,o[b]=_}}const h=Math.min(s,n),c=new Float32Array(h);let m=0;for(let _=0;_<h&&!(o[_]<0);_++)c[_]=Math.exp((u[_]-r)/e),m+=c[_];for(let _=0;_<h;_++)c[_]/=m;let f=0,p=h;for(let _=0;_<h;_++){if(o[_]<0){p=_;break}if(f+=c[_],f>=t){p=_+1;break}}let w=0;for(let _=0;_<p;_++)w+=c[_];const g=Math.random()*w;let B=0;for(let _=0;_<p;_++)if(B+=c[_],B>=g)return o[_];return o[0]}async generate(a,e=512,t,s){this.seqLen=0,this._recentTokenCount=0,this._replayFlat=null,this._ropeDelta=s?.ropeDelta??0;const i=this.maxSeqLen||4096;if(e=Math.min(e,i-a.length),e<=0)throw new Error(`Prompt (${a.length} tokens) exceeds context size (${i})`);const n=[...a],o=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,m=this.linValueHeads/o*r,f=o*(u+u+m);for(let q=0;q<this.numLayers;q++)if(this.layerTypes[q]==="linear_attention"){const C=o*u*m*4,d=3*f*4;this.gpu.device.queue.writeBuffer(this.linState[q],0,new Uint8Array(C)),this.gpu.device.queue.writeBuffer(this.linConvHist[q],0,new Uint8Array(d))}let p=null;if(s){let q=0;const C=s.imageTokenId,d=s.positionIds3D;for(let P=0;P<a.length;P++){const 
k=d[0][P],D=d[1][P],S=d[2][P];this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),a[P]===C?this.embeddingFromVisionBuffer(s.embedBuffer,q++):this.embedding(a[P]);let $=this.hidden,M=this.hiddenB,z=this.zeroBuf;for(let O=0;O<this.numLayers;O++){this.decoderLayer(O,k,$,M,z,D,S,P),z=this.mlpOut;const G=$;$=M,M=G}if(P===a.length-1){const O=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm($,this.mlpOut,this.normed,O,"add_final_norm"),this._dispatchLmHead();const G=this.temperature??.7;if(G>0){const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,R],1);const F=Math.random()*4294967295>>>0,T=this._makeMixedUniform("sample_params",[{f:G},{u:this.topK??20},{f:this.topP??.8},{u:F}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,T],1)}else{const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,R],1)}this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8)}this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch(),this.seqLen=P+1}p=await this._readAndSample()}else for(let q=0;q<a.length;q++)p=await this.forward(a[q],q),this.seqLen=q+1;n.push(p);const w=this.config.eos_token_id??this.textCfg.eos_token_id,g=Array.isArray(w)?w:w!=null?[w]:[248044,248046];if(g.includes(p)||t?.(p,0))return n;this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=p:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=p),this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const 
_=8;this._tokenHistoryBuf||(this._tokenHistoryBuf=this.gpu.createBuffer("token_history",_*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._tokenHistoryReadback=this.gpu.device.createBuffer({size:_*4,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"token_history_readback"}));let l=0,b=0,U=1,y=p,v=!1;for(;U<e;){const q=performance.now(),C=Math.min(_,e-U);for(let S=0;S<C;S++){const $=this.seqLen+S+this._ropeDelta;if(this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),S===0?this.embedding(y):this.embeddingFromArgmax(),v)this._replayCoreForward($);else{let G=this.hidden,R=this.hiddenB,F=this.zeroBuf;S===0&&!this._replayFlat&&(this._forceMinSplits=2,this.gpu.startRecording());for(let x=0;x<this.numLayers;x++){this.decoderLayer(x,$,G,R,F),F=this.mlpOut;const A=G;G=R,R=A}const T=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(G,this.mlpOut,this.normed,T,"add_final_norm"),this._dispatchLmHead(),S===0&&this.gpu._recording&&(this._forceMinSplits=0,this._buildReplayFromRecording())}const M=this.temperature??.7;if(M>0){const G=this.repetitionPenalty??1,R=this.presencePenalty??0,F=this._recentTokenCount+S;if(F>0&&(G>1||R>0)){const j=this._makeMixedUniform("penalty_params",[{u:Math.min(F,this._repMaxTokens)},{f:G},{f:R},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,j],Math.ceil(Math.min(F,this._repMaxTokens)/256))}const T=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,T],1);const x=Math.random()*4294967295>>>0,A=this._makeMixedUniform("sample_params",[{f:M},{u:this.topK??20},{f:this.topP??.8},{u:x}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,A],1)}else{const G=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,G],1)}const 
// NOTE: this is line-wrapped minified code; the breaks fall mid-statement, so each note describes the text that follows it.
// Tail of a batched decode loop whose start lies above this span. Each step runs the append_token shader with
// argmaxResult, _gpuRecentTokens and _tokenHistoryBuf, then ends the single pass and the batch. After the batch,
// C*4 bytes of _tokenHistoryBuf are copied to _tokenHistoryReadback, mapped for reading and copied into a
// Uint32Array. Every token is pushed to n, seqLen is incremented and the recent-token window is updated (shifted
// with copyWithin once full). The loop exits when a token is in g or when the callback t returns a truthy value,
// logs ms/tok figures, and the method returns n.
// async _quantizeBF16Weight(a, e, t, s) starts on this line. With i = groupSize, n = e/8 and o = e/i it allocates
// three buffers named from s: qweight (n*t*4 bytes), scales_f32 (o*t*4 bytes) and scales (ceil(o*t/2)*4 bytes),
// and fetches the quantize_bf16_to_int4 pipeline, loading the shader table through a dynamic import.
z=(this._recentTokenCount+S)%this._repMaxTokens,O=this.makeUniform(`append_${S}`,[z,S]);this.run("append_token",[this.argmaxResult,this._gpuRecentTokens,this._tokenHistoryBuf,O],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch()}!v&&this._replayFlat&&(v=!0);const d=this.gpu.device.createCommandEncoder();d.copyBufferToBuffer(this._tokenHistoryBuf,0,this._tokenHistoryReadback,0,C*4),this.gpu.device.queue.submit([d.finish()]),await this._tokenHistoryReadback.mapAsync(GPUMapMode.READ);const P=new Uint32Array(this._tokenHistoryReadback.getMappedRange().slice(0,C*4));this._tokenHistoryReadback.unmap();const k=performance.now();l+=k-q,b+=C;let D=!1;for(let S=0;S<C;S++){const $=P[S];if(n.push($),this.seqLen++,this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=$:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=$),g.includes($)){D=!0;break}const M=t?.($,U);if(U++,M){D=!0;break}}if(b%50<_&&console.log(`[T @${b}] ${(l/b).toFixed(1)}ms/tok (batch=${_})`),D)break;y=P[C-1],this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4)}return b>0&&console.log(`[T final @${b}] ${(l/b).toFixed(1)}ms/tok (batch=${_})`),n}async _quantizeBF16Weight(a,e,t,s){const i=this.groupSize,n=e/8,o=e/i,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,r=this.gpu.createBuffer(`${s}_qweight`,n*t*4,u),h=this.gpu.createBuffer(`${s}_scales_f32`,o*t*4,u),c=Math.ceil(o*t/2)*4,m=this.gpu.createBuffer(`${s}_scales`,c,u),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:P}=await import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:P}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${s}_qparams`,new 
// _quantizeBF16Weight, continued: the uniform holds [e, t, groupSize]; the quantize pass binds [a, qweight,
// scales_f32, uniform] and is dispatched on a 2-D grid of min(t, 65535) by ceil(t / 65535) workgroups. A second
// pass (pack_f32_to_f16_pairs) binds [scales_f32, scales, uniform] with y = ceil(o*t/2) and dispatches
// ceil(y / 256) workgroups. The method waits for onSubmittedWorkDone, destroys the scales_f32 buffer and both
// uniform buffers, and returns { qweight, scales }.
// async initMTP(a) starts on this line: it returns immediately when this.mtp is already set, loads tensors for
// the hard-coded repo id Qwen/Qwen3.5-2B via loadMTPWeights(n, a), creates this.mtp with empty weights/qweights
// maps, and uploads every tensor as a GPU buffer named mtp_ plus the tensor name.
Uint32Array([e,t,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,r,h,p]),g=65535,B=Math.min(t,g),_=Math.ceil(t/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(B,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:P}=await import("./gpu-ops-BbLjsC0p.js").then(k=>k.b);return{SHADERS:P}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(o*t/2),v=this.gpu.createBufferFromData(`${s}_pparams`,new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[h,m,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();return d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),v.destroy(),{qweight:r,scales:m}}async initMTP(a){if(this.mtp)return;const e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=performance.now(),n="Qwen/Qwen3.5-2B";console.log(`[MTP] Downloading MTP weights from ${n}...`);const{loadMTPWeights:o}=await H(async()=>{const{loadMTPWeights:g}=await import("./safetensors-loader-CwGm5mJX.js");return{loadMTPWeights:g}},[]),u=await o(n,a);console.log(`[MTP] Downloaded ${Object.keys(u).length} MTP tensors`),this.mtp={weights:{},qweights:{}};const r={};for(const[g,B]of Object.entries(u)){const _=this.gpu.createBufferFromData(`mtp_${g}`,B.data);r[g]=_,this.mtp.weights[g]=_}const 
// initMTP, continued: h lists eight MTP projection weights with the K and N sizes that are passed to
// _quantizeBF16Weight as its e and t arguments. Each result is stored in this.mtp.qweights; the source buffer is
// destroyed and removed from this.mtp.weights. Byte copies of the seven listed norm tensors (when present) are
// kept in this.mtp.normRaw as Uint32Array. One key/value buffer pair of 4096 * numKVHeads * headDim * 4 bytes is
// allocated (presumably 4096 positions of 32-bit values - TODO confirm), plus concat (2 * hiddenSize * 4 bytes),
// fc-out, saved-hidden and saved-mlp-out buffers. mtp.seqLen is reset to 0, the two uniform builders run, and
// _buildTrimmedLmHead(8000) is awaited before the summary log line.
// _buildMTPSplitQKNormUniform() starts at the end of this line: with s = numHeads*headDim and
// i = numKVHeads*headDim it sizes an ArrayBuffer of 32 + ceil(((s + i) / 2) / 4) * 16 bytes.
h=[{name:"mtp.fc.weight",K:e*2,N:e},{name:"mtp.layers.0.self_attn.q_proj.weight",K:e,N:this.numHeads*this.headDim*2},{name:"mtp.layers.0.self_attn.k_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.v_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.o_proj.weight",K:e,N:e},{name:"mtp.layers.0.mlp.gate_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.up_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.down_proj.weight",K:t,N:e}];for(const{name:g,K:B,N:_}of h){const{qweight:l,scales:b}=await this._quantizeBF16Weight(r[g],B,_,`mtp_${g}`);this.mtp.qweights[g]={qweight:l,scales:b},r[g].destroy(),delete this.mtp.weights[g]}this.mtp.normRaw={};const c=["mtp.pre_fc_norm_hidden.weight","mtp.pre_fc_norm_embedding.weight","mtp.layers.0.input_layernorm.weight","mtp.layers.0.post_attention_layernorm.weight","mtp.norm.weight","mtp.layers.0.self_attn.q_norm.weight","mtp.layers.0.self_attn.k_norm.weight"];for(const g of c){const B=u[g];B&&(this.mtp.normRaw[g]=new Uint32Array(B.data.buffer.slice(B.data.byteOffset,B.data.byteOffset+B.data.byteLength)))}const f=4096*this.numKVHeads*this.headDim*4;this.mtp.kvCache={keys:this.gpu.createBuffer("mtp_kv_keys",f,s),values:this.gpu.createBuffer("mtp_kv_values",f,s)},this.mtp.concatBuf=this.gpu.createBuffer("mtp_concat",e*2*4,s),this.mtp.fcOut=this.gpu.createBuffer("mtp_fc_out",e*4,s),this.mtp.savedHidden=this.gpu.createBuffer("mtp_saved_hidden",e*4,s),this.mtp.savedMlpOut=this.gpu.createBuffer("mtp_saved_mlp_out",e*4,s),this.mtp.seqLen=0,this._buildMTPSplitQKNormUniform(),this._buildMTPFusedMLPUniform(),await this._buildTrimmedLmHead(8e3);const p=((performance.now()-i)/1e3).toFixed(1),w=h.length;console.log(`[MTP] Initialized in ${p}s: ${w} INT4 weights, 1 KV cache layer`)}_buildMTPSplitQKNormUniform(){const a=this.numHeads,e=this.numKVHeads,t=this.headDim,s=a*t,i=e*t,n=(s+i)/2,u=32+Math.ceil(n/4)*16,r=new ArrayBuffer(u),h=new 
// _buildMTPSplitQKNormUniform, continued: the 32-byte header receives numHeads, numKVHeads, headDim (u32),
// rmsEps (f32) and four zeroed u32 slots, all little-endian. Starting at byte 32 it writes s/2 words read from
// the q_norm entry of this.mtp.normRaw followed by i/2 words from the k_norm entry, four words per 16-byte row,
// then uploads the buffer as this.mtp.fusedSQKParams (UNIFORM | COPY_DST).
// NOTE(review): the loops index the norm arrays up to s/2 and i/2; whether those arrays are that long is not
// visible here - verify against the tensor shapes.
// _buildMTPFusedMLPUniform() is an empty method.
// async _buildTrimmedLmHead(a = 8000): stores the identity index list 0..a-1 as this.mtp.trimmedToFull, selects
// the embedding matrix unless tie_word_embeddings is strictly false (then lm_head.weight), and gathers a rows
// of e/2 words each with the gather_rows_bf16 shader (grid ceil((e/2)/256) by a). The quantize call follows on
// the next line.
DataView(r);h.setUint32(0,a,!0),h.setUint32(4,e,!0),h.setUint32(8,t,!0),h.setFloat32(12,this.rmsEps,!0),h.setUint32(16,0,!0),h.setUint32(20,0,!0),h.setUint32(24,0,!0),h.setUint32(28,0,!0);const c=this.mtp.normRaw["mtp.layers.0.self_attn.q_norm.weight"],m=this.mtp.normRaw["mtp.layers.0.self_attn.k_norm.weight"];if(c)for(let p=0;p<s/2;p++){const w=Math.floor(p/4),g=p%4;h.setUint32(32+w*16+g*4,c[p],!0)}if(m){const p=s/2;for(let w=0;w<i/2;w++){const g=p+w,B=Math.floor(g/4),_=g%4;h.setUint32(32+B*16+_*4,m[w],!0)}}const f=this.gpu.device.createBuffer({size:u,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"mtp_fused_sqk"});this.gpu.device.queue.writeBuffer(f,0,new Uint8Array(r)),this.mtp.fusedSQKParams=f}_buildMTPFusedMLPUniform(){}async _buildTrimmedLmHead(a=8e3){const e=this.hiddenSize,t=this.gpu,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=new Uint32Array(a);for(let U=0;U<a;U++)i[U]=U;this.mtp.trimmedToFull=i,this.mtp.trimmedVocabSize=a;const n=t.createBufferFromData("mtp_trim_indices",i),u=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",r=this.weights[u],h=e/2,c=a*h*4,m=t.createBuffer("mtp_trim_gathered",c,s),f=(await H(async()=>{const{SHADERS:U}=await import("./gpu-ops-BbLjsC0p.js").then(y=>y.b);return{SHADERS:U}},[])).SHADERS.gather_rows_bf16,p=t.getOrCreatePipeline("gather_rows_bf16",f),w=t.createBufferFromData("mtp_trim_gparams",new Uint32Array([h,a]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),g=t.createBindGroup(p,0,[r,n,m,w]),B=t.device.createCommandEncoder(),_=B.beginComputePass();_.setPipeline(p),_.setBindGroup(0,g),_.dispatchWorkgroups(Math.ceil(h/256),a),_.end(),t.device.queue.submit([B.finish()]);const{qweight:l,scales:b}=await 
this._quantizeBF16Weight(m,e,a,"mtp_trim_lmhead");this.mtp.trimmedLmHead={qweight:l,scales:b},this.mtp.trimmedLogits=t.createBuffer("mtp_trimmed_logits",a*4,s),m.destroy(),n.destroy(),w.destroy(),console.log(`[MTP] Trimmed lm_head: ${a} tokens, ${(l.size/1024/1024).toFixed(1)}MB qw + ${(b.size/1024/1024).toFixed(1)}MB sc`)}_dispatchTrimmedLmHead(){const a=this.hiddenSize,e=this.mtp.trimmedVocabSize,t=this.mtp.trimmedLmHead,i=a/this.groupSize%4===0,n=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(i){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("mtp_trim_lmhead",o,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg8(e))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("mtp_trim_lmhead",o,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg4(e))}}_mtpGetQWeight(a){return this.mtp.qweights[a]}_mtpGptqMatvec(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)throw new Error(`MTP weight not found: ${t}`);const u=s/this.groupSize%4===0,r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg4(i))}}_mtpSaveHidden(a,e){this.mtp&&(this.gpu.copyBuffer(a,this.mtp.savedHidden,this.hiddenSize*4),this.gpu.copyBuffer(e,this.mtp.savedMlpOut,this.hiddenSize*4))}async mtpForward(a){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");return this.mtpForwardSubmit(a),await this._readAndSample()}mtpForwardSubmit(a){const e=this.hiddenSize,t=this.intermediateSize,s=this.mtp.seqLen;this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);const i=this.makeUniform("add_rmsnorm_params",[e,this.rmsEps]);this.runCached("mtp_emb_norm","rmsnorm",[this.hidden,this.mtp.weights["mtp.pre_fc_norm_embedding.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4),this.runCached("mtp_hid_norm","add_rmsnorm_ro",[this.mtp.savedHidden,this.mtp.savedMlpOut,this.mtp.weights["mtp.pre_fc_norm_hidden.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4,0,e*4),this._mtpGptqMatvec(this.mtp.concatBuf,this.mtp.fcOut,"mtp.fc.weight",e*2,e),this.runCached("mtp_input_norm","rmsnorm",[this.mtp.fcOut,this.mtp.weights["mtp.layers.0.input_layernorm.weight"],this.normed,i],1);const n="mtp.layers.0.self_attn",o=this._mtpGptqMatvecOp(this.normed,this.qProjFull,`${n}.q_proj.weight`,e,this.numHeads*this.headDim*2),u=this._mtpGptqMatvecOp(this.normed,this.kProj,`${n}.k_proj.weight`,e,this.numKVHeads*this.headDim),r=this._mtpGptqMatvecOp(this.normed,this.vProj,`${n}.v_proj.weight`,e,this.numKVHeads*this.headDim);this.gpu.dispatchMulti([o,u,r].filter(Boolean)),this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(this.mtp.fusedSQKParams,16,this._gqaData,0,16),this.runCached("mtp_sqk",this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.mtp.fusedSQKParams],this.numHeads+this.numKVHeads);const 
h=s+1;this._gqaDv.setUint32(0,h,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.runCached("mtp_gqa","gqa_attention_head",[this.qProj,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.attnOut,this._gqaParamBuf],this.numHeads);const c=this._mtpGetQWeight(`${n}.o_proj.weight`),m=this.numHeads*this.headDim,f=this.makeUniform(`fused_sig_mv_${m}_${e}`,[m,e,this.groupSize]);m/this.groupSize%4===0&&this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_4t_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg8(e)):this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)):this.runCached("mtp_sig_oproj","fused_sigmoid_gptq",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)),this.runCached("mtp_post_norm","add_rmsnorm_ro",[this.mtp.fcOut,this.qProj,this.mtp.weights["mtp.layers.0.post_attention_layernorm.weight"],this.normed,i],1);const g=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",B=this.makeUniform(`fused_mv_${e}_${t}`,[e,t,this.groupSize]),_=this.mtp.qweights["mtp.layers.0.mlp.gate_proj.weight"],l=this.mtp.qweights["mtp.layers.0.mlp.up_proj.weight"];this.runCached("mtp_gate_up_silu",g,[this.normed,_.qweight,_.scales,l.qweight,l.scales,this.mlpIntermediate,B],this.wg4(t)),this._mtpGptqMatvec(this.mlpIntermediate,this.mlpOut,"mtp.layers.0.mlp.down_proj.weight",t,e),this.runCached("mtp_final_norm","three_way_add_rmsnorm",[this.mtp.fcOut,this.qProj,this.mlpOut,this.mtp.weights["mtp.norm.weight"],this.hidden,this.normed,i],1),this._dispatchLmHead();const 
b=this.makeUniform("mtp_argmax_params",[this.vocabSize]);this.runCached("mtp_argmax","argmax",[this.logits,this.argmaxResult,b],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this.mtp.seqLen++}_mtpSnapshotDeltaNet(){if(!this.mtp._deltaNetSnapshot){const e=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.mtp._deltaNetSnapshot={states:{},convs:{}};for(let t=0;t<this.numLayers;t++)this.layerTypes[t]==="linear_attention"&&(this.mtp._deltaNetSnapshot.states[t]=this.gpu.createBuffer(`mtp_snap_state_${t}`,this.linState[t].size,e),this.mtp._deltaNetSnapshot.convs[t]=this.gpu.createBuffer(`mtp_snap_conv_${t}`,this.linConvHist[t].size,e))}const a=this.mtp._deltaNetSnapshot;a.mtpSeqLen=this.mtp.seqLen;for(const e of Object.keys(a.states))this.gpu.copyBuffer(this.linState[e],a.states[e],this.linState[e].size),this.gpu.copyBuffer(this.linConvHist[e],a.convs[e],this.linConvHist[e].size)}_mtpRestoreDeltaNet(){const a=this.mtp._deltaNetSnapshot;if(a){a.mtpSeqLen!==void 0&&(this.mtp.seqLen=a.mtpSeqLen);for(const e of Object.keys(a.states))this.gpu.copyBuffer(a.states[e],this.linState[e],this.linState[e].size),this.gpu.copyBuffer(a.convs[e],this.linConvHist[e],this.linConvHist[e].size)}}async generateWithMTP(a,e=512,t){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");this.seqLen=0,this.mtp.seqLen=0,this._recentTokenCount=0;const s=[...a],i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,r=this.linValueHeads/i*o,h=i*(n+n+r);for(let v=0;v<this.numLayers;v++)if(this.layerTypes[v]==="linear_attention"){const q=i*n*r*4,C=3*h*4;this.gpu.device.queue.writeBuffer(this.linState[v],0,new Uint8Array(q)),this.gpu.device.queue.writeBuffer(this.linConvHist[v],0,new Uint8Array(C))}let c=null;for(let v=0;v<a.length;v++)c=await this.forward(a[v],v),this.seqLen=v+1;s.push(c);const m=this.config.eos_token_id??this.textCfg.eos_token_id,f=Array.isArray(m)?m:m!=null?[m]:[248044,248046];if(f.includes(c)||t?.(c,0))return s;let w=1,g=0,B=0,_=c,l=0,b=0;for(;w<e;){const v=performance.now(),q=await this.mtpForward(_);this._mtpSnapshotDeltaNet();const C=this.seqLen,d=await this.forwardB2(_,q,this.seqLen);this.seqLen+=2;const P=d[0],k=d[1];if(P===q){if(g++,s.push(q),w++,f.includes(q))break;let S=t?.(q,w-1);if(S||(s.push(k),w++,f.includes(k))||(S=t?.(k,w-1),S))break;_=k}else{B++,this._mtpRestoreDeltaNet(),this.seqLen=C;const S=await this.forward(_,this.seqLen);if(this.seqLen++,s.push(S),w++,f.includes(S)||t?.(S,w-1))break;_=S}const D=performance.now();if(l+=D-v,b++,b%25===0){const S=g/(g+B)*100,$=w/b;console.log(`[MTP @${b}] ${(l/b).toFixed(1)}ms/step, ${(w/(l/1e3)).toFixed(0)} tok/s, accept=${S.toFixed(0)}%, ${$.toFixed(1)} tok/step`)}}const U=g/Math.max(1,g+B)*100;return console.log(`[MTP final] ${(w/((l||1)/1e3)).toFixed(0)} tok/s, accept=${U.toFixed(0)}% (${g}/${g+B}), ${w} tokens`),s}_mtpGptqMatvecOp(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)return null;const u=s/this.groupSize%4===0,r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";return this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";return 
this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg4(i))}}}export{Y as Qwen35Model};
 
 
assets/qwen35-model-BJNcT5Rw.js ADDED
@@ -0,0 +1 @@
 
 
1
+ import{S as V,a as W,_ as H}from"./gpu-ops-flxI8RuZ.js";function K(E){const a=E<<16,e=new ArrayBuffer(4);new Uint32Array(e)[0]=a,new Float32Array(e)[0];const t=a>>>31&1,s=a>>>23&255,i=a&8388607;if(s===0)return t<<15;if(s===255)return t<<15|31744|(i?512:0);const n=s-127+15;return n>=31?t<<15|31744:n<=0?t<<15:t<<15|n<<10|i>>>13}class Y{constructor(a,e,t){if(this.gpu=a,this.config=e,this.textCfg=e.text_config,this.quantConfig=t,this.hiddenSize=this.textCfg.hidden_size,this.intermediateSize=this.textCfg.intermediate_size,this.numLayers=this.textCfg.num_hidden_layers,this.numHeads=this.textCfg.num_attention_heads,this.numKVHeads=this.textCfg.num_key_value_heads,this.headDim=this.textCfg.head_dim,this.vocabSize=this.textCfg.vocab_size,this.rmsEps=this.textCfg.rms_norm_eps,this.ropeTheta=this.textCfg.rope_parameters?.rope_theta||1e7,this.partialRotary=this.textCfg.partial_rotary_factor,this.partialDim=Math.floor(this.headDim*this.partialRotary),this.mropeSection=this.textCfg.rope_parameters?.mrope_section||[11,11,10],this.layerTypes=this.textCfg.layer_types,this.groupSize=t?.group_size||t?.config_groups?.group_0?.weights?.group_size||128,this.fp16Layers=new Set,t?.extra_config)for(const u of Object.keys(t.extra_config))this.fp16Layers.add(u);this.weights={},this.kvCache={},this.seqLen=0,this.pipelines={},this.singlePassMode=!0,this._replayFlat=null,this._forceMinSplits=0,this._ropeDelta=0;const s=this.numHeads*this.headDim,i=this.numKVHeads*this.headDim,n=(s+i)/2,o=Math.ceil(n/4);this._splitQKNormShaderKey=`fused_split_qknorm_kvstore_${o}`,V[this._splitQKNormShaderKey]||(V[this._splitQKNormShaderKey]=W(o))}compilePipelines(){const a=this.gpu;for(const[e,t]of Object.entries(V))this.pipelines[e]=a.getOrCreatePipeline(e,t)}uploadTensors(a){this.quantConfig?.quant_method==="compressed-tensors"&&(a=this._convertCompressedTensors(a));for(const[t,s]of Object.entries(a)){let 
i=s.data;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")){const n=new Float32Array(i.buffer,i.byteOffset,i.byteLength/4),o=new Uint16Array(n.length);for(let u=0;u<n.length;u++){const r=new Uint32Array(n.buffer,n.byteOffset+u*4,1)[0];o[u]=r>>>16}i=new Uint8Array(o.buffer)}if(s._partial){let{offset:n,totalSize:o}=s._partial;if(s.dtype==="F32"&&!t.endsWith(".qweight")&&!t.endsWith(".qzeros")&&!t.endsWith(".scales")&&(n/=2,o/=2),n===0){const r=this.gpu.createBuffer(t,o,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC);this.gpu.device.queue.writeBuffer(r,0,i),this.weights[t]=r}else{const r=this.weights[t];r&&this.gpu.device.queue.writeBuffer(r,n,i)}}else this.weights[t]=this.gpu.createBufferFromData(t,i);(t.includes("post_attention_layernorm.weight")||t.includes("input_layernorm.weight")||t.includes("q_norm.weight")||t.includes("k_norm.weight")||t==="model.language_model.norm.weight")&&(this._normWeightRaw||(this._normWeightRaw={}),this._normWeightRaw[t]=new Uint32Array(i.buffer.slice(i.byteOffset,i.byteOffset+i.byteLength))),this._uploadedCount=(this._uploadedCount||0)+1}}_convertCompressedTensors(a){const e={};for(const[t,s]of Object.entries(a))if(!t.endsWith(".weight_shape")){if(t.endsWith(".weight_packed")){const i=t.slice(0,-14),n=s.shape,o=n[0],u=n[1],r=new Int32Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/4),h=new Int32Array(o*u);for(let c=0;c<o;c++)for(let m=0;m<u;m++)h[m*o+c]=r[c*u+m];e[`${i}.qweight`]={dtype:"I32",shape:[u,o],data:new Uint8Array(h.buffer)};continue}if(t.endsWith(".weight_scale")){const i=t.slice(0,-13),n=s.shape,o=n[0],u=n[1],r=new Uint16Array(s.data.buffer,s.data.byteOffset,s.data.byteLength/2),h=Math.ceil(o/2),c=new Uint32Array(u*h);for(let m=0;m<u;m++)for(let f=0;f<o;f+=2){const p=r[f*u+m],w=f+1<o?r[(f+1)*u+m]:0,g=K(p),P=K(w);c[m*h+(f>>1)]=g|P<<16}e[`${i}.scales`]={dtype:"I32",shape:[u,h],data:new Uint8Array(c.buffer)};continue}e[t]=s}return e}async 
postProcessWeights(){const a=this.textCfg.linear_num_key_heads||0,e=this.hiddenSize,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=this.layerTypes.indexOf("linear_attention"),i=s>=0?`model.language_model.layers.${s}.linear_attn`:"";if(this.abQuantized=i&&!!this.weights[`${i}.in_proj_a.qweight`],this.abQuantized){const v=this.textCfg.linear_num_value_heads??a;this.linAlpha=this.gpu.createBuffer("lin_alpha",v*4,t),this.linBeta=this.gpu.createBuffer("lin_beta",v*4,t)}else{this.linABWeight={};const v=this.textCfg.linear_num_value_heads??a,q=e*2,C=v*q,d=2*C;for(let B=0;B<this.numLayers;B++)if(this.layerTypes[B]==="linear_attention"){const k=`model.language_model.layers.${B}.linear_attn`,D=this.weights[`${k}.in_proj_a.weight`],S=this.weights[`${k}.in_proj_b.weight`];if(D&&S){const $=this.gpu.createBuffer(`ab_merged_${B}`,d,t),M=this.gpu.device.createCommandEncoder();M.copyBufferToBuffer(D,0,$,0,C),M.copyBufferToBuffer(S,0,$,C,C),this.gpu.device.queue.submit([M.finish()]),this.linABWeight[B]=$}}}{const v=[];for(let d=0;d<this.numLayers;d++){if(this.layerTypes[d]==="linear_attention"){const B=`model.language_model.layers.${d}.linear_attn`,k=this.textCfg.linear_num_key_heads||0,D=this.textCfg.linear_key_head_dim||128,S=this.textCfg.linear_value_head_dim||128,$=this.textCfg.linear_num_value_heads??k,M=$*S,z=$/k*S,O=k*(D+D+z);v.push({prefix:`${B}.in_proj_qkv`,K:e,N:O}),v.push({prefix:`${B}.in_proj_z`,K:e,N:M}),v.push({prefix:`${B}.out_proj`,K:M,N:e})}else{const 
B=`model.language_model.layers.${d}.self_attn`,k=this.numHeads*this.headDim*2,D=this.numKVHeads*this.headDim;v.push({prefix:`${B}.q_proj`,K:e,N:k}),v.push({prefix:`${B}.k_proj`,K:e,N:D}),v.push({prefix:`${B}.v_proj`,K:e,N:D}),v.push({prefix:`${B}.o_proj`,K:this.numHeads*this.headDim,N:e})}v.push({prefix:`model.language_model.layers.${d}.mlp.gate_proj`,K:e,N:this.intermediateSize}),v.push({prefix:`model.language_model.layers.${d}.mlp.up_proj`,K:e,N:this.intermediateSize}),v.push({prefix:`model.language_model.layers.${d}.mlp.down_proj`,K:this.intermediateSize,N:e})}let q=0;const C=performance.now();for(const{prefix:d,K:B,N:k}of v)if(!this.weights[`${d}.qweight`]&&this.weights[`${d}.weight`]){const{qweight:D,scales:S}=await this._quantizeBF16ToINT4(this.weights[`${d}.weight`],B,k,this.groupSize,d.replace(/\./g,"_"));this.weights[`${d}.qweight`]=D,this.weights[`${d}.scales`]=S,q++}q>0&&console.log(`[QUANT] GPU-quantized ${q} BF16 projections to INT4 in ${(performance.now()-C).toFixed(0)}ms`)}this._mergedGateUp={};const n=this.intermediateSize,o=e/8,u=o*n*4,h=o/(this.groupSize/8)*n*2;for(let v=0;v<this.numLayers;v++){const q=`model.language_model.layers.${v}.mlp`,C=this.getQWeight(`${q}.gate_proj`),d=this.getQWeight(`${q}.up_proj`);if(C.qweight&&d.qweight){const B=this.gpu.createBuffer(`merged_qw_${v}`,u*2,t),k=this.gpu.createBuffer(`merged_sc_${v}`,h*2,t),D=this.gpu.device.createCommandEncoder();D.copyBufferToBuffer(C.qweight,0,B,0,u),D.copyBufferToBuffer(d.qweight,0,B,u,u),D.copyBufferToBuffer(C.scales,0,k,0,h),D.copyBufferToBuffer(d.scales,0,k,h,h),this.gpu.device.queue.submit([D.finish()]),this._mergedGateUp[v]={qweight:B,scales:k}}}this._fusedMLPParams={};const c=16+512*16;for(let v=0;v<this.numLayers;v++){const q=`model.language_model.layers.${v}.post_attention_layernorm.weight`,C=this._normWeightRaw?.[q];if(!C||!this._mergedGateUp[v])continue;const d=new ArrayBuffer(c),B=new Uint32Array(d),k=new 
Float32Array(d);B[0]=e,B[1]=n,B[2]=this.groupSize,k[3]=this.rmsEps;for(let D=0;D<C.length;D++)B[4+D]=C[D];this._fusedMLPParams[v]=this.gpu.createBufferFromData(`fused_mlp_params_${v}`,new Uint32Array(d),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}this._fusedSQKParams={};const m=this.headDim,f=this.numHeads,p=this.numKVHeads,w=f*m,g=p*m,P=(w+g)/2,_=Math.ceil(P/4),l=32+_*16,b=this.mropeSection[1]*3,U=this.mropeSection[2]*3,y=`fused_split_qknorm_kvstore_${_}`;V[y]||(V[y]=W(_,this.ropeTheta,b,U,this.partialDim)),this.pipelines[y]||(this.pipelines[y]=this.gpu.getOrCreatePipeline(y,V[y])),this._splitQKNormShaderKey=y;for(let v=0;v<this.numLayers;v++){if(this.layerTypes[v]!=="full_attention")continue;const q=`model.language_model.layers.${v}.self_attn`,C=`${q}.q_norm.weight`,d=`${q}.k_norm.weight`,B=this._normWeightRaw?.[C],k=this._normWeightRaw?.[d],D=new ArrayBuffer(l),S=new DataView(D);if(S.setUint32(0,f,!0),S.setUint32(4,p,!0),S.setUint32(8,m,!0),S.setFloat32(12,this.rmsEps,!0),S.setUint32(16,0,!0),S.setUint32(20,0,!0),S.setUint32(24,0,!0),S.setUint32(28,0,!0),B)for(let M=0;M<w/2;M++){const z=Math.floor(M/4),O=M%4;S.setUint32(32+z*16+O*4,B[M],!0)}if(k){const M=w/2;for(let z=0;z<g/2;z++){const O=M+z,G=Math.floor(O/4),R=O%4;S.setUint32(32+G*16+R*4,k[z],!0)}}const $=this.gpu.device.createBuffer({size:l,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:`fused_sqk_${v}`});this.gpu.device.queue.writeBuffer($,0,new Uint8Array(D)),this._fusedSQKParams[v]=$}await this._quantizeLmHead()}async _quantizeLmHead(){const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight";if(!this.weights[e])return;const t=this.hiddenSize,s=this.vocabSize,i=this.groupSize,n=t/8,o=t/i,u=performance.now(),r=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,h=this.weights[e];this._lmHeadQWeight=this.gpu.createBuffer("lmhead_qweight",n*s*4,r);const 
c=this.gpu.createBuffer("lmhead_scales_f32",o*s*4,r),m=Math.ceil(o*s/2)*4;this._lmHeadScales=this.gpu.createBuffer("lmhead_scales",m,r);const f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:D}=await import("./gpu-ops-flxI8RuZ.js").then(S=>S.b);return{SHADERS:D}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData("quant_params",new Uint32Array([t,s,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[h,this._lmHeadQWeight,c,p]),g=65535,P=Math.min(s,g),_=Math.ceil(s/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(P,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:D}=await import("./gpu-ops-flxI8RuZ.js").then(S=>S.b);return{SHADERS:D}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(o*s/2),v=this.gpu.createBufferFromData("pack_params",new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[c,this._lmHeadScales,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),c.destroy(),p.destroy(),v.destroy();const B=(n*s*4/1e6).toFixed(0),k=(m/1e6).toFixed(0);console.log(`[QUANT] lm_head INT4 (GPU): ${(performance.now()-u).toFixed(0)}ms, ${B}MB qw + ${k}MB sc`)}async _quantizeBF16ToINT4(a,e,t,s,i){const n=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,o=e/8,u=e/s,r=this.gpu.createBuffer(`${i}_qweight`,o*t*4,n),h=this.gpu.createBuffer(`${i}_scales_f32`,u*t*4,n),c=Math.ceil(u*t/2)*4,m=this.gpu.createBuffer(`${i}_scales`,c,n),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:B}=await 
import("./gpu-ops-flxI8RuZ.js").then(k=>k.b);return{SHADERS:B}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${i}_qp`,new Uint32Array([e,t,s]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,r,h,p]),g=65535,P=Math.min(t,g),_=Math.ceil(t/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(P,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:B}=await import("./gpu-ops-flxI8RuZ.js").then(k=>k.b);return{SHADERS:B}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(u*t/2),v=this.gpu.createBufferFromData(`${i}_pp`,new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[h,m,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();return d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),v.destroy(),{qweight:r,scales:m}}initBuffers(a){a||(a=this.textCfg.max_position_embeddings||4096),this.maxSeqLen=a;const 
e=this.gpu,t=this.hiddenSize,s=this.intermediateSize,i=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.hidden=e.createBuffer("hidden_a",t*4,i),this.hiddenB=e.createBuffer("hidden_b",t*4,i),this.normed=e.createBuffer("normed",t*4,i),this.normedB=e.createBuffer("normed_b",t*4,i),this.mlpIntermediate=e.createBuffer("mlp_inter",s*4,i),this.mlpOut=e.createBuffer("mlp_out",t*4,i),this.logits=e.createBuffer("logits",this.vocabSize*4,i),this.argmaxResult=e.createBuffer("argmax_result",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this._argmaxReadback=e.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"argmax_readback"}),this._topkResult=e.createBuffer("topk_result",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._repMaxTokens=64,this._recentTokens=new Uint32Array(this._repMaxTokens),this._recentTokenCount=0,this._gpuRecentTokens=e.createBuffer("recent_tokens",this._repMaxTokens*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST);for(let P=0;P<this.numLayers;P++)this.layerTypes[P]==="full_attention"&&(this.kvCache[P]={keys:e.createBuffer(`kv_k_${P}`,a*this.numKVHeads*this.headDim*4),values:e.createBuffer(`kv_v_${P}`,a*this.numKVHeads*this.headDim*4)});this.qProjFull=e.createBuffer("q_proj_full",this.numHeads*this.headDim*2*4,i),this.qProj=e.createBuffer("q_proj_out",this.numHeads*this.headDim*4,i),this.qGate=e.createBuffer("q_gate",this.numHeads*this.headDim*4,i),this.kProj=e.createBuffer("k_proj_out",this.numKVHeads*this.headDim*4,i),this.vProj=e.createBuffer("v_proj_out",this.numKVHeads*this.headDim*4,i);const n=this.textCfg.linear_num_key_heads,o=this.textCfg.linear_key_head_dim,u=this.textCfg.linear_value_head_dim,r=this.textCfg.linear_num_value_heads??n,c=r/n*u,m=n*(o+o+c),f=r*u;this.linValueDim=f,this.linValueHeads=r,this.linQKV=e.createBuffer("lin_qkv",m*4,i),this.linZ=e.createBuffer("lin_z",f*4,i),this.linOut=e.createBuffer("lin_out",f*4,i);const 
p=Math.max(t,this.numHeads*this.headDim,f)*4;this.attnOut=e.createBuffer("attn_out",p,i),this._maxGqaSplits=64;const w=this.numHeads*this._maxGqaSplits*(this.headDim+2)*4;this._gqaPartials=e.createBuffer("gqa_partials",w,i),this.linState={},this.linConvHist={};for(let P=0;P<this.numLayers;P++)this.layerTypes[P]==="linear_attention"&&(this.linState[P]=e.createBuffer(`lin_state_${P}`,n*o*c*4,i),this.linConvHist[P]=e.createBuffer(`lin_conv_hist_${P}`,3*m*4,i));this.zeroBuf=e.createBuffer("zero_buf",t*4,i),this.useSplitK=!1,this.splitKSplits=1;const g=Math.max(t,s);this.splitKPartials=e.createBuffer("splitk_partials",g*this.splitKSplits*4,i),this.paramBufs={},this._gqaParamBuf=e.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_params"}),this._kvStoreParamBuf=e.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"kv_store_params"}),this._gqaData=new Uint8Array(32),this._gqaDv=new DataView(this._gqaData.buffer),this._kvData=new Uint8Array(16),this._kvDv=new DataView(this._kvData.buffer)}initVision(){const a=this.config.vision_config;if(!a){this.vision=null;return}const 
e=this.gpu,t=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,s=a.hidden_size,i=a.intermediate_size,n=a.out_hidden_size,o=a.depth,u=a.num_heads,r=s/u,h=a.patch_size,c=a.temporal_patch_size,m=a.spatial_merge_size,f=3*c*h*h,p=4096,w=s*m*m;this.vision={V:s,Vi:i,Vo:n,depth:o,heads:u,headDim:r,patchSize:h,temporalPatchSize:c,mergeSize:m,patchInputDim:f,maxVitTokens:p,mergedHidden:w,numPosEmbeddings:a.num_position_embeddings,numGridPerSide:Math.round(Math.sqrt(a.num_position_embeddings)),hidden:e.createBuffer("vit_hidden",p*s*4,t),hiddenB:e.createBuffer("vit_hidden_b",p*s*4,t),normed:e.createBuffer("vit_normed",p*s*4,t),q:e.createBuffer("vit_q",p*s*4,t),k:e.createBuffer("vit_k",p*s*4,t),v:e.createBuffer("vit_v",p*s*4,t),attnOut:e.createBuffer("vit_attn_out",p*s*4,t),mlpInter:e.createBuffer("vit_mlp_inter",p*i*4,t),mlpOut:e.createBuffer("vit_mlp_out",p*s*4,t),cos:e.createBuffer("vit_cos",p*r*4,t),sin:e.createBuffer("vit_sin",p*r*4,t),posEmbed:e.createBuffer("vit_pos_embed",p*s*4,t),merged:e.createBuffer("vit_merged",p/4*n*4,t),mergerNormed:e.createBuffer("vit_merger_normed",p*s*4,t),mergerInter:e.createBuffer("vit_merger_inter",p/4*w*4,t),patchInput:e.createBuffer("vit_patch_input",p*f*4,t),qkv:e.createBuffer("vit_qkv",p*3*s*4,t)},this.imageTokenId=this.config.image_token_id,this.visionStartTokenId=this.config.vision_start_token_id,this.visionEndTokenId=this.config.vision_end_token_id,console.log(`[VISION] Initialized: depth=${o}, hidden=${s}, heads=${u}, headDim=${r}, out=${n}`),console.log(`[VISION] Buffer allocation: ~${((p*s*4*12+p*i*4)/1024/1024).toFixed(0)} MB transient`)}async _readVisionPosEmbed(){const a=this.vision,e=this.weights["model.visual.pos_embed.weight"];if(!e)throw new Error("Vision pos_embed weight not found");const t=a.numPosEmbeddings,s=a.V,i=t*s*2,n=this.gpu.createReadbackBuffer("vit_pos_embed_readback",i),o=this.gpu.device.createCommandEncoder();o.copyBufferToBuffer(e,0,n,0,i),this.gpu.device.queue.submit([o.finish()]),await 
n.mapAsync(GPUMapMode.READ);const u=new Uint16Array(n.getMappedRange().slice(0));n.unmap(),n.destroy();const r=new Float32Array(t*s),h=new ArrayBuffer(4),c=new Uint32Array(h),m=new Float32Array(h);for(let f=0;f<u.length;f++)c[0]=u[f]<<16,r[f]=m[0];this._vitPosEmbedF32=r,console.log(`[VISION] Read pos_embed: ${t}×${s} (${(i/1024).toFixed(0)} KB)`)}_interpolatePosEmbed(a,e){const t=this.vision,s=t.V,i=t.numGridPerSide,n=this._vitPosEmbedF32,o=t.mergeSize,u=a*e,r=new Float32Array(u*s);for(let p=0;p<a;p++){const w=a===1?0:p*(i-1)/(a-1),g=Math.min(Math.floor(w),i-1),P=Math.min(g+1,i-1),_=w-g;for(let l=0;l<e;l++){const b=e===1?0:l*(i-1)/(e-1),U=Math.min(Math.floor(b),i-1),y=Math.min(U+1,i-1),v=b-U,q=g*i+U,C=g*i+y,d=P*i+U,B=P*i+y,k=(1-_)*(1-v),D=(1-_)*v,S=_*(1-v),$=_*v,M=p*e+l;for(let z=0;z<s;z++)r[M*s+z]=k*n[q*s+z]+D*n[C*s+z]+S*n[d*s+z]+$*n[B*s+z]}}const h=a/o,c=e/o,m=new Float32Array(u*s);let f=0;for(let p=0;p<h;p++)for(let w=0;w<c;w++)for(let g=0;g<o;g++)for(let P=0;P<o;P++){const _=p*o+g,l=w*o+P,b=_*e+l;m.set(r.subarray(b*s,b*s+s),f*s),f++}return m}_computeVisionRoPE(a,e){const t=this.vision;t.headDim/2;const s=t.mergeSize,i=a/s,n=e/s,o=a*e,u=t.headDim/2,r=u/2,h=Math.max(a,e),c=new Float32Array(h*r);for(let w=0;w<h;w++)for(let g=0;g<r;g++){const P=1/Math.pow(1e4,2*g/u);c[w*r+g]=w*P}const m=new Float32Array(o*t.headDim),f=new Float32Array(o*t.headDim);let p=0;for(let w=0;w<i;w++)for(let g=0;g<n;g++)for(let P=0;P<s;P++)for(let _=0;_<s;_++){const l=w*s+P,b=g*s+_,U=p*t.headDim;for(let y=0;y<r;y++){const v=c[l*r+y],q=c[b*r+y];m[U+y]=Math.cos(v),m[U+r+y]=Math.cos(q),m[U+2*r+y]=Math.cos(v),m[U+3*r+y]=Math.cos(q),f[U+y]=Math.sin(v),f[U+r+y]=Math.sin(q),f[U+2*r+y]=Math.sin(v),f[U+3*r+y]=Math.sin(q)}p++}return{cos:m,sin:f}}async preprocessImage(a){const e=this.vision,t=e.patchSize,s=e.mergeSize,i=t*s,n=65536,o=65536,u=new Image;await new 
Promise((k,D)=>{u.onload=k,u.onerror=D,u.src=a});let{width:r,height:h}=u,c=Math.max(i,Math.round(h/i)*i),m=Math.max(i,Math.round(r/i)*i);if(c*m>o){const k=Math.sqrt(h*r/o);c=Math.max(i,Math.floor(h/k/i)*i),m=Math.max(i,Math.floor(r/k/i)*i)}else if(c*m<n){const k=Math.sqrt(n/(h*r));c=Math.ceil(h*k/i)*i,m=Math.ceil(r*k/i)*i}const p=new OffscreenCanvas(m,c).getContext("2d");p.drawImage(u,0,0,m,c);const g=p.getImageData(0,0,m,c).data,P=[.5,.5,.5],_=[.5,.5,.5],l=c/t,b=m/t,U=l*b,y=l/s,v=b/s,q=e.temporalPatchSize,C=3*q*t*t,d=new Float32Array(U*C);let B=0;for(let k=0;k<y;k++)for(let D=0;D<v;D++)for(let S=0;S<s;S++)for(let $=0;$<s;$++){const M=k*s+S,z=D*s+$,O=M*t,G=z*t,R=B*C;for(let F=0;F<q;F++)for(let T=0;T<3;T++)for(let x=0;x<t;x++)for(let A=0;A<t;A++){const j=((O+x)*m+(G+A))*4+T,L=(g[j]/255-P[T])/_[T],Q=((T*q+F)*t+x)*t+A;d[R+Q]=L}B++}return console.log(`[VISION] Preprocessed: ${r}x${h} → ${m}x${c}, ${U} patches (${l}x${b}), merge→${U/4} tokens`),{pixels:d,gridH:l,gridW:b,numPatches:U,imgW:m,imgH:c}}async visionForward(a){const e=this.vision,t=this.gpu,{pixels:s,gridH:i,gridW:n,numPatches:o}=a;this._vitPosEmbedF32||await this._readVisionPosEmbed(),t.device.queue.writeBuffer(e.patchInput,0,s);const u=this._interpolatePosEmbed(i,n);t.device.queue.writeBuffer(e.posEmbed,0,u);const{cos:r,sin:h}=this._computeVisionRoPE(i,n);t.device.queue.writeBuffer(e.cos,0,r),t.device.queue.writeBuffer(e.sin,0,h),t.beginBatch();const c=this.weights["model.visual.patch_embed.proj.weight"],m=this.weights["model.visual.patch_embed.proj.bias"],f=this.makeUniform("vit_patch_params",[e.patchInputDim,e.V]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.patchInput,c,m,e.hidden,f])],Math.ceil(e.V/32),o);const p=this.makeUniform("vit_add_len",[o*e.V]);t.dispatch(this.pipelines.vit_add,[t.createBindGroup(this.pipelines.vit_add,0,[e.hidden,e.posEmbed,p])],Math.ceil(o*e.V/256));for(let 
d=0;d<e.depth;d++)t.endBatch(),t.beginBatch(),this._vitBlock(d,o);const w=this.weights["model.visual.merger.norm.weight"],g=this.weights["model.visual.merger.norm.bias"],P=this.makeUniform("vit_merger_ln_params",[e.V,1e-6]);t.dispatch(this.pipelines.vit_layernorm,[t.createBindGroup(this.pipelines.vit_layernorm,0,[e.hidden,w,g,e.mergerNormed,P])],o);const _=o/4,l=this.weights["model.visual.merger.linear_fc1.weight"],b=this.weights["model.visual.merger.linear_fc1.bias"],U=this.makeUniform("vit_merger_fc1_params",[e.mergedHidden,e.mergedHidden]);t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerNormed,l,b,e.mergerInter,U])],Math.ceil(e.mergedHidden/32),_);const y=this.makeUniform("vit_gelu_len",[_*e.mergedHidden]);t.dispatch(this.pipelines.vit_gelu,[t.createBindGroup(this.pipelines.vit_gelu,0,[e.mergerInter,y])],Math.ceil(_*e.mergedHidden/256));const v=this.weights["model.visual.merger.linear_fc2.weight"],q=this.weights["model.visual.merger.linear_fc2.bias"],C=this.makeUniform("vit_merger_fc2_params",[e.mergedHidden,e.Vo]);return t.dispatch(this.pipelines.vit_bf16_matvec_bias,[t.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[e.mergerInter,v,q,e.merged,C])],Math.ceil(e.Vo/32),_),t.endBatch(),await t.device.queue.onSubmittedWorkDone(),console.log(`[VISION] Forward done: ${o} patches → ${_} merged tokens (dim=${e.Vo})`),{numMergedTokens:_,embedBuffer:e.merged}}_vitBlock(a,e){const t=this.vision,s=this.gpu,i=`model.visual.blocks.${a}`,n=this.weights[`${i}.norm1.weight`],o=this.weights[`${i}.norm1.bias`],u=this.makeUniform(`vit_ln1_${a}`,[t.V,1e-6]);s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,n,o,t.normed,u])],e);const r=this.weights[`${i}.attn.qkv.weight`],h=this.weights[`${i}.attn.qkv.bias`],c=t.V,m=c*c*2,f=c*2,p=this.makeUniform(`vit_qkv_mv_${a}`,[c,c]);for(let M=0;M<3;M++){const 
z=[t.q,t.k,t.v][M],O=s.createBindGroupWithOffsets(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,{buffer:r,offset:M*m,size:m},{buffer:h,offset:M*f,size:f},z,p]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[O],Math.ceil(c/32),e)}const w=this.makeUniform(`vit_rope_${a}`,[e,t.heads,t.headDim]);s.dispatch(this.pipelines.vit_rope,[s.createBindGroup(this.pipelines.vit_rope,0,[t.q,t.k,t.attnOut,t.mlpOut,t.cos,t.sin,w])],Math.ceil(e*t.heads*t.headDim/256));const g=1/Math.sqrt(t.headDim),P=this.makeUniform(`vit_attn_${a}`,[e,t.heads,t.headDim,g]);s.dispatch(this.pipelines.vit_attention,[s.createBindGroup(this.pipelines.vit_attention,0,[t.attnOut,t.mlpOut,t.v,t.q,P])],e,t.heads);const _=this.weights[`${i}.attn.proj.weight`],l=this.weights[`${i}.attn.proj.bias`],b=this.makeUniform(`vit_proj_${a}`,[c,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.q,_,l,t.mlpOut,b])],Math.ceil(c/32),e);const U=this.makeUniform(`vit_res1_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,U])],Math.ceil(e*c/256));const y=this.weights[`${i}.norm2.weight`],v=this.weights[`${i}.norm2.bias`];s.dispatch(this.pipelines.vit_layernorm,[s.createBindGroup(this.pipelines.vit_layernorm,0,[t.hidden,y,v,t.normed,u])],e);const q=this.weights[`${i}.mlp.linear_fc1.weight`],C=this.weights[`${i}.mlp.linear_fc1.bias`],d=this.makeUniform(`vit_mlp_fc1_${a}`,[c,t.Vi]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.normed,q,C,t.mlpInter,d])],Math.ceil(t.Vi/32),e);const B=this.makeUniform(`vit_gelu_${a}`,[e*t.Vi]);s.dispatch(this.pipelines.vit_gelu_tanh,[s.createBindGroup(this.pipelines.vit_gelu_tanh,0,[t.mlpInter,B])],Math.ceil(e*t.Vi/256));const 
k=this.weights[`${i}.mlp.linear_fc2.weight`],D=this.weights[`${i}.mlp.linear_fc2.bias`],S=this.makeUniform(`vit_mlp_fc2_${a}`,[t.Vi,c]);s.dispatch(this.pipelines.vit_bf16_matvec_bias,[s.createBindGroup(this.pipelines.vit_bf16_matvec_bias,0,[t.mlpInter,k,D,t.mlpOut,S])],Math.ceil(c/32),e);const $=this.makeUniform(`vit_res2_${a}`,[e*c]);s.dispatch(this.pipelines.vit_add,[s.createBindGroup(this.pipelines.vit_add,0,[t.hidden,t.mlpOut,$])],Math.ceil(e*c/256))}computeMultimodalPositions(a,e,t,s){const i=this.vision.mergeSize,n=t/i,o=s/i,u=a.length,r=new Array(3);for(let f=0;f<3;f++)r[f]=new Int32Array(u);let h=0,c=0;for(let f=0;f<u;f++)if(a[f]===this.imageTokenId){const w=c,g=Math.floor(w/o),P=w%o;r[0][f]=h,r[1][f]=h+g,r[2][f]=h+P,c++,c===e&&(h+=Math.max(n,o))}else r[0][f]=h,r[1][f]=h,r[2][f]=h,h++;const m=h-u;return{positionIds3D:r,ropeDelta:m}}initB2Buffers(){const a=this.gpu,e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.b2={},this.b2.hidden=a.createBuffer("b2_hidden_a",2*e*4,s),this.b2.hiddenB=a.createBuffer("b2_hidden_b",2*e*4,s),this.b2.normed=a.createBuffer("b2_normed",2*e*4,s),this.b2.mlpIntermediate=a.createBuffer("b2_mlp_inter",2*t*4,s),this.b2.mlpOut=a.createBuffer("b2_mlp_out",2*e*4,s),this.b2.logits=a.createBuffer("b2_logits",2*this.vocabSize*4,s),this.b2.zeroBuf=a.createBuffer("b2_zero_buf",2*e*4,s),this.b2.qProjFull=a.createBuffer("b2_q_proj_full",2*this.numHeads*this.headDim*2*4,s),this.b2.qProj=a.createBuffer("b2_q_proj_out",2*this.numHeads*this.headDim*4,s),this.b2.qGate=a.createBuffer("b2_q_gate",2*this.numHeads*this.headDim*4,s),this.b2.kProj=a.createBuffer("b2_k_proj_out",2*this.numKVHeads*this.headDim*4,s),this.b2.vProj=a.createBuffer("b2_v_proj_out",2*this.numKVHeads*this.headDim*4,s);const 
i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,h=this.linValueHeads/i*o,c=i*(n+n+h),m=this.linValueDim;this.b2.linQKV=a.createBuffer("b2_lin_qkv",2*c*4,s),this.b2.linZ=a.createBuffer("b2_lin_z",2*m*4,s),this.b2.linOut=a.createBuffer("b2_lin_out",2*m*4,s);const f=Math.max(e,this.numHeads*this.headDim,m)*4;if(this.b2.attnOut=a.createBuffer("b2_attn_out",2*f,s),this.abQuantized){const p=this.textCfg.linear_num_value_heads??i;this.b2.linAlpha=a.createBuffer("b2_lin_alpha",2*p*4,s),this.b2.linBeta=a.createBuffer("b2_lin_beta",2*p*4,s)}this.b2.topkResult0=a.createBuffer("b2_topk0",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkResult1=a.createBuffer("b2_topk1",2048,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this.b2.topkReadback0=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb0"}),this.b2.topkReadback1=a.device.createBuffer({size:2048,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_topk_rb1"}),this.b2.argmaxResult0=a.createBuffer("b2_argmax0",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxResult1=a.createBuffer("b2_argmax1",8,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC),this.b2.argmaxReadback0=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb0"}),this.b2.argmaxReadback1=a.device.createBuffer({size:8,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"b2_argmax_rb1"}),this.b2._gqaParamBuf0=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params0"}),this.b2._gqaParamBuf1=a.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"b2_gqa_params1"}),this.b2._dims={H:e,I:t,linQKVDim:c,valueDim:m,linHeads:i,linKeyDim:n,linValDim:o,linEVD:h,qProjSize:this.numHeads*this.headDim,qProjFullSize:this.numHeads*this.headDim*2
,kProjSize:this.numKVHeads*this.headDim,vProjSize:this.numKVHeads*this.headDim,attnOutElems:f/4},console.log(`[B2] Allocated batch=2 buffers (${((2*(e+e+e+t+e+this.vocabSize)*4+2*f)/1e6).toFixed(1)} MB activations)`)}runWithOffsets(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroupWithOffsets(i,0,e);this.gpu.dispatch(i,[n],t,s)}makeUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let o=0;o<e.length;o++)Number.isInteger(e[o])?s.setUint32(o*4,e[o],!0):s.setFloat32(o*4,e[o],!0);const i=a+"_"+e.join("_");if(this.paramBufs[i])return this.paramBufs[i];const n=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(n,0,new Uint8Array(t)),this.paramBufs[i]=n,n}_makeMixedUniform(a,e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),s=new DataView(t);for(let n=0;n<e.length;n++){const o=e[n];o.u!==void 0?s.setUint32(n*4,o.u,!0):s.setFloat32(n*4,o.f,!0)}this._mixedUniformBufs||(this._mixedUniformBufs={});let i=this._mixedUniformBufs[a];return i||(i=this.gpu.device.createBuffer({size:t.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a}),this._mixedUniformBufs[a]=i),this.gpu.device.queue.writeBuffer(i,0,new Uint8Array(t)),i}makeUniformTyped(a,e,t){const s=e.length+t.length,i=new ArrayBuffer(Math.max(16,Math.ceil(s*4/16)*16)),n=new DataView(i);let o=0;for(const h of e)n.setUint32(o,h,!0),o+=4;for(const h of t)n.setFloat32(o,h,!0),o+=4;const u=a+"_"+[...e,...t].join("_");if(this.paramBufs[u])return this.paramBufs[u];const r=this.gpu.device.createBuffer({size:i.byteLength,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:a});return this.gpu.device.queue.writeBuffer(r,0,new Uint8Array(i)),this.paramBufs[u]=r,r}makeFusedNormMLPUniform(a){const e=`fused_anorm_mlp_${a}`;if(this.paramBufs[e])return this.paramBufs[e];const t=this.hiddenSize,s=this.intermediateSize,i=16+640*16,n=new 
ArrayBuffer(i),o=new DataView(n);o.setUint32(0,t,!0),o.setUint32(4,s,!0),o.setUint32(8,this.groupSize,!0),o.setFloat32(12,this.rmsEps,!0);const u=`model.language_model.layers.${a}.post_attention_layernorm.weight`,r=this._normWeightRaw?.[u];if(!r)throw new Error(`Norm weight not cached for layer ${a}`);for(let c=0;c<t/2;c++){const m=Math.floor(c/4),f=c%4;o.setUint32(16+m*16+f*4,r[c],!0)}const h=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:e});return this.gpu.device.queue.writeBuffer(h,0,new Uint8Array(n)),this.paramBufs[e]=h,h}fusedNormGptqOp(a,e,t,s,i,n,o,u){const r=this.getQWeight(s);if(!r.qweight)return null;const h=this.makeFusedNormGPTQUniform(i,o),c=u?"fused_norm_gptq":"fused_norm_gptq_noadd",m=u?"fnorm_gptq_":"fnorm_gptqna_",f=u?[a,e,r.qweight,r.scales,t,h]:[a,r.qweight,r.scales,t,h];return this.prepOpCached(`${m}${s}`,c,f,this.wg4(o))}makeFusedNormGPTQUniform(a,e){const t=`fused_norm_gptq_${a}_${e}`;if(this.paramBufs[t])return this.paramBufs[t];const s=this.hiddenSize,i=16+512*16,n=new ArrayBuffer(i),o=new DataView(n);o.setUint32(0,s,!0),o.setUint32(4,e,!0),o.setUint32(8,this.groupSize,!0),o.setFloat32(12,this.rmsEps,!0);const u=this._normWeightRaw?.[a];if(!u)throw new Error(`Norm weight not cached: ${a}`);for(let h=0;h<s/2;h++){const c=Math.floor(h/4),m=h%4;o.setUint32(16+c*16+m*4,u[h],!0)}const r=this.gpu.device.createBuffer({size:i,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:t});return this.gpu.device.queue.writeBuffer(r,0,new Uint8Array(n)),this.paramBufs[t]=r,r}run(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);this.gpu.dispatch(i,[n],t,s)}runCached(a,e,t,s,i=1){const n=this.pipelines[e],o=this.gpu.getCachedBindGroup(a,n,0,t);this.gpu.dispatch(n,[o],s,i)}prepOp(a,e,t,s=1){const i=this.pipelines[a],n=this.gpu.createBindGroup(i,0,e);return{pipeline:i,bindGroups:[n],workgroupsX:t,workgroupsY:s}}prepOpCached(a,e,t,s,i=1){const 
n=this.pipelines[e],o=this.gpu.getCachedBindGroup(a,n,0,t);return{pipeline:n,bindGroups:[o],workgroupsX:s,workgroupsY:i}}wg(a){return Math.ceil(a/256)}wg4(a){return Math.ceil(a/32)}wg8(a){return Math.ceil(a/8)}getQWeight(a){return{qweight:this.weights[a+".qweight"],scales:this.weights[a+".scales"],qzeros:this.weights[a+".qzeros"]}}embedding(a){const e=this.weights["model.language_model.embed_tokens.weight"],t=this.makeUniform("emb_params",[a,this.hiddenSize]);this.run("embedding",[e,this.hidden,t],this.wg(this.hiddenSize))}embeddingFromVisionBuffer(a,e){const t=this.hiddenSize;this.gpu.copyBuffer(a,this.hidden,t*4,e*t*4,0)}embeddingFromArgmax(){const a=this.weights["model.language_model.embed_tokens.weight"];this._embedArgmaxParams||(this._embedArgmaxParams=this.makeUniform("emb_argmax_params",[this.hiddenSize])),this.runCached("embed_from_argmax","embed_from_argmax",[a,this.hidden,this.argmaxResult,this._embedArgmaxParams],this.wg(this.hiddenSize))}_dispatchLmHead(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,s=a/this.groupSize%4===0,i=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(s){const n=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg8(e))}else{const n=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("lm_head",n,[this.normed,this._lmHeadQWeight,this._lmHeadScales,this.logits,i],this.wg4(e))}}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.makeUniform("lmhead_params",[this.hiddenSize,this.vocabSize]);this.runCached("lm_head","bf16_matvec",[this.normed,t,this.logits,s],this.wg(this.vocabSize))}}rmsNorm(a,e,t,s){const i=this.makeUniform("rmsnorm_params",[this.hiddenSize,this.rmsEps]);s?this.runCached(s,"rmsnorm",[a,t,e,i],1):this.run("rmsnorm",[a,t,e,i],1)}gptqMatvec(a,e,t,s,i){const 
n=this.getQWeight(t);if(!n.qweight)return;const o=s/this.groupSize,u=o%4===0;if(this.useSplitK&&u){let r=this.splitKSplits;for(;r>1&&o%(r*4)!==0;)r>>=1;if(r>1){const h=this.makeUniform(`mv_sk_${s}_${i}_${r}`,[s,i,this.groupSize,r]);this.runCached(`sk4tf16_${t}`,"gptq_matvec_4t_f16_sk",[a,n.qweight,n.scales,this.splitKPartials,h],this.wg8(i),r);const c=this.makeUniform(`rsk_${i}_${r}`,[i,r]);this.runCached(`rsk_${t}`,"reduce_splitk",[this.splitKPartials,e,c],this.wg(i));return}}if(u){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);this.runCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg4(i))}}gptqMatvecOp(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;if(s/this.groupSize%4===0){const r=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t",h=this.gpu.hasF16?"gptq4tf16_":"gptq4t_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg8(i))}else{const r=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec",h=this.gpu.hasF16?"gptqf16_":"gptq_",c=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);return this.prepOpCached(`${h}${t}`,r,[a,n.qweight,n.scales,e,c],this.wg4(i))}}bf16Matvec(a,e,t,s,i,n){const o=this.makeUniform(`bf16mv_${s}_${i}`,[s,i]);n?this.runCached(n,"bf16_matvec",[a,t,e,o],this.wg(i)):this.run("bf16_matvec",[a,t,e,o],this.wg(i))}siluMul(a,e,t,s,i){const n=this.makeUniform(`silu_${s}`,[s]);i?this.runCached(i,"silu_mul",[a,e,t,n],this.wg(s)):this.run("silu_mul",[a,e,t,n],this.wg(s))}addVectors(a,e,t,s){const 
i=this.makeUniform(`add_${t}`,[t]);s?this.runCached(s,"add",[a,e,i],this.wg(t)):this.run("add",[a,e,i],this.wg(t))}addAndRmsNorm(a,e,t,s,i){const n=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);i?this.runCached(i,"add_rmsnorm",[a,e,s,t,n],1):this.run("add_rmsnorm",[a,e,s,t,n],1)}mlp(a,e){const t=`model.language_model.layers.${e}.mlp`,s=this.hiddenSize,i=this.intermediateSize,n=this.getQWeight(`${t}.gate_proj`),o=this.getQWeight(`${t}.up_proj`),u=this.makeUniform(`fused_mv_${s}_${i}`,[s,i,this.groupSize]);if(s/this.groupSize%4===0){const c=this.gpu.hasF16?"fused_gate_up_silu_4t_f16":"fused_gate_up_silu_4t",m=this.gpu.hasF16?"fused_gus4t16_":"fused_gus4t_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,o.qweight,o.scales,this.mlpIntermediate,u],this.wg8(i))}else{const c=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",m=this.gpu.hasF16?"fused_gus16_":"fused_gus_";this.runCached(`${m}${e}`,c,[a,n.qweight,n.scales,o.qweight,o.scales,this.mlpIntermediate,u],this.wg4(i))}this.gptqMatvec(this.mlpIntermediate,this.mlpOut,`${t}.down_proj`,i,s)}fullAttentionFused(a,e,t,s,i,n,o,u){i=i||this.normed;const r=`model.language_model.layers.${t}.self_attn`,h=this.hiddenSize,c=this.headDim,m=this.numHeads,f=this.numKVHeads,p=m/f,w=t===0,g=`model.language_model.layers.${t}.input_layernorm.weight`,P=this.weights[g];if(w){const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"rmsnorm",[a,P,i,z],1)}else{const z=this.makeUniform("add_rmsnorm_params",[h,this.rmsEps]);this.runCached(`fa_norm_${t}`,"add_rmsnorm_ro",[a,e,P,i,z],1)}const _=this.gptqMatvecOp(i,this.qProjFull,`${r}.q_proj`,h,m*c*2),l=this.gptqMatvecOp(i,this.kProj,`${r}.k_proj`,h,f*c),b=this.gptqMatvecOp(i,this.vProj,`${r}.v_proj`,h,f*c);this.gpu.dispatchMulti([_,l,b].filter(Boolean));const 
U=this.kvCache[t],y=this._fusedSQKParams[t],v=u??s;this._gqaDv.setUint32(0,v,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,n??s,!0),this._gqaDv.setUint32(12,o??s,!0),this.gpu.device.queue.writeBuffer(y,16,this._gqaData,0,16),this.runCached(`fa_sqk_${t}`,this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,U.keys,U.values,y],m+f);const q=(u??s)+1,C=this._forceMinSplits||1,d=Math.max(C,Math.min(Math.max(1,Math.ceil(q/32)),this._maxGqaSplits)),B=d>1?this._gqaPartials:this.attnOut;if(this._gqaDv.setUint32(0,q,!0),this._gqaDv.setUint32(4,c,!0),this._gqaDv.setUint32(8,f,!0),this._gqaDv.setUint32(12,m,!0),this._gqaDv.setUint32(16,p,!0),this._gqaDv.setUint32(20,d,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.run("gqa_attention_head",[this.qProj,U.keys,U.values,B,this._gqaParamBuf],m,d),d>1){this._gqaReduceParams||(this._gqaReduceParams=this.gpu.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"gqa_reduce_params"}));const z=new Uint8Array(16),O=new DataView(z.buffer);O.setUint32(0,c,!0),O.setUint32(4,d,!0),O.setUint32(8,m,!0),this.gpu.device.queue.writeBuffer(this._gqaReduceParams,0,z),this.run("gqa_reduce",[this._gqaPartials,this.attnOut,this._gqaReduceParams],m)}const k=this.getQWeight(`${r}.o_proj`),D=m*c,$=D/this.groupSize%4===0,M=this.makeUniform(`fused_sig_mv_${D}_${h}`,[D,h,this.groupSize]);if($){const z=this.gpu.hasF16?"fused_sigmoid_gptq_4t_f16":"fused_sigmoid_gptq_4t",O=this.gpu.hasF16?"fused_sig4t16_oproj_":"fused_sig4t_oproj_";this.runCached(`${O}${t}`,z,[this.attnOut,this.qGate,k.qweight,k.scales,this.qProj,M],this.wg8(h))}else{const z=this.gpu.hasF16?"fused_sigmoid_gptq_f16":"fused_sigmoid_gptq",O=this.gpu.hasF16?"fused_sig16_oproj_":"fused_sig_oproj_";this.runCached(`${O}${t}`,z,[this.attnOut,this.qGate,k.qweight,k.scales,this.qProj,M],this.wg4(h))}}linearAttentionFused(a,e,t,s){s=s||this.normed;const 
i=`model.language_model.layers.${t}.linear_attn`,n=this.hiddenSize,o=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,h=this.linValueHeads,m=h/o*r,f=o*(u+u+m),p=this.linValueDim,w=t===0,g=`model.language_model.layers.${t}.input_layernorm.weight`,P=this.weights[g];if(w){const d=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"rmsnorm",[a,P,s,d],1)}else{const d=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.runCached(`la_norm_${t}`,"add_rmsnorm_ro",[a,e,P,s,d],1)}{const d=[this.gptqMatvecOp(s,this.linQKV,`${i}.in_proj_qkv`,n,f),this.gptqMatvecOp(s,this.linZ,`${i}.in_proj_z`,n,p)];this.abQuantized&&(d.push(this.gptqMatvecOp(s,this.linAlpha,`${i}.in_proj_a`,n,h)),d.push(this.gptqMatvecOp(s,this.linBeta,`${i}.in_proj_b`,n,h))),this.gpu.dispatchMulti(d.filter(Boolean))}const _=this.weights[`${i}.conv1d.weight`],l=this.weights[`${i}.A_log`],b=this.weights[`${i}.dt_bias`],U=this.weights[`${i}.norm.weight`];if(this.abQuantized){const d=`fused_cdn_q_${o}_${u}_${r}_${f}_${h}`;let B=this.paramBufs[d];if(!B){const k=new ArrayBuffer(32),D=new DataView(k);D.setUint32(0,o,!0),D.setUint32(4,u,!0),D.setUint32(8,r,!0),D.setUint32(12,f,!0),D.setFloat32(16,this.rmsEps,!0),D.setUint32(20,0,!0),D.setUint32(24,h,!0),B=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_q_params"}),this.gpu.device.queue.writeBuffer(B,0,new Uint8Array(k)),this.paramBufs[d]=B}this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,this.linAlpha,this.linBeta,l,b,U,B],o)}else{const d=`fused_cdn_ext_${o}_${u}_${r}_${f}_${n}_${h}`;let B=this.paramBufs[d];if(!B){const D=new ArrayBuffer(32),S=new 
DataView(D);S.setUint32(0,o,!0),S.setUint32(4,u,!0),S.setUint32(8,r,!0),S.setUint32(12,f,!0),S.setFloat32(16,this.rmsEps,!0),S.setUint32(20,n,!0),S.setUint32(24,h,!0),B=this.gpu.device.createBuffer({size:32,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"fused_cdn_ext_params"}),this.gpu.device.queue.writeBuffer(B,0,new Uint8Array(D)),this.paramBufs[d]=B}const k=this.linABWeight[t];this.runCached(`fused_cdn_${t}`,"fused_conv_deltanet_norm",[this.linQKV,this.linConvHist[t],_,this.linState[t],this.linOut,s,k,l,b,U,B],o)}const y=this.getQWeight(`${i}.out_proj`),q=p/this.groupSize%4===0,C=this.makeUniform(`fused_silu_mv_${p}_${n}`,[p,n,this.groupSize]);if(q){const d=this.gpu.hasF16?"fused_silu_gptq_4t_f16":"fused_silu_gptq_4t",B=this.gpu.hasF16?"fused_silu4t16_oproj_":"fused_silu4t_oproj_";this.runCached(`${B}${t}`,d,[this.linZ,this.linOut,y.qweight,y.scales,this.attnOut,C],this.wg8(n))}else{const d=this.gpu.hasF16?"fused_silu_gptq_f16":"fused_silu_gptq",B=this.gpu.hasF16?"fused_silu16_oproj_":"fused_silu_oproj_";this.runCached(`${B}${t}`,d,[this.linZ,this.linOut,y.qweight,y.scales,this.attnOut,C],this.wg4(n))}}fusedNormMLP(a,e,t,s,i,n){n=n||this.normed;const o=this.hiddenSize,u=`model.language_model.layers.${i}.post_attention_layernorm.weight`,r=this.weights[u],h=this.makeUniform("add_rmsnorm_params",[o,this.rmsEps]),c=`mlp_norm_${i}_${a===this.hidden?"a":"b"}`;this.runCached(c,"three_way_add_rmsnorm",[a,t,s,r,e,n,h],1),this.mlp(n,i)}decoderLayer(a,e,t,s,i,n,o,u){let r;this.layerTypes[a]==="full_attention"?(this.fullAttentionFused(t,i,a,e,void 0,n,o,u),r=this.qProj):(this.linearAttentionFused(t,i,a),r=this.attnOut),this.fusedNormMLP(t,s,i,r,a)}async forward(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let r=0;r<this.numLayers;r++){this.decoderLayer(r,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const 
n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const o=this.temperature??.7;if(o>0){const r=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(r>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:r},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:o},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const r=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,r],1)}return this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this._readAndSample()}forwardSubmit(a,e){this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);let t=this.hidden,s=this.hiddenB,i=this.zeroBuf;for(let r=0;r<this.numLayers;r++){this.decoderLayer(r,e,t,s,i),i=this.mlpOut;const h=t;t=s,s=h}this.mtp&&this._mtpSaveHidden(t,this.mlpOut);const n=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(t,this.mlpOut,this.normed,n,"add_final_norm"),this._dispatchLmHead();const o=this.temperature??.7;if(o>0){const 
r=this.repetitionPenalty??1,h=this.presencePenalty??0;if(this._recentTokenCount>0&&(r>1||h>0)){this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const p=this._makeMixedUniform("penalty_params",[{u:this._recentTokenCount},{f:r},{f:h},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,p],Math.ceil(this._recentTokenCount/256))}const c=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,c],1);const m=Math.random()*4294967295>>>0,f=this._makeMixedUniform("sample_params",[{f:o},{u:this.topK??20},{f:this.topP??.8},{u:m}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,f],1)}else{const r=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,r],1)}this.gpu._passCount!==void 0&&!this._passCountLogged&&(console.log(`[PERF] ${this.gpu._passCount} compute passes per token`),this._passCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch()}_buildReplayFromRecording(){const a=this.gpu.stopRecording(),e=this.pipelines.gqa_attention_head,t=[];for(const s of a)if(s.multi)for(const i of s.ops)t.push({p:i.pipeline,bg:i.bindGroup,x:i.wgX,y:i.wgY,gqa:i.pipeline===e?1:0});else t.push({p:s.pipeline,bg:s.bindGroup,x:s.wgX,y:s.wgY,gqa:s.pipeline===e?1:0});this._replayFlat=t,console.log(`[PERF] Recorded ${t.length} flat replay ops for core forward`)}_replayCoreForward(a){const e=this.gpu,t=a-this._ropeDelta,s=t+1,i=Math.max(2,Math.min(Math.max(1,Math.ceil(s/32)),this._maxGqaSplits));this._gqaDv.setUint32(0,t,!0),this._gqaDv.setUint32(4,a,!0),this._gqaDv.setUint32(8,a,!0),this._gqaDv.setUint32(12,a,!0);for(let 
r=0;r<this.numLayers;r++)this.layerTypes[r]==="full_attention"&&e.device.queue.writeBuffer(this._fusedSQKParams[r],16,this._gqaData,0,16);this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,i,!0),e.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData,0,24),this._gqaReduceParams&&(this._gqaDv.setUint32(0,this.headDim,!0),this._gqaDv.setUint32(4,i,!0),this._gqaDv.setUint32(8,this.numHeads,!0),this._gqaDv.setUint32(12,0,!0),e.device.queue.writeBuffer(this._gqaReduceParams,0,this._gqaData,0,16));const n=e._singlePass,o=this._replayFlat,u=o.length;for(let r=0;r<u;r++){const h=o[r];n.setPipeline(h.p),n.setBindGroup(0,h.bg),n.dispatchWorkgroups(h.x,h.gqa?i:h.y)}}embeddingB2(a,e){const t=this.hiddenSize,s=this.weights["model.language_model.embed_tokens.weight"],i=this.makeUniform("emb_params",[a,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:0,size:t*4},i],this.wg(t));const n=this.makeUniform(`emb_params_${e}`,[e,t]);this.runWithOffsets("embedding",[s,{buffer:this.b2.hidden,offset:t*4,size:t*4},n],this.wg(t))}gptqMatvecB2(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return;const o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",r=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";this.runCached(`${r}${t}`,u,[a,n.qweight,n.scales,e,o],this.wg4(i))}gptqMatvecB2Op(a,e,t,s,i){const n=this.getQWeight(t);if(!n.qweight)return null;const o=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]),u=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2",r=this.gpu.hasF16?"b2gptqf16_":"b2gptq_";return this.prepOpCached(`${r}${t}`,u,[a,n.qweight,n.scales,e,o],this.wg4(i))}fullAttentionB2(a,e,t,s){const 
i=`model.language_model.layers.${t}.self_attn`,n=this.hiddenSize,o=this.b2._dims,u=this.headDim,r=this.numHeads,h=this.numKVHeads,c=r/h,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:n*4},p,{buffer:this.b2.normed,offset:0,size:n*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:n*4,size:n*4},p,{buffer:this.b2.normed,offset:n*4,size:n*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);const g=this.gptqMatvecB2Op(this.b2.normed,this.b2.qProjFull,`${i}.q_proj`,n,r*u*2),P=this.gptqMatvecB2Op(this.b2.normed,this.b2.kProj,`${i}.k_proj`,n,h*u),_=this.gptqMatvecB2Op(this.b2.normed,this.b2.vProj,`${i}.v_proj`,n,h*u);this.gpu.dispatchMulti([g,P,_].filter(Boolean));const l=this.kvCache[t],b=this._fusedSQKParams[t],U=o.qProjFullSize*4,y=o.kProjSize*4,v=o.vProjSize*4,q=o.qProjSize*4;this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(b,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:0,size:U},{buffer:this.b2.kProj,offset:0,size:y},{buffer:this.b2.vProj,offset:0,size:v},{buffer:this.b2.qProj,offset:0,size:q},{buffer:this.b2.qGate,offset:0,size:q},l.keys,l.values,b],r+h),this._gqaDv.setUint32(0,s+1,!0),this._gqaDv.setUint32(4,s+1,!0),this._gqaDv.setUint32(8,s+1,!0),this._gqaDv.setUint32(12,s+1,!0),this.gpu.device.queue.writeBuffer(b,16,this._gqaData,0,16),this.runWithOffsets(this._splitQKNormShaderKey,[{buffer:this.b2.qProjFull,offset:U,size:U},{buffer:this.b2.kProj,offset:y,size:y},{buffer:this.b2.vProj,offset:v,size:v},{buffer:this.b2.qProj,offset:q,size:q},{buffer:this.b2.qGate,offset:q,size:q},l.keys,l.values,b],r+h);const 
C=s+1,d=s+2;this._gqaDv.setUint32(0,C,!0),this._gqaDv.setUint32(4,u,!0),this._gqaDv.setUint32(8,h,!0),this._gqaDv.setUint32(12,r,!0),this._gqaDv.setUint32(16,c,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf0,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:0,size:q},l.keys,l.values,{buffer:this.b2.attnOut,offset:0,size:o.qProjSize*4},this.b2._gqaParamBuf0],r),this._gqaDv.setUint32(0,d,!0),this._gqaDv.setUint32(24,s+1,!0),this.gpu.device.queue.writeBuffer(this.b2._gqaParamBuf1,0,this._gqaData),this.runWithOffsets("gqa_attention_head",[{buffer:this.b2.qProj,offset:q,size:q},l.keys,l.values,{buffer:this.b2.attnOut,offset:o.qProjSize*4,size:o.qProjSize*4},this.b2._gqaParamBuf1],r);const B=this.getQWeight(`${i}.o_proj`),k=r*u,D=this.makeUniform(`fused_sig_mv_${k}_${n}`,[k,n,this.groupSize]);this.runCached(`b2_fused_sig_oproj_${t}`,"fused_sigmoid_gptq_b2_f16",[this.b2.attnOut,this.b2.qGate,B.qweight,B.scales,this.b2.qProj,D],this.wg4(n))}linearAttentionB2(a,e,t){const s=`model.language_model.layers.${t}.linear_attn`,i=this.hiddenSize,n=this.b2._dims,o=n.linHeads,u=n.linKeyDim,r=n.linValDim;n.linEVD;const h=n.linQKVDim,c=n.valueDim,m=t===0,f=`model.language_model.layers.${t}.input_layernorm.weight`,p=this.weights[f],w=this.makeUniform("add_rmsnorm_params",[i,this.rmsEps]);m?(this.runWithOffsets("rmsnorm",[{buffer:a,offset:0,size:i*4},p,{buffer:this.b2.normed,offset:0,size:i*4},w],1),this.runWithOffsets("rmsnorm",[{buffer:a,offset:i*4,size:i*4},p,{buffer:this.b2.normed,offset:i*4,size:i*4},w],1)):this.run("add_rmsnorm_ro_b2",[a,e,p,this.b2.normed,w],2);{const 
q=[this.gptqMatvecB2Op(this.b2.normed,this.b2.linQKV,`${s}.in_proj_qkv`,i,h),this.gptqMatvecB2Op(this.b2.normed,this.b2.linZ,`${s}.in_proj_z`,i,c)];this.abQuantized&&(q.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linAlpha,`${s}.in_proj_a`,i,this.linValueHeads)),q.push(this.gptqMatvecB2Op(this.b2.normed,this.b2.linBeta,`${s}.in_proj_b`,i,this.linValueHeads))),this.gpu.dispatchMulti(q.filter(Boolean))}const g=this.weights[`${s}.conv1d.weight`],P=this.weights[`${s}.A_log`],_=this.weights[`${s}.dt_bias`],l=this.weights[`${s}.norm.weight`],b=h*4,U=c*4;if(this.abQuantized){const q=this.linValueHeads,C=q*4,d=`fused_cdn_q_${o}_${u}_${r}_${h}_${q}`,B=this.paramBufs[d];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:0,size:U},{buffer:this.b2.linAlpha,offset:0,size:C},{buffer:this.b2.linBeta,offset:0,size:C},P,_,l,B],o),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:b,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:U,size:U},{buffer:this.b2.linAlpha,offset:C,size:C},{buffer:this.b2.linBeta,offset:C,size:C},P,_,l,B],o)}else{const q=`fused_cdn_ext_${o}_${u}_${r}_${h}_${i}_${this.linValueHeads}`,C=this.paramBufs[q],d=this.linABWeight[t];this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:0,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:0,size:U},{buffer:this.b2.normed,offset:0,size:i*4},d,P,_,l,C],o),this.runWithOffsets("fused_conv_deltanet_norm",[{buffer:this.b2.linQKV,offset:b,size:b},this.linConvHist[t],g,this.linState[t],{buffer:this.b2.linOut,offset:U,size:U},{buffer:this.b2.normed,offset:i*4,size:i*4},d,P,_,l,C],o)}const 
y=this.getQWeight(`${s}.out_proj`),v=this.makeUniform(`fused_silu_mv_${c}_${i}`,[c,i,this.groupSize]);this.runCached(`b2_fused_silu_oproj_${t}`,"fused_silu_gptq_b2_f16",[this.b2.linZ,this.b2.linOut,y.qweight,y.scales,this.b2.attnOut,v],this.wg4(i))}fusedNormMLPB2(a,e,t,s,i){const n=this.hiddenSize,o=this.intermediateSize,u=`model.language_model.layers.${i}.mlp`,r=`model.language_model.layers.${i}.post_attention_layernorm.weight`,h=this.weights[r],c=this.makeUniform("add_rmsnorm_params",[n,this.rmsEps]);this.run("three_way_add_rmsnorm_b2",[a,t,s,h,e,this.b2.normed,c],2);const m=this.getQWeight(`${u}.gate_proj`),f=this.getQWeight(`${u}.up_proj`),p=this.makeUniform(`fused_mv_${n}_${o}`,[n,o,this.groupSize]);this.runCached(`b2_fused_gus_${i}`,"fused_gate_up_silu_b2_f16",[this.b2.normed,m.qweight,m.scales,f.qweight,f.scales,this.b2.mlpIntermediate,p],this.wg4(o)),this.gptqMatvecB2(this.b2.mlpIntermediate,this.b2.mlpOut,`${u}.down_proj`,o,n)}decoderLayerB2(a,e,t,s,i){let n;this.layerTypes[a]==="full_attention"?(this.fullAttentionB2(t,i,a,e),n=this.b2.qProj):(this.linearAttentionB2(t,i,a),n=this.b2.attnOut),this.fusedNormMLPB2(t,s,i,n,a)}_dispatchLmHeadB2(){if(this._lmHeadQWeight){const a=this.hiddenSize,e=this.vocabSize,t=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]),s=this.gpu.hasF16?"gptq_matmul_b2_f16":"gptq_matmul_b2";this.runCached("b2_lm_head",s,[this.b2.normed,this._lmHeadQWeight,this._lmHeadScales,this.b2.logits,t],this.wg4(e))}else{const e=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",t=this.weights[e],s=this.hiddenSize,i=this.vocabSize,n=this.makeUniform("lmhead_params",[s,i]);this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:0,size:s*4},t,{buffer:this.b2.logits,offset:0,size:i*4},n],this.wg(i)),this.runWithOffsets("bf16_matvec",[{buffer:this.b2.normed,offset:s*4,size:s*4},t,{buffer:this.b2.logits,offset:i*4,size:i*4},n],this.wg(i))}}async 
forwardB2(a,e,t){this.b2||this.initB2Buffers(),this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embeddingB2(a,e);let s=this.b2.hidden,i=this.b2.hiddenB,n=this.b2.zeroBuf;for(let f=0;f<this.numLayers;f++){this.decoderLayerB2(f,t,s,i,n),n=this.b2.mlpOut;const p=s;s=i,i=p}if(this.mtp){const f=this.hiddenSize;this.gpu.copyBuffer(s,this.mtp.savedHidden,f*4,f*4,0),this.gpu.copyBuffer(this.b2.mlpOut,this.mtp.savedMlpOut,f*4,f*4,0)}const o=this.weights["model.language_model.norm.weight"],u=this.makeUniform("add_rmsnorm_params",[this.hiddenSize,this.rmsEps]);this.run("add_rmsnorm_b2",[s,this.b2.mlpOut,o,this.b2.normed,u],2),this._dispatchLmHeadB2();const h=(this.temperature??.7)>0,c=this.vocabSize,m=this.makeUniform("argmax_params",[c]);return h?(this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.topkResult0,m],1),this.runWithOffsets("topk_extract",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.topkResult1,m],1)):(this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:0,size:c*4},this.b2.argmaxResult0,m],1),this.runWithOffsets("argmax",[{buffer:this.b2.logits,offset:c*4,size:c*4},this.b2.argmaxResult1,m],1)),this.gpu._passCount!==void 0&&!this._b2PassCountLogged&&(console.log(`[PERF] B2: ${this.gpu._passCount} compute passes per 2-token step`),this._b2PassCountLogged=!0),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),h?(this.gpu.copyBuffer(this.b2.topkResult0,this.b2.topkReadback0,2048),this.gpu.copyBuffer(this.b2.topkResult1,this.b2.topkReadback1,2048)):(this.gpu.copyBuffer(this.b2.argmaxResult0,this.b2.argmaxReadback0,8),this.gpu.copyBuffer(this.b2.argmaxResult1,this.b2.argmaxReadback1,8)),this.gpu.endBatch(),this._readAndSampleB2(h)}async _readAndSampleB2(a){const e=[0,0];a?(await 
Promise.all([this.b2.topkReadback0.mapAsync(GPUMapMode.READ),this.b2.topkReadback1.mapAsync(GPUMapMode.READ)]),e[0]=this._sampleFromTopkReadback(this.b2.topkReadback0),e[1]=this._sampleFromTopkReadback(this.b2.topkReadback1)):(await Promise.all([this.b2.argmaxReadback0.mapAsync(GPUMapMode.READ),this.b2.argmaxReadback1.mapAsync(GPUMapMode.READ)]),e[0]=new Uint32Array(this.b2.argmaxReadback0.getMappedRange().slice(0))[0],e[1]=new Uint32Array(this.b2.argmaxReadback1.getMappedRange().slice(0))[0],this.b2.argmaxReadback0.unmap(),this.b2.argmaxReadback1.unmap());for(const t of e)this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=t:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=t);return e}_sampleFromTopkReadback(a){const e=a.getMappedRange(),t=new Uint32Array(e),s=new Float32Array(e),i=new Uint32Array(256),n=new Float32Array(256);for(let r=0;r<256;r++)i[r]=t[r*2],n[r]=s[r*2+1];a.unmap();const o=this.presencePenalty??0,u=this.repetitionPenalty??1;if((o>0||u>1)&&this._recentTokenCount>0){const r=new Set;for(let h=0;h<this._recentTokenCount;h++)r.add(this._recentTokens[h]);for(let h=0;h<256;h++)r.has(i[h])&&(o>0&&(n[h]-=o),u>1&&(n[h]=n[h]>0?n[h]/u:n[h]*u))}return this._sampleFromArrays(i,n,256)}async _readAndSample(){await this._argmaxReadback.mapAsync(GPUMapMode.READ);const a=new Uint32Array(this._argmaxReadback.getMappedRange().slice(0));this._argmaxReadback.unmap();const e=a[0];return this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=e:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=e),e}_sampleFromArrays(a,e,t){const s=this.temperature??.7,i=this.topP??.8,n=this.topK??20,o=Math.min(n,t),u=this._selBuf||(this._selBuf=new Uint32Array(64)),r=this._selValBuf||(this._selValBuf=new Float32Array(64)),h=this._usedBuf||(this._usedBuf=new Uint8Array(256));h.fill(0);for(let l=0;l<o;l++){let b=-1,U=-1/0;for(let 
y=0;y<t;y++)!h[y]&&e[y]>U&&(U=e[y],b=y);if(b<0)break;u[l]=a[b],r[l]=U,h[b]=1}const c=r[0],m=this._probBuf||(this._probBuf=new Float32Array(64));let f=0;for(let l=0;l<o;l++)m[l]=Math.exp((r[l]-c)/s),f+=m[l];for(let l=0;l<o;l++)m[l]/=f;let p=0,w=o;for(let l=0;l<o;l++)if(p+=m[l],p>=i){w=l+1;break}let g=0;for(let l=0;l<w;l++)g+=m[l];const P=Math.random()*g;let _=0;for(let l=0;l<w;l++)if(_+=m[l],_>=P)return u[l];return u[0]}_sample(a){const e=this.temperature??.7,t=this.topP??.8,s=this.topK??20,i=a.length;if(e<=0){let _=0,l=a[0];for(let b=1;b<i;b++)a[b]>l&&(l=a[b],_=b);return _}const n=Math.max(s,64),o=new Int32Array(n).fill(-1),u=new Float32Array(n).fill(-1/0);let r=-1/0;for(let _=0;_<i;_++){const l=a[_];if(l>r&&(r=l),l>u[n-1]){let b=n-1;for(;b>0&&l>u[b-1];)u[b]=u[b-1],o[b]=o[b-1],b--;u[b]=l,o[b]=_}}const h=Math.min(s,n),c=new Float32Array(h);let m=0;for(let _=0;_<h&&!(o[_]<0);_++)c[_]=Math.exp((u[_]-r)/e),m+=c[_];for(let _=0;_<h;_++)c[_]/=m;let f=0,p=h;for(let _=0;_<h;_++){if(o[_]<0){p=_;break}if(f+=c[_],f>=t){p=_+1;break}}let w=0;for(let _=0;_<p;_++)w+=c[_];const g=Math.random()*w;let P=0;for(let _=0;_<p;_++)if(P+=c[_],P>=g)return o[_];return o[0]}async generate(a,e=512,t,s){this.seqLen=0,this._recentTokenCount=0,this._replayFlat=null,this._ropeDelta=s?.ropeDelta??0;const i=this.maxSeqLen||4096;if(e=Math.min(e,i-a.length),e<=0)throw new Error(`Prompt (${a.length} tokens) exceeds context size (${i})`);const n=[...a],o=this.textCfg.linear_num_key_heads,u=this.textCfg.linear_key_head_dim,r=this.textCfg.linear_value_head_dim,m=this.linValueHeads/o*r,f=o*(u+u+m);for(let q=0;q<this.numLayers;q++)if(this.layerTypes[q]==="linear_attention"){const C=o*u*m*4,d=3*f*4;this.gpu.device.queue.writeBuffer(this.linState[q],0,new Uint8Array(C)),this.gpu.device.queue.writeBuffer(this.linConvHist[q],0,new Uint8Array(d))}let p=null;if(s){let q=0;const C=s.imageTokenId,d=s.positionIds3D;for(let B=0;B<a.length;B++){const 
k=d[0][B],D=d[1][B],S=d[2][B];this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),a[B]===C?this.embeddingFromVisionBuffer(s.embedBuffer,q++):this.embedding(a[B]);let $=this.hidden,M=this.hiddenB,z=this.zeroBuf;for(let O=0;O<this.numLayers;O++){this.decoderLayer(O,k,$,M,z,D,S,B),z=this.mlpOut;const G=$;$=M,M=G}if(B===a.length-1){const O=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm($,this.mlpOut,this.normed,O,"add_final_norm"),this._dispatchLmHead();const G=this.temperature??.7;if(G>0){const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,R],1);const F=Math.random()*4294967295>>>0,T=this._makeMixedUniform("sample_params",[{f:G},{u:this.topK??20},{f:this.topP??.8},{u:F}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,T],1)}else{const R=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,R],1)}this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8)}this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch(),this.seqLen=B+1,B%64===63&&await this.gpu.device.queue.onSubmittedWorkDone()}p=await this._readAndSample()}else for(let q=0;q<a.length;q++)p=await this.forward(a[q],q),this.seqLen=q+1;n.push(p);const w=this.config.eos_token_id??this.textCfg.eos_token_id,g=Array.isArray(w)?w:w!=null?[w]:[248044,248046];if(g.includes(p)||t?.(p,0))return n;this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=p:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=p),this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4);const 
_=8;this._tokenHistoryBuf||(this._tokenHistoryBuf=this.gpu.createBuffer("token_history",_*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC),this._tokenHistoryReadback=this.gpu.device.createBuffer({size:_*4,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST,label:"token_history_readback"}));let l=0,b=0,U=1,y=p,v=!1;for(;U<e;){const q=performance.now(),C=Math.min(_,e-U);for(let S=0;S<C;S++){const $=this.seqLen+S+this._ropeDelta;if(this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),S===0?this.embedding(y):this.embeddingFromArgmax(),v)this._replayCoreForward($);else{let G=this.hidden,R=this.hiddenB,F=this.zeroBuf;S===0&&!this._replayFlat&&(this._forceMinSplits=2,this.gpu.startRecording());for(let x=0;x<this.numLayers;x++){this.decoderLayer(x,$,G,R,F),F=this.mlpOut;const A=G;G=R,R=A}const T=this.weights["model.language_model.norm.weight"];this.addAndRmsNorm(G,this.mlpOut,this.normed,T,"add_final_norm"),this._dispatchLmHead(),S===0&&this.gpu._recording&&(this._forceMinSplits=0,this._buildReplayFromRecording())}const M=this.temperature??.7;if(M>0){const G=this.repetitionPenalty??1,R=this.presencePenalty??0,F=this._recentTokenCount+S;if(F>0&&(G>1||R>0)){const j=this._makeMixedUniform("penalty_params",[{u:Math.min(F,this._repMaxTokens)},{f:G},{f:R},{u:0}]);this.run("rep_penalty",[this.logits,this._gpuRecentTokens,j],Math.ceil(Math.min(F,this._repMaxTokens)/256))}const T=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("topk","topk_extract",[this.logits,this._topkResult,T],1);const x=Math.random()*4294967295>>>0,A=this._makeMixedUniform("sample_params",[{f:M},{u:this.topK??20},{f:this.topP??.8},{u:x}]);this.run("gpu_sample",[this._topkResult,this.argmaxResult,A],1)}else{const G=this.makeUniform("argmax_params",[this.vocabSize]);this.runCached("argmax","argmax",[this.logits,this.argmaxResult,G],1)}const 
z=(this._recentTokenCount+S)%this._repMaxTokens,O=this.makeUniform(`append_${S}`,[z,S]);this.run("append_token",[this.argmaxResult,this._gpuRecentTokens,this._tokenHistoryBuf,O],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.endBatch()}!v&&this._replayFlat&&(v=!0);const d=this.gpu.device.createCommandEncoder();d.copyBufferToBuffer(this._tokenHistoryBuf,0,this._tokenHistoryReadback,0,C*4),this.gpu.device.queue.submit([d.finish()]),await this._tokenHistoryReadback.mapAsync(GPUMapMode.READ);const B=new Uint32Array(this._tokenHistoryReadback.getMappedRange().slice(0,C*4));this._tokenHistoryReadback.unmap();const k=performance.now();l+=k-q,b+=C;let D=!1;for(let S=0;S<C;S++){const $=B[S];if(n.push($),this.seqLen++,this._recentTokenCount<this._repMaxTokens?this._recentTokens[this._recentTokenCount++]=$:(this._recentTokens.copyWithin(0,1),this._recentTokens[this._repMaxTokens-1]=$),g.includes($)){D=!0;break}const M=t?.($,U);if(U++,M){D=!0;break}}if(b%50<_&&console.log(`[T @${b}] ${(l/b).toFixed(1)}ms/tok (batch=${_})`),D)break;y=B[C-1],this.gpu.device.queue.writeBuffer(this._gpuRecentTokens,0,this._recentTokens.buffer,0,this._recentTokenCount*4)}return b>0&&console.log(`[T final @${b}] ${(l/b).toFixed(1)}ms/tok (batch=${_})`),n}/**
 * Quantizes a weight buffer on the GPU and returns the packed result.
 * Pass 1 runs the quantize_bf16_to_int4 shader (bindings: a, qweight, f32 scales, params [e, t, groupSize]);
 * pass 2 runs pack_f32_to_f16_pairs to repack the f32 scales into the final scales buffer.
 * @param a  source GPU buffer (presumably BF16 weights, judging by the shader name - TODO confirm layout)
 * @param e  K dimension; qweight is sized (e/8)*t*4 bytes and there are e/groupSize scale groups per row
 * @param t  N dimension; pass 1 dispatches min(t,65535) x ceil(t/65535) workgroups
 * @param s  label prefix for the buffers created here
 * @returns  Promise of {qweight, scales}. Waits for the queue to drain, then destroys the
 *           temporary f32 scales buffer and both uniform buffers before returning.
 */async _quantizeBF16Weight(a,e,t,s){const i=this.groupSize,n=e/8,o=e/i,u=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,r=this.gpu.createBuffer(`${s}_qweight`,n*t*4,u),h=this.gpu.createBuffer(`${s}_scales_f32`,o*t*4,u),c=Math.ceil(o*t/2)*4,m=this.gpu.createBuffer(`${s}_scales`,c,u),f=this.gpu.getOrCreatePipeline("quantize_bf16_to_int4",(await H(async()=>{const{SHADERS:B}=await import("./gpu-ops-flxI8RuZ.js").then(k=>k.b);return{SHADERS:B}},[])).SHADERS.quantize_bf16_to_int4),p=this.gpu.createBufferFromData(`${s}_qparams`,new 
Uint32Array([e,t,i]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),w=this.gpu.createBindGroup(f,0,[a,r,h,p]),g=65535,P=Math.min(t,g),_=Math.ceil(t/g),l=this.gpu.device.createCommandEncoder(),b=l.beginComputePass();b.setPipeline(f),b.setBindGroup(0,w),b.dispatchWorkgroups(P,_),b.end(),this.gpu.device.queue.submit([l.finish()]);const U=this.gpu.getOrCreatePipeline("pack_f32_to_f16_pairs",(await H(async()=>{const{SHADERS:B}=await import("./gpu-ops-flxI8RuZ.js").then(k=>k.b);return{SHADERS:B}},[])).SHADERS.pack_f32_to_f16_pairs),y=Math.ceil(o*t/2),v=this.gpu.createBufferFromData(`${s}_pparams`,new Uint32Array([y]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=this.gpu.createBindGroup(U,0,[h,m,v]),C=this.gpu.device.createCommandEncoder(),d=C.beginComputePass();return d.setPipeline(U),d.setBindGroup(0,q),d.dispatchWorkgroups(Math.ceil(y/256)),d.end(),this.gpu.device.queue.submit([C.finish()]),await this.gpu.device.queue.onSubmittedWorkDone(),h.destroy(),p.destroy(),v.destroy(),{qweight:r,scales:m}}/**
 * One-time MTP setup; returns immediately when this.mtp already exists.
 * Loads tensors through loadMTPWeights (repo id Qwen/Qwen3.5-2B, with a forwarded unchanged),
 * uploads each one as a GPU buffer, quantizes the eight projection matrices with
 * _quantizeBF16Weight (destroying the unquantized copies), keeps the raw norm weights as
 * Uint32Array copies in mtp.normRaw, allocates the MTP KV cache and scratch buffers, then builds
 * the split-QK-norm uniform and the trimmed lm_head (8000 rows).
 * @param a  second argument to loadMTPWeights; its meaning is not visible in this file
 */async initMTP(a){if(this.mtp)return;const e=this.hiddenSize,t=this.intermediateSize,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=performance.now(),n="Qwen/Qwen3.5-2B";console.log(`[MTP] Downloading MTP weights from ${n}...`);const{loadMTPWeights:o}=await H(async()=>{const{loadMTPWeights:g}=await import("./safetensors-loader-CwGm5mJX.js");return{loadMTPWeights:g}},[]),u=await o(n,a);console.log(`[MTP] Downloaded ${Object.keys(u).length} MTP tensors`),this.mtp={weights:{},qweights:{}};const r={};for(const[g,P]of Object.entries(u)){const _=this.gpu.createBufferFromData(`mtp_${g}`,P.data);r[g]=_,this.mtp.weights[g]=_}const 
h=[{name:"mtp.fc.weight",K:e*2,N:e},{name:"mtp.layers.0.self_attn.q_proj.weight",K:e,N:this.numHeads*this.headDim*2},{name:"mtp.layers.0.self_attn.k_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.v_proj.weight",K:e,N:this.numKVHeads*this.headDim},{name:"mtp.layers.0.self_attn.o_proj.weight",K:e,N:e},{name:"mtp.layers.0.mlp.gate_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.up_proj.weight",K:e,N:t},{name:"mtp.layers.0.mlp.down_proj.weight",K:t,N:e}];for(const{name:g,K:P,N:_}of h){const{qweight:l,scales:b}=await this._quantizeBF16Weight(r[g],P,_,`mtp_${g}`);this.mtp.qweights[g]={qweight:l,scales:b},r[g].destroy(),delete this.mtp.weights[g]}this.mtp.normRaw={};const c=["mtp.pre_fc_norm_hidden.weight","mtp.pre_fc_norm_embedding.weight","mtp.layers.0.input_layernorm.weight","mtp.layers.0.post_attention_layernorm.weight","mtp.norm.weight","mtp.layers.0.self_attn.q_norm.weight","mtp.layers.0.self_attn.k_norm.weight"];for(const g of c){const P=u[g];P&&(this.mtp.normRaw[g]=new Uint32Array(P.data.buffer.slice(P.data.byteOffset,P.data.byteOffset+P.data.byteLength)))}const f=4096*this.numKVHeads*this.headDim*4;this.mtp.kvCache={keys:this.gpu.createBuffer("mtp_kv_keys",f,s),values:this.gpu.createBuffer("mtp_kv_values",f,s)},this.mtp.concatBuf=this.gpu.createBuffer("mtp_concat",e*2*4,s),this.mtp.fcOut=this.gpu.createBuffer("mtp_fc_out",e*4,s),this.mtp.savedHidden=this.gpu.createBuffer("mtp_saved_hidden",e*4,s),this.mtp.savedMlpOut=this.gpu.createBuffer("mtp_saved_mlp_out",e*4,s),this.mtp.seqLen=0,this._buildMTPSplitQKNormUniform(),this._buildMTPFusedMLPUniform(),await this._buildTrimmedLmHead(8e3);const p=((performance.now()-i)/1e3).toFixed(1),w=h.length;console.log(`[MTP] Initialized in ${p}s: ${w} INT4 weights, 1 KV cache layer`)}/**
 * Builds the uniform buffer stored as this.mtp.fusedSQKParams.
 * Layout: bytes 0-15 hold numHeads, numKVHeads, headDim (u32) and rmsEps (f32); bytes 16-31 are
 * zeroed here and rewritten by mtpForwardSubmit on every step; from byte 32 on come
 * numHeads*headDim/2 u32 words copied from q_norm, then numKVHeads*headDim/2 words from k_norm
 * (contiguous; total size rounded up to a multiple of 16 bytes). Either norm is skipped when
 * its entry is missing from mtp.normRaw.
 * NOTE(review): the loops index normRaw by those full counts; if a norm tensor holds fewer words,
 * the out-of-range reads are undefined and get written as 0 - confirm against loader and shader.
 */_buildMTPSplitQKNormUniform(){const a=this.numHeads,e=this.numKVHeads,t=this.headDim,s=a*t,i=e*t,n=(s+i)/2,u=32+Math.ceil(n/4)*16,r=new ArrayBuffer(u),h=new 
DataView(r);h.setUint32(0,a,!0),h.setUint32(4,e,!0),h.setUint32(8,t,!0),h.setFloat32(12,this.rmsEps,!0),h.setUint32(16,0,!0),h.setUint32(20,0,!0),h.setUint32(24,0,!0),h.setUint32(28,0,!0);const c=this.mtp.normRaw["mtp.layers.0.self_attn.q_norm.weight"],m=this.mtp.normRaw["mtp.layers.0.self_attn.k_norm.weight"];if(c)for(let p=0;p<s/2;p++){const w=Math.floor(p/4),g=p%4;h.setUint32(32+w*16+g*4,c[p],!0)}if(m){const p=s/2;for(let w=0;w<i/2;w++){const g=p+w,P=Math.floor(g/4),_=g%4;h.setUint32(32+P*16+_*4,m[w],!0)}}const f=this.gpu.device.createBuffer({size:u,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,label:"mtp_fused_sqk"});this.gpu.device.queue.writeBuffer(f,0,new Uint8Array(r)),this.mtp.fusedSQKParams=f}/** Empty body (no-op); still invoked from initMTP. */_buildMTPFusedMLPUniform(){}/**
 * Builds a reduced lm_head for the first a token ids (identity index map, kept in mtp.trimmedToFull).
 * Gathers those rows with the gather_rows_bf16 shader from embed_tokens.weight (or from
 * lm_head.weight when tie_word_embeddings is false), quantizes the gathered rows with
 * _quantizeBF16Weight, and stores the result as mtp.trimmedLmHead together with an a*4-byte
 * mtp.trimmedLogits buffer. Temporary gather buffers are destroyed afterwards.
 * @param a  number of rows to keep (default 8000)
 */async _buildTrimmedLmHead(a=8e3){const e=this.hiddenSize,t=this.gpu,s=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC,i=new Uint32Array(a);for(let U=0;U<a;U++)i[U]=U;this.mtp.trimmedToFull=i,this.mtp.trimmedVocabSize=a;const n=t.createBufferFromData("mtp_trim_indices",i),u=(this.config.tie_word_embeddings??this.textCfg.tie_word_embeddings)!==!1?"model.language_model.embed_tokens.weight":"lm_head.weight",r=this.weights[u],h=e/2,c=a*h*4,m=t.createBuffer("mtp_trim_gathered",c,s),f=(await H(async()=>{const{SHADERS:U}=await import("./gpu-ops-flxI8RuZ.js").then(y=>y.b);return{SHADERS:U}},[])).SHADERS.gather_rows_bf16,p=t.getOrCreatePipeline("gather_rows_bf16",f),w=t.createBufferFromData("mtp_trim_gparams",new Uint32Array([h,a]),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),g=t.createBindGroup(p,0,[r,n,m,w]),P=t.device.createCommandEncoder(),_=P.beginComputePass();_.setPipeline(p),_.setBindGroup(0,g),_.dispatchWorkgroups(Math.ceil(h/256),a),_.end(),t.device.queue.submit([P.finish()]);const{qweight:l,scales:b}=await 
this._quantizeBF16Weight(m,e,a,"mtp_trim_lmhead");this.mtp.trimmedLmHead={qweight:l,scales:b},this.mtp.trimmedLogits=t.createBuffer("mtp_trimmed_logits",a*4,s),m.destroy(),n.destroy(),w.destroy(),console.log(`[MTP] Trimmed lm_head: ${a} tokens, ${(l.size/1024/1024).toFixed(1)}MB qw + ${(b.size/1024/1024).toFixed(1)}MB sc`)}/**
 * Encodes the matvec this.normed -> mtp.trimmedLogits using the trimmed lm_head.
 * Uses the 4t shader (wg8 dispatch) when hiddenSize/groupSize is a multiple of 4, otherwise the
 * plain one (wg4 dispatch); the _f16 variant is chosen when gpu.hasF16 is set.
 */_dispatchTrimmedLmHead(){const a=this.hiddenSize,e=this.mtp.trimmedVocabSize,t=this.mtp.trimmedLmHead,i=a/this.groupSize%4===0,n=this.makeUniform(`mv_${a}_${e}`,[a,e,this.groupSize]);if(i){const o=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached("mtp_trim_lmhead",o,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg8(e))}else{const o=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached("mtp_trim_lmhead",o,[this.normed,t.qweight,t.scales,this.mtp.trimmedLogits,n],this.wg4(e))}}/** Looks up the {qweight, scales} pair for MTP weight name a (undefined when absent). */_mtpGetQWeight(a){return this.mtp.qweights[a]}/**
 * Runs a quantized matvec for one MTP weight, with the same shader selection as the trimmed lm_head.
 * @param a  input buffer
 * @param e  output buffer
 * @param t  weight name, also used in the runCached key
 * @param s  K dimension
 * @param i  N dimension
 * @throws Error when the weight has no quantized entry
 */_mtpGptqMatvec(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)throw new Error(`MTP weight not found: ${t}`);const u=s/this.groupSize%4===0,r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";this.runCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg4(i))}}/** When MTP is initialized, copies hiddenSize*4 bytes from a into mtp.savedHidden and from e into mtp.savedMlpOut; otherwise does nothing. */_mtpSaveHidden(a,e){this.mtp&&(this.gpu.copyBuffer(a,this.mtp.savedHidden,this.hiddenSize*4),this.gpu.copyBuffer(e,this.mtp.savedMlpOut,this.hiddenSize*4))}/** Encodes one MTP step for token a via mtpForwardSubmit and resolves with the value of _readAndSample(). Throws when initMTP() has not run. */async mtpForward(a){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");return this.mtpForwardSubmit(a),await this._readAndSample()}/**
 * Encodes one MTP step for token a in a single batch, without reading anything back.
 * Order of work: embed a and rms-normalize it into concatBuf; normalize savedHidden+savedMlpOut
 * and copy that into concatBuf as well (presumably the second half - copyBuffer offset order
 * is not visible here); fc projection (K=2*hidden) into fcOut; input norm; q/k/v projections via
 * dispatchMulti; split-QK-norm shader writing the MTP KV cache at position mtp.seqLen; GQA
 * attention over seqLen+1 positions; fused sigmoid-gate + o_proj; post-attention norm; fused
 * gate/up/SiLU and down_proj; three-way add + final norm; full lm_head and argmax.
 * Finally copies 8 bytes of argmaxResult into _argmaxReadback and increments mtp.seqLen.
 * Does not check this.mtp; callers (mtpForward) do.
 */mtpForwardSubmit(a){const e=this.hiddenSize,t=this.intermediateSize,s=this.mtp.seqLen;this.gpu.singlePassMode=this.singlePassMode,this.gpu.beginBatch(),this.embedding(a);const i=this.makeUniform("add_rmsnorm_params",[e,this.rmsEps]);this.runCached("mtp_emb_norm","rmsnorm",[this.hidden,this.mtp.weights["mtp.pre_fc_norm_embedding.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4),this.runCached("mtp_hid_norm","add_rmsnorm_ro",[this.mtp.savedHidden,this.mtp.savedMlpOut,this.mtp.weights["mtp.pre_fc_norm_hidden.weight"],this.normed,i],1),this.gpu.copyBuffer(this.normed,this.mtp.concatBuf,e*4,0,e*4),this._mtpGptqMatvec(this.mtp.concatBuf,this.mtp.fcOut,"mtp.fc.weight",e*2,e),this.runCached("mtp_input_norm","rmsnorm",[this.mtp.fcOut,this.mtp.weights["mtp.layers.0.input_layernorm.weight"],this.normed,i],1);const n="mtp.layers.0.self_attn",o=this._mtpGptqMatvecOp(this.normed,this.qProjFull,`${n}.q_proj.weight`,e,this.numHeads*this.headDim*2),u=this._mtpGptqMatvecOp(this.normed,this.kProj,`${n}.k_proj.weight`,e,this.numKVHeads*this.headDim),r=this._mtpGptqMatvecOp(this.normed,this.vProj,`${n}.v_proj.weight`,e,this.numKVHeads*this.headDim);this.gpu.dispatchMulti([o,u,r].filter(Boolean)),this._gqaDv.setUint32(0,s,!0),this._gqaDv.setUint32(4,s,!0),this._gqaDv.setUint32(8,s,!0),this._gqaDv.setUint32(12,s,!0),this.gpu.device.queue.writeBuffer(this.mtp.fusedSQKParams,16,this._gqaData,0,16),this.runCached("mtp_sqk",this._splitQKNormShaderKey,[this.qProjFull,this.kProj,this.vProj,this.qProj,this.qGate,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.mtp.fusedSQKParams],this.numHeads+this.numKVHeads);const 
h=s+1;this._gqaDv.setUint32(0,h,!0),this._gqaDv.setUint32(4,this.headDim,!0),this._gqaDv.setUint32(8,this.numKVHeads,!0),this._gqaDv.setUint32(12,this.numHeads,!0),this._gqaDv.setUint32(16,this.numHeads/this.numKVHeads,!0),this._gqaDv.setUint32(20,this.partialDim,!0),this._gqaDv.setUint32(24,s,!0),this._gqaDv.setFloat32(28,this.ropeTheta,!0),this.gpu.device.queue.writeBuffer(this._gqaParamBuf,0,this._gqaData),this.runCached("mtp_gqa","gqa_attention_head",[this.qProj,this.mtp.kvCache.keys,this.mtp.kvCache.values,this.attnOut,this._gqaParamBuf],this.numHeads);const c=this._mtpGetQWeight(`${n}.o_proj.weight`),m=this.numHeads*this.headDim,f=this.makeUniform(`fused_sig_mv_${m}_${e}`,[m,e,this.groupSize]);m/this.groupSize%4===0&&this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_4t_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg8(e)):this.gpu.hasF16?this.runCached("mtp_sig_oproj","fused_sigmoid_gptq_f16",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)):this.runCached("mtp_sig_oproj","fused_sigmoid_gptq",[this.attnOut,this.qGate,c.qweight,c.scales,this.qProj,f],this.wg4(e)),this.runCached("mtp_post_norm","add_rmsnorm_ro",[this.mtp.fcOut,this.qProj,this.mtp.weights["mtp.layers.0.post_attention_layernorm.weight"],this.normed,i],1);const g=this.gpu.hasF16?"fused_gate_up_silu_f16":"fused_gate_up_silu",P=this.makeUniform(`fused_mv_${e}_${t}`,[e,t,this.groupSize]),_=this.mtp.qweights["mtp.layers.0.mlp.gate_proj.weight"],l=this.mtp.qweights["mtp.layers.0.mlp.up_proj.weight"];this.runCached("mtp_gate_up_silu",g,[this.normed,_.qweight,_.scales,l.qweight,l.scales,this.mlpIntermediate,P],this.wg4(t)),this._mtpGptqMatvec(this.mlpIntermediate,this.mlpOut,"mtp.layers.0.mlp.down_proj.weight",t,e),this.runCached("mtp_final_norm","three_way_add_rmsnorm",[this.mtp.fcOut,this.qProj,this.mlpOut,this.mtp.weights["mtp.norm.weight"],this.hidden,this.normed,i],1),this._dispatchLmHead();const 
b=this.makeUniform("mtp_argmax_params",[this.vocabSize]);this.runCached("mtp_argmax","argmax",[this.logits,this.argmaxResult,b],1),this.gpu._singlePass&&(this.gpu._singlePass.end(),this.gpu._singlePass=null),this.gpu.copyBuffer(this.argmaxResult,this._argmaxReadback,8),this.gpu.endBatch(),this.mtp.seqLen++}/**
 * Saves the recurrent state of every linear_attention layer so a rejected draft can be undone.
 * Snapshot buffers are allocated on first use (sized from linState / linConvHist); each call then
 * records mtp.seqLen and copies linState and linConvHist into the snapshot.
 */_mtpSnapshotDeltaNet(){if(!this.mtp._deltaNetSnapshot){const e=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC;this.mtp._deltaNetSnapshot={states:{},convs:{}};for(let t=0;t<this.numLayers;t++)this.layerTypes[t]==="linear_attention"&&(this.mtp._deltaNetSnapshot.states[t]=this.gpu.createBuffer(`mtp_snap_state_${t}`,this.linState[t].size,e),this.mtp._deltaNetSnapshot.convs[t]=this.gpu.createBuffer(`mtp_snap_conv_${t}`,this.linConvHist[t].size,e))}const a=this.mtp._deltaNetSnapshot;a.mtpSeqLen=this.mtp.seqLen;for(const e of Object.keys(a.states))this.gpu.copyBuffer(this.linState[e],a.states[e],this.linState[e].size),this.gpu.copyBuffer(this.linConvHist[e],a.convs[e],this.linConvHist[e].size)}/**
 * Reverses _mtpSnapshotDeltaNet: restores mtp.seqLen (when one was recorded) and copies the
 * snapshot buffers back into linState / linConvHist. Does nothing if no snapshot exists.
 */_mtpRestoreDeltaNet(){const a=this.mtp._deltaNetSnapshot;if(a){a.mtpSeqLen!==void 0&&(this.mtp.seqLen=a.mtpSeqLen);for(const e of Object.keys(a.states))this.gpu.copyBuffer(a.states[e],this.linState[e],this.linState[e].size),this.gpu.copyBuffer(a.convs[e],this.linConvHist[e],this.linConvHist[e].size)}}/**
 * Generation loop that drafts one token with the MTP head and verifies it with the main model.
 * Resets seqLen, mtp.seqLen and the recent-token count, zeroes the linear-attention state, and
 * prefills the prompt one token at a time with forward(). Each step then: drafts with mtpForward,
 * snapshots the linear-attention state, and runs forwardB2(previous, draft). If the first output
 * equals the draft, the draft and the second output are both appended; otherwise the snapshot is
 * restored, seqLen is rewound, and a single forward() supplies the next token.
 * Stops on an EOS id (from config, else 248044 / 248046), when t returns truthy, or once e tokens
 * have been produced. Logs throughput and acceptance rate every 25 steps and at the end.
 * @param a  prompt token ids
 * @param e  maximum number of generated tokens (default 512)
 * @param t  optional callback (token, index); a truthy return value stops generation
 * @returns  Promise of an array: the prompt ids followed by the generated ids
 * @throws Error when initMTP() has not run
 */async generateWithMTP(a,e=512,t){if(!this.mtp)throw new Error("MTP not initialized. 
Call initMTP() first.");this.seqLen=0,this.mtp.seqLen=0,this._recentTokenCount=0;const s=[...a],i=this.textCfg.linear_num_key_heads,n=this.textCfg.linear_key_head_dim,o=this.textCfg.linear_value_head_dim,r=this.linValueHeads/i*o,h=i*(n+n+r);for(let v=0;v<this.numLayers;v++)if(this.layerTypes[v]==="linear_attention"){const q=i*n*r*4,C=3*h*4;this.gpu.device.queue.writeBuffer(this.linState[v],0,new Uint8Array(q)),this.gpu.device.queue.writeBuffer(this.linConvHist[v],0,new Uint8Array(C))}let c=null;for(let v=0;v<a.length;v++)c=await this.forward(a[v],v),this.seqLen=v+1;s.push(c);const m=this.config.eos_token_id??this.textCfg.eos_token_id,f=Array.isArray(m)?m:m!=null?[m]:[248044,248046];if(f.includes(c)||t?.(c,0))return s;let w=1,g=0,P=0,_=c,l=0,b=0;for(;w<e;){const v=performance.now(),q=await this.mtpForward(_);this._mtpSnapshotDeltaNet();const C=this.seqLen,d=await this.forwardB2(_,q,this.seqLen);this.seqLen+=2;const B=d[0],k=d[1];if(B===q){if(g++,s.push(q),w++,f.includes(q))break;let S=t?.(q,w-1);if(S||(s.push(k),w++,f.includes(k))||(S=t?.(k,w-1),S))break;_=k}else{P++,this._mtpRestoreDeltaNet(),this.seqLen=C;const S=await this.forward(_,this.seqLen);if(this.seqLen++,s.push(S),w++,f.includes(S)||t?.(S,w-1))break;_=S}const D=performance.now();if(l+=D-v,b++,b%25===0){const S=g/(g+P)*100,$=w/b;console.log(`[MTP @${b}] ${(l/b).toFixed(1)}ms/step, ${(w/(l/1e3)).toFixed(0)} tok/s, accept=${S.toFixed(0)}%, ${$.toFixed(1)} tok/step`)}}const U=g/Math.max(1,g+P)*100;return console.log(`[MTP final] ${(w/((l||1)/1e3)).toFixed(0)} tok/s, accept=${U.toFixed(0)}% (${g}/${g+P}), ${w} tokens`),s}/**
 * Variant of _mtpGptqMatvec that prepares the op with prepOpCached instead of running it, so the
 * caller can batch several ops (mtpForwardSubmit passes them to dispatchMulti).
 * Same parameters as _mtpGptqMatvec: a input, e output, t weight name, s K, i N.
 * @returns the prepared op, or null when the weight has no quantized entry (no throw here)
 */_mtpGptqMatvecOp(a,e,t,s,i){const n=this._mtpGetQWeight(t);if(!n)return null;const u=s/this.groupSize%4===0,r=this.makeUniform(`mv_${s}_${i}`,[s,i,this.groupSize]);if(u){const h=this.gpu.hasF16?"gptq_matvec_4t_f16":"gptq_matvec_4t";return this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg8(i))}else{const h=this.gpu.hasF16?"gptq_matvec_f16":"gptq_matvec";return 
this.prepOpCached(`mtp_${t}`,h,[a,n.qweight,n.scales,e,r],this.wg4(i))}}}export{Y as Qwen35Model};
assets/{test-vukVrAzP.js → test-DQDfUwQY.js} RENAMED
@@ -1,5 +1,5 @@
1
- const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["assets/qwen35-model-7KVn_FLm.js","assets/gpu-ops-BbLjsC0p.js"])))=>i.map(i=>d[i]);
2
- import{G as nt,S as ct,_ as et}from"./gpu-ops-BbLjsC0p.js";class ot{constructor(){this.gpu=null,this.results=[],this.pipelines={}}async init(){this.gpu=new nt,await this.gpu.init();for(const[e,t]of Object.entries(ct))this.pipelines[e]=this.gpu.getOrCreatePipeline(e,t);return this}async runAll(e){const t=[["GPTQ MatVec",()=>this.testGPTQMatvec()],["GPTQ MatVec (group boundary)",()=>this.testGPTQMatvecGroups()],["GPTQ MatVec (gs=32)",()=>this.testGPTQMatvecGS32()],["GPTQ Split-K",()=>this.testGPTQSplitK()],["GPTQ F16",()=>this.testGPTQF16()],["BF16 MatVec",()=>this.testBF16Matvec()],["RMSNorm",()=>this.testRMSNorm()],["RMSNorm (1+w formula)",()=>this.testRMSNorm1PlusW()],["SiLU * Mul",()=>this.testSiLUMul()],["Vector Add",()=>this.testAdd()],["Embedding (BF16)",()=>this.testEmbedding()],["Argmax",()=>this.testArgmax()],["Argmax (large)",()=>this.testArgmaxLarge()],["Split interleaved",()=>this.testSplit()],["Sigmoid Mul",()=>this.testSigmoidMul()],["Head RMSNorm (1+w)",()=>this.testHeadRMSNorm()],["Head RMSNorm (nogated, w)",()=>this.testHeadRMSNormNogated()],["Causal Conv1d",()=>this.testCausalConv1d()],["KV Cache Store",()=>this.testKVCacheStore()],["GQA Attention (single pos)",()=>this.testGQAAttention()],["DeltaNet Recurrent",()=>this.testDeltaNetRecurrent()],["Add + RMSNorm (fused)",()=>this.testAddRMSNorm()],["Three-way Add + RMSNorm",()=>this.testThreeWayAddRMSNorm()],["GQA RoPE rotate_half",()=>this.testGQARoPERotateHalf()],["Fused sigmoid + GPTQ",()=>this.testFusedSigmoidGPTQ()],["Fused SiLU + GPTQ",()=>this.testFusedSiluGPTQ()],["Fused AddNorm+GateUp+SiLU",()=>this.testFusedAddNormGateUpSiLU()],["Fused Split+QKNorm+KVStore",()=>this.testFusedSplitQKNormKVStore()],["Add+RMSNorm B=2",()=>this.testAddRMSNormB2()],["Add+RMSNorm RO B=2",()=>this.testAddRMSNormROB2()],["Three-way Add+RMSNorm B=2",()=>this.testThreeWayAddRMSNormB2()],["Fused SiLU+GPTQ B=2",()=>this.testFusedSiluGPTQB2()],["Fused Sigmoid+GPTQ B=2",()=>this.testFusedSigmoidGPTQB2()],["Fused 
GateUp+SiLU B=2",()=>this.testFusedGateUpSiLUB2()],["GPTQ Matmul B=2",()=>this.testGPTQMatmulB2()],["GPTQ Matmul B=2 F16",()=>this.testGPTQMatmulB2F16()],["GPTQ Matmul B=2 4T F16",()=>this.testGPTQMatmulB2_4T_F16()]];for(const[o,s]of t)try{const r=await s();this.results.push({name:o,...r}),e?.({name:o,...r})}catch(r){this.results.push({name:o,pass:!1,error:r.message,maxErr:NaN}),e?.({name:o,pass:!1,error:r.message,maxErr:NaN})}return this.results}destroy(){this.gpu?.destroy()}makeBuffer(e,t,o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){return this.gpu.createBufferFromData(e,new Uint8Array(t.buffer,t.byteOffset,t.byteLength),o)}makeF32Buffer(e,t){return this.makeBuffer(e,new Float32Array(t))}makeU32Buffer(e,t){return this.makeBuffer(e,new Uint32Array(t))}makeOutputBuffer(e,t){return this.gpu.createBuffer(e,t*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC)}makeUniform(e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),o=new DataView(t);for(let s=0;s<e.length;s++)typeof e[s]=="number"&&!Number.isInteger(e[s])?o.setFloat32(s*4,e[s],!0):o.setUint32(s*4,e[s],!0);return this.gpu.createBufferFromData("uniform",new Uint8Array(t),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}makeUniformTyped(e,t){const o=e.length+t.length,s=new ArrayBuffer(Math.max(16,Math.ceil(o*4/16)*16)),r=new DataView(s);let a=0;for(const d of e)r.setUint32(a,d,!0),a+=4;for(const d of t)r.setFloat32(a,d,!0),a+=4;return this.gpu.createBufferFromData("uniform_typed",new Uint8Array(s),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}dispatch(e,t,o,s=1){const r=this.pipelines[e],a=this.gpu.createBindGroup(r,0,t);this.gpu.dispatch(r,[a],o,s)}async readback(e,t){return this.gpu.readBuffer(e,t*4)}async readbackU32(e,t){const o=this.gpu.createReadbackBuffer("_rb",t*4),s=this.gpu.device.createCommandEncoder();s.copyBufferToBuffer(e,0,o,0,t*4),this.gpu.device.queue.submit([s.finish()]),await o.mapAsync(GPUMapMode.READ);const r=new 
Uint32Array(o.getMappedRange().slice(0));return o.unmap(),o.destroy(),r}compare(e,t,o=1e-4){let s=0;const r=[];for(let a=0;a<t.length;a++){const d=Math.abs(e[a]-t[a]);s=Math.max(s,d),d>o&&r.push({idx:a,got:e[a],expected:t[a],err:d})}return{pass:r.length===0,maxErr:s,errors:r.slice(0,5)}}cpuF32ToBF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const a=new Uint32Array(new Float32Array([e[s]]).buffer)[0]>>>16,d=Math.floor(s/2);s%2===0?o[d]=a:o[d]|=a<<16}return o}cpuF32ToF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const r=this.f32ToF16(e[s]),a=Math.floor(s/2);s%2===0?o[a]=r:o[a]|=r<<16}return o}f32ToF16(e){const t=new ArrayBuffer(4);new Float32Array(t)[0]=e;const o=new Uint32Array(t)[0],s=o>>31&1;let r=o>>23&255,a=o&8388607;return r===0?s<<15:r===255?s<<15|31744|(a?512:0):(r=r-127+15,r>=31?s<<15|31744:r<=0?s<<15:s<<15|r<<10|a>>13)}f16ToF32(e){const t=e>>15&1,o=e>>10&31,s=e&1023;return o===0?s===0?t?-0:0:(t?-1:1)*Math.pow(2,-14)*(s/1024):o===31?s?NaN:t?-1/0:1/0:(t?-1:1)*Math.pow(2,o-15)*(1+s/1024)}bf16ToF32(e){const t=e<<16,o=new ArrayBuffer(4);return new Uint32Array(o)[0]=t,new Float32Array(o)[0]}async testGPTQMatvec(){const r=new Float32Array(64);for(let k=0;k<64;k++)r[k]=(k+1)*.1;const a=new Uint32Array(32),d=new Float32Array(256);for(let k=0;k<8;k++)for(let u=0;u<4;u++){let m=0;for(let g=0;g<8;g++){const i=k*8+g,c=(i+u)%15;d[i*4+u]=c-8,m|=c<<g*4}a[k*4+u]=m}const f=new Float32Array(4);for(let k=0;k<4;k++)f[k]=.5+k*.1;const w=this.cpuF32ToF16Packed(f),h=new Float32Array(4);for(let k=0;k<4;k++){let u=0;for(let m=0;m<64;m++){const i=Math.floor(m/64)*4+k,c=w[Math.floor(i/2)]>>i%2*16&65535,_=this.f16ToF32(c);u+=r[m]*d[m*4+k]*_}h[k]=u}const F=this.makeF32Buffer("gptq_input",r),n=this.makeU32Buffer("gptq_qw",a),B=this.makeU32Buffer("gptq_scales",w),l=this.makeOutputBuffer("gptq_out",4),p=this.makeUniform([64,4,64]);this.dispatch("gptq_matvec",[F,n,B,l,p],Math.ceil(4/32));const 
y=await this.readback(l,4);return this.compare(y,h,.01)}async testGPTQMatvecGroups(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/64,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/64)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq2_in",r),B=this.makeU32Buffer("gptq2_qw",a),l=this.makeU32Buffer("gptq2_sc",h),p=this.makeOutputBuffer("gptq2_out",4),y=this.makeUniform([128,4,64]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQMatvecGS32(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/32,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/32)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq32_in",r),B=this.makeU32Buffer("gptq32_qw",a),l=this.makeU32Buffer("gptq32_sc",h),p=this.makeOutputBuffer("gptq32_out",4),y=this.makeUniform([128,4,32]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQSplitK(){const a=new Float32Array(256);for(let i=0;i<256;i++)a[i]=Math.sin(i*.3)*.5;const d=new Uint32Array(1024),f=new Float32Array(256*32);for(let 
i=0;i<32;i++)for(let c=0;c<32;c++){let _=0;for(let b=0;b<8;b++){const N=i*8+b,x=(N*3+c*7)%15;f[N*32+c]=x-8,_|=x<<b*4}d[i*32+c]=_}const w=256/64,h=new Float32Array(w*32);for(let i=0;i<w;i++)for(let c=0;c<32;c++)h[i*32+c]=.3+(i*32+c)*.02;const F=this.cpuF32ToF16Packed(h),n=new Float32Array(32);for(let i=0;i<32;i++){let c=0;for(let _=0;_<256;_++){const N=Math.floor(_/64)*32+i,x=F[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);c+=a[_]*f[_*32+i]*q}n[i]=c}const B=this.makeF32Buffer("sk_in",a),l=this.makeU32Buffer("sk_qw",d),p=this.makeU32Buffer("sk_sc",F),y=this.makeOutputBuffer("sk_partials",128),k=this.makeOutputBuffer("sk_out",32),u=this.makeUniform([256,32,64,4]),m=this.makeUniform([32,4]);this.dispatch("gptq_splitk",[B,l,p,y,u],Math.ceil(32/32),4),this.dispatch("reduce_splitk",[y,k,m],Math.ceil(32/256));const g=await this.readback(k,32);return this.compare(g,n,.01)}async testGPTQF16(){if(!this.gpu.hasF16||!this.pipelines.gptq_matvec_f16)return{pass:!0,maxErr:0,note:"shader-f16 not available, skipped"};const e=128,t=4,o=64,s=e/8,r=new Float32Array(e);for(let u=0;u<e;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let u=0;u<s;u++)for(let m=0;m<t;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c+m*3)%15;d[c*t+m]=_-8,g|=_<<i*4}a[u*t+m]=g}const f=e/o,w=new Float32Array(f*t);for(let u=0;u<f;u++)for(let m=0;m<t;m++)w[u*t+m]=.5+u*.1;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(t);for(let u=0;u<t;u++){let m=0;for(let g=0;g<e;g++){const c=Math.floor(g/o)*t+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*t+u]*b}F[u]=m}const n=this.makeF32Buffer("f16_in",r),B=this.makeU32Buffer("f16_qw",a),l=this.makeU32Buffer("f16_sc",h),p=this.makeOutputBuffer("f16_out",t),y=this.makeUniform([e,t,o]);this.dispatch("gptq_matvec_f16",[n,B,l,p,y],Math.ceil(t/32));const k=await this.readback(p,t);return this.compare(k,F,.5)}async testBF16Matvec(){const o=new Float32Array(8);for(let n=0;n<8;n++)o[n]=(n+1)*.2;const s=new 
Float32Array(32);for(let n=0;n<4;n++)for(let B=0;B<8;B++)s[n*8+B]=Math.cos(n*.5+B*.3);const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4);for(let n=0;n<4;n++){let B=0;for(let l=0;l<8;l++){const p=n*8+l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);B+=o[l]*u}a[n]=B}const d=this.makeF32Buffer("bf16mv_in",o),f=this.makeU32Buffer("bf16mv_w",r),w=this.makeOutputBuffer("bf16mv_out",4),h=this.makeUniform([8,4]);this.dispatch("bf16_matvec",[d,f,w,h],Math.ceil(4/32));const F=await this.readback(w,4);return this.compare(F,a,.001)}async testRMSNorm(){const o=new Float32Array(16);for(let l=0;l<16;l++)o[l]=(l-8)*.3;const s=new Float32Array(16);for(let l=0;l<16;l++)s[l]=Math.sin(l*.4)*.1;const r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<16;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/16+1e-6),f=new Float32Array(16);for(let l=0;l<16;l++){const p=l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);f[l]=o[l]*d*(1+u)}const w=this.makeF32Buffer("rmsn_in",o),h=this.makeU32Buffer("rmsn_w",r),F=this.makeOutputBuffer("rmsn_out",16),n=this.makeUniformTyped([16],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,16);return this.compare(B,f,1e-4)}async testRMSNorm1PlusW(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8).fill(0),r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<8;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/8+1e-6),f=new Float32Array(8);for(let l=0;l<8;l++)f[l]=o[l]*d;const w=this.makeF32Buffer("rmsn1_in",o),h=this.makeU32Buffer("rmsn1_w",r),F=this.makeOutputBuffer("rmsn1_out",8),n=this.makeUniformTyped([8],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,8);return this.compare(B,f,1e-4)}async testSiLUMul(){const t=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8);for(let h=0;h<8;h++){const F=t[h]/(1+Math.exp(-t[h]));s[h]=F*o[h]}const 
r=this.makeF32Buffer("silu_gate",t),a=this.makeF32Buffer("silu_up",o),d=this.makeOutputBuffer("silu_out",8),f=this.makeUniform([8]);this.dispatch("silu_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testAdd(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([.1,.2,.3,.4,.5,.6,.7,.8]),s=new Float32Array(8);for(let w=0;w<8;w++)s[w]=t[w]+o[w];const r=this.makeF32Buffer("add_a",t),a=this.makeF32Buffer("add_b",o),d=this.makeUniform([8]);this.dispatch("add",[r,a,d],Math.ceil(8/32));const f=await this.readback(r,8);return this.compare(f,s,1e-6)}async testEmbedding(){const o=new Float32Array(32);for(let F=0;F<o.length;F++)o[F]=(F+1)*.01;const s=this.cpuF32ToBF16Packed(o),r=2,a=new Float32Array(8);for(let F=0;F<8;F++){const n=r*8+F,B=s[Math.floor(n/2)],l=n%2===0?B&65535:B>>16&65535;a[F]=this.bf16ToF32(l)}const d=this.makeU32Buffer("emb_w",s),f=this.makeOutputBuffer("emb_out",8),w=this.makeUniform([r,8]);this.dispatch("embedding",[d,f,w],Math.ceil(8/256));const h=await this.readback(f,8);return this.compare(h,a,.001)}async testArgmax(){const t=new Float32Array(16);for(let w=0;w<16;w++)t[w]=Math.sin(w*.7)*3;const o=t.indexOf(Math.max(...t)),s=this.makeF32Buffer("argmax_in",t),r=this.makeOutputBuffer("argmax_res",2),a=this.makeUniform([16]);this.dispatch("argmax",[s,r,a],1);const f=(await this.readbackU32(r,2))[0];return{pass:f===o,maxErr:Math.abs(f-o),errors:f!==o?[{idx:0,got:f,expected:o}]:[]}}async testArgmaxLarge(){const t=new Float32Array(1024);for(let d=0;d<1024;d++)t[d]=Math.sin(d*.01)-2;t[773]=99;const o=this.makeF32Buffer("argmax_lg_in",t),s=this.makeOutputBuffer("argmax_lg_res",2),r=this.makeUniform([1024]);this.dispatch("argmax",[o,s,r],1);const a=await this.readbackU32(s,2);return{pass:a[0]===773,maxErr:Math.abs(a[0]-773),errors:a[0]!==773?[{got:a[0],expected:773}]:[]}}async testSplit(){const s=new Float32Array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]),r=new Float32Array([1,2,3,4,9,10,11,12]),a=new 
Float32Array([5,6,7,8,13,14,15,16]),d=this.makeF32Buffer("split_src",s),f=this.makeOutputBuffer("split_a",8),w=this.makeOutputBuffer("split_b",8),h=this.makeUniform([8,2,4]);this.dispatch("split",[d,f,w,h],Math.ceil(8/32));const F=await this.readback(f,8),n=await this.readback(w,8),B=this.compare(F,r,1e-6),l=this.compare(n,a,1e-6);return{pass:B.pass&&l.pass,maxErr:Math.max(B.maxErr,l.maxErr),errors:[...B.errors,...l.errors]}}async testSigmoidMul(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),s=new Float32Array(8);for(let h=0;h<8;h++)s[h]=t[h]/(1+Math.exp(-o[h]));const r=this.makeF32Buffer("sigmul_x",t),a=this.makeF32Buffer("sigmul_g",o),d=this.makeOutputBuffer("sigmul_out",8),f=this.makeUniform([8]);this.dispatch("sigmoid_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testHeadRMSNorm(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.1*(n+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*(1+u)}}const f=this.makeF32Buffer("hrmsn_x",s),w=this.makeU32Buffer("hrmsn_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testHeadRMSNormNogated(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.5+.1*n;const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*u}}const 
f=this.makeF32Buffer("hrmsng_x",s),w=this.makeU32Buffer("hrmsng_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm_nogated",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testCausalConv1d(){const t=new Float32Array([1,2,3,4]),o=new Float32Array(12);for(let y=0;y<o.length;y++)o[y]=(y+1)*.1;const s=new Float32Array(16);for(let y=0;y<s.length;y++)s[y]=.25;const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4),d=new Float32Array(12);for(let y=0;y<4;y++){const k=o[y],u=o[4+y],m=o[8+y],g=t[y],i=[];for(let _=0;_<4;_++){const b=y*4+_,N=r[Math.floor(b/2)],x=b%2===0?N&65535:N>>16&65535;i.push(this.bf16ToF32(x))}const c=i[0]*k+i[1]*u+i[2]*m+i[3]*g;a[y]=c/(1+Math.exp(-c)),d[y]=u,d[4+y]=m,d[8+y]=g}const f=this.makeF32Buffer("conv_x",t),w=this.makeF32Buffer("conv_hist",o),h=this.makeU32Buffer("conv_w",r),F=this.makeUniform([4]);this.dispatch("causal_conv1d",[f,w,h,F],Math.ceil(4/256));const n=await this.readback(f,4),B=await this.readback(w,12),l=this.compare(n,a,1e-4),p=this.compare(B,d,1e-4);return{pass:l.pass&&p.pass,maxErr:Math.max(l.maxErr,p.maxErr),errors:[...l.errors.map(y=>({...y,note:"conv output"})),...p.errors.map(y=>({...y,note:"history"}))]}}async testKVCacheStore(){const r=new Float32Array(8),a=new Float32Array(8);for(let c=0;c<8;c++)r[c]=c+1,a[c]=(c+1)*10;const f=8*8,w=new Float32Array(f),h=new Float32Array(f),F=new Float32Array(f),n=new Float32Array(f);for(let c=0;c<8;c++)F[24+c]=r[c],n[24+c]=a[c];const B=this.makeF32Buffer("kvs_k",r),l=this.makeF32Buffer("kvs_v",a),p=this.makeF32Buffer("kvs_kc",w),y=this.makeF32Buffer("kvs_vc",h),k=this.makeUniform([3,2,4]);this.dispatch("kv_cache_store",[B,l,p,y,k],Math.ceil(8/256));const u=await this.readback(p,f),m=await this.readback(y,f),g=this.compare(u,F,1e-6),i=this.compare(m,n,1e-6);return{pass:g.pass&&i.pass,maxErr:Math.max(g.maxErr,i.maxErr),errors:[...g.errors,...i.errors]}}async testGQAAttention(){const a=new Float32Array([1,0,0,0]),d=new 
Float32Array(4);d.set([1,0,0,0]);const f=new Float32Array(4);f.set([.5,.6,.7,.8]);const w=new Float32Array([.5,.6,.7,.8]),h=this.makeF32Buffer("gqa_q",a),F=this.makeF32Buffer("gqa_kc",d),n=this.makeF32Buffer("gqa_vc",f),B=this.makeOutputBuffer("gqa_out",4),l=new ArrayBuffer(32),p=new DataView(l);p.setUint32(0,1,!0),p.setUint32(4,4,!0),p.setUint32(8,1,!0),p.setUint32(12,1,!0),p.setUint32(16,1/1,!0),p.setUint32(20,1,!0);const y=this.gpu.createBufferFromData("gqa_params",new Uint8Array(l),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[h,F,n,B,y],1);const k=await this.readback(B,4);return this.compare(k,w,1e-4)}async testDeltaNetRecurrent(){const s=[.5,.5],r=[1,0],a=[.8,.2],d=new Float32Array([...s,...r,...a]),f=new Float32Array(4),w=new Float32Array([0]),h=new Float32Array([0]),F=new Float32Array([0]),n=new Float32Array([0]),B=this.cpuF32ToBF16Packed(F),l=this.cpuF32ToBF16Packed(n),p=1/(1+Math.exp(-0)),y=Math.sqrt(s[0]**2+s[1]**2),k=Math.sqrt(r[0]**2+r[1]**2),u=s.map(U=>U/y),m=r.map(U=>U/k),g=[0,0,0,0],i=[0,0],c=a.map((U,S)=>(U-i[S])*p);for(let U=0;U<2;U++)for(let S=0;S<2;S++)g[U*2+S]+=m[U]*c[S];const _=1/Math.sqrt(2),b=new Float32Array(2);for(let U=0;U<2;U++){let S=0;for(let O=0;O<2;O++)S+=g[O*2+U]*u[O]*_;b[U]=S}const N=this.makeF32Buffer("dn_qkv",d),x=this.makeF32Buffer("dn_state",f),q=this.makeOutputBuffer("dn_out",2),K=this.makeF32Buffer("dn_a",w),v=this.makeF32Buffer("dn_b",h),T=this.makeU32Buffer("dn_alog",B),V=this.makeU32Buffer("dn_dtb",l),G=this.makeUniform([1,2,2]);this.dispatch("deltanet_recurrent",[N,x,q,K,v,T,V,G],1);const C=await this.readback(q,2);return this.compare(C,b,.001)}async testAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array(8);for(let _=0;_<8;_++)r[_]=.1*(_+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(o);for(let _=0;_<8;_++)d[_]+=s[_];let f=0;for(let _=0;_<8;_++)f+=d[_]*d[_];const w=1/Math.sqrt(f/8+1e-6),h=new 
Float32Array(8);for(let _=0;_<8;_++){const b=a[Math.floor(_/2)]>>_%2*16&65535,N=this.bf16ToF32(b);h[_]=d[_]*w*(1+N)}const F=this.makeF32Buffer("addnorm_h",o),n=this.makeF32Buffer("addnorm_a",s),B=this.makeU32Buffer("addnorm_w",a),l=this.makeOutputBuffer("addnorm_out",8);this.makeUniform([8]);const p=new ArrayBuffer(16),y=new DataView(p);y.setUint32(0,8,!0),y.setFloat32(4,1e-6,!0);const k=this.gpu.createBufferFromData("addnorm_params",new Uint8Array(p),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("add_rmsnorm",[F,n,B,l,k],1);const u=await this.readback(l,8),m=await this.readback(F,8),g=d,i=this.compare(u,h,.001),c=this.compare(m,g,1e-6);return{pass:i.pass&&c.pass,maxErr:Math.max(i.maxErr,c.maxErr),errors:[...i.errors,...c.errors]}}async testThreeWayAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array([.5,.5,-.5,-.5,1,1,-1,-1]),a=new Float32Array(8);for(let N=0;N<8;N++)a[N]=.1*(N+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(8);for(let N=0;N<8;N++)f[N]=o[N]+s[N]+r[N];let w=0;for(let N=0;N<8;N++)w+=f[N]*f[N];const h=1/Math.sqrt(w/8+1e-6),F=new Float32Array(8);for(let N=0;N<8;N++){const x=d[Math.floor(N/2)]>>N%2*16&65535,q=this.bf16ToF32(x);F[N]=f[N]*h*(1+q)}const n=this.makeF32Buffer("twa_a",o),B=this.makeF32Buffer("twa_b",s),l=this.makeF32Buffer("twa_c",r),p=this.makeU32Buffer("twa_w",d),y=this.makeOutputBuffer("twa_hout",8),k=this.makeOutputBuffer("twa_normed",8),u=new ArrayBuffer(16),m=new DataView(u);m.setUint32(0,8,!0),m.setFloat32(4,1e-6,!0);const g=this.gpu.createBufferFromData("twa_params",new Uint8Array(u),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("three_way_add_rmsnorm",[n,B,l,p,y,k,g],1);const i=await this.readback(y,8),c=await this.readback(k,8),_=this.compare(i,f,1e-6),b=this.compare(c,F,.001);return{pass:_.pass&&b.pass,maxErr:Math.max(_.maxErr,b.maxErr),errors:[..._.errors,...b.errors]}}async testGQARoPERotateHalf(){const 
h=1/Math.sqrt(256),F=[];for(let D=0;D<32;D++)F.push(1/Math.pow(100,2*D/64));function n(D,M){const H=new Float32Array(D.length);for(let P=0;P<D.length;P++)H[P]=D[P];for(let P=0;P<32;P++){const Q=M*F[P],E=Math.cos(Q),J=Math.sin(Q);H[P]=D[P]*E-D[P+32]*J,H[P+32]=D[P+32]*E+D[P]*J}return H}const B=new Float32Array(256);B[0]=1,B[1]=.5,B[2]=.3,B[32]=.7,B[33]=.2;const l=new Float32Array(256);l[0]=1,l[32]=.5;const p=new Float32Array(256);p[1]=1,p[33]=.3;const y=new Float32Array(256);y[2]=1;const k=new Float32Array(256);k[0]=1;const u=new Float32Array(256);u[0]=0,u[1]=1;const m=new Float32Array(256);m[0]=0,m[2]=1;const g=n(B,2),i=n(l,0),c=n(p,1),_=n(y,2);let b=0,N=0,x=0;for(let D=0;D<256;D++)b+=g[D]*i[D],N+=g[D]*c[D],x+=g[D]*_[D];b*=h,N*=h,x*=h;const q=Math.max(b,N,x),K=Math.exp(b-q),v=Math.exp(N-q),T=Math.exp(x-q),V=K+v+T,G=K/V,C=v/V,U=T/V,S=new Float32Array(256);for(let D=0;D<256;D++)S[D]=G*k[D]+C*u[D]+U*m[D];const O=1*256,Y=new Float32Array(3*O),z=new Float32Array(3*O);Y.set(i,0),Y.set(c,O),Y.set(_,2*O),z.set(k,0),z.set(u,O),z.set(m,2*O);const X=this.makeF32Buffer("rope2_q",g),tt=this.makeF32Buffer("rope2_kc",Y),A=this.makeF32Buffer("rope2_vc",z),R=this.makeOutputBuffer("rope2_out",1*256),L=new ArrayBuffer(32),W=new DataView(L);W.setUint32(0,3,!0),W.setUint32(4,256,!0),W.setUint32(8,1,!0),W.setUint32(12,1,!0),W.setUint32(16,1/1,!0),W.setUint32(20,1,!0);const Z=this.gpu.createBufferFromData("rope2_params",new Uint8Array(L),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[X,tt,A,R,Z],1);const j=await this.readback(R,1*256);return this.compare(j,S,.001)}async testFusedSigmoidGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m+1)*.1,a[m]=(m-8)*.3;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.5+m*.1;const 
h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const c=r[i]/(1+Math.exp(-a[i])),b=Math.floor(i/64)*4+m,N=h[Math.floor(b/2)]>>b%2*16&65535,x=this.f16ToF32(N);g+=c*f[i*4+m]*x}F[m]=g}const n=this.makeF32Buffer("fsg_x",r),B=this.makeF32Buffer("fsg_g",a),l=this.makeU32Buffer("fsg_qw",d),p=this.makeU32Buffer("fsg_sc",h),y=this.makeOutputBuffer("fsg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_sigmoid_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedSiluGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m-8)*.2,a[m]=(m+1)*.15;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_*2+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.4+m*.2;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const _=r[i]/(1+Math.exp(-r[i]))*a[i],N=Math.floor(i/64)*4+m,x=h[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);g+=_*f[i*4+m]*q}F[m]=g}const n=this.makeF32Buffer("fslg_a",r),B=this.makeF32Buffer("fslg_b",a),l=this.makeU32Buffer("fslg_qw",d),p=this.makeU32Buffer("fslg_sc",h),y=this.makeOutputBuffer("fslg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_silu_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}async testFusedAddNormGateUpSiLU(){const a=new Float32Array(64),d=new Float32Array(64),f=new Float32Array(64),w=new Float32Array(64);for(let A=0;A<64;A++)a[A]=(A+1)*.1,d[A]=(A-5)*.05,f[A]=(A%7-3)*.03,w[A]=(A%3-1)*.1;const h=new Uint32Array(32),F=new Uint32Array(32),n=new Float32Array(256),B=new Float32Array(256);for(let A=0;A<64;A++)for(let R=0;R<4;R++)n[A*4+R]=Math.sin(A*.7+R*1.3)*.5,B[A*4+R]=Math.cos(A*.3+R*.9)*.5;const l=new Uint32Array(4/2),p=new Uint32Array(4/2);for(let 
A=0;A<4;A++){let R=0,L=0;for(let P=0;P<64;P++)R=Math.max(R,Math.abs(n[P*4+A])),L=Math.max(L,Math.abs(B[P*4+A]));const W=R/7,Z=L/7,j=this.f32ToF16(W),D=this.f32ToF16(Z),M=this.f16ToF32(j),H=this.f16ToF32(D);A%2===0?(l[A/2]=j,p[A/2]=D):(l[(A-1)/2]|=j<<16,p[(A-1)/2]|=D<<16);for(let P=0;P<64;P++){const Q=Math.round(n[P*4+A]/M)+8,E=Math.round(B[P*4+A]/H)+8,J=Math.max(0,Math.min(15,Q)),rt=Math.max(0,Math.min(15,E)),st=Math.floor(P/8),at=P%8*4;h[st*4+A]|=J<<at,F[st*4+A]|=rt<<at}}const y=new Uint32Array(64/2);for(let A=0;A<64;A+=2){const R=this.f32ToBF16(w[A]),L=this.f32ToBF16(w[A+1]);y[A/2]=R|L<<16}const k=new Float32Array(64);let u=0;for(let A=0;A<64;A++)k[A]=a[A]+d[A]+f[A],u+=k[A]*k[A];const m=1/Math.sqrt(u/64+1e-6),g=new Float32Array(64);for(let A=0;A<64;A++){const R=this.unpackBF16(y[Math.floor(A/2)],A%2);g[A]=k[A]*m*(1+R)}const i=new Float32Array(4);for(let A=0;A<4;A++){let R=0,L=0;const W=this.f16ToF32(A%2===0?l[Math.floor(A/2)]&65535:l[Math.floor(A/2)]>>>16),Z=this.f16ToF32(A%2===0?p[Math.floor(A/2)]&65535:p[Math.floor(A/2)]>>>16);for(let j=0;j<64;j++){const D=Math.floor(j/8),M=j%8*4,H=h[D*4+A]>>>M&15,P=F[D*4+A]>>>M&15;R+=g[j]*W*(H-8),L+=g[j]*Z*(P-8)}i[A]=R/(1+Math.exp(-R))*L}const c=k,_=16+640*16,b=new ArrayBuffer(_),N=new DataView(b);N.setUint32(0,64,!0),N.setUint32(4,4,!0),N.setUint32(8,64,!0),N.setFloat32(12,1e-6,!0);for(let A=0;A<64/2;A++){const R=Math.floor(A/4),L=A%4;N.setUint32(16+R*16+L*4,y[A],!0)}const x=this.gpu.createBufferFromData("fang_params",new Uint8Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=new Uint32Array(64);q.set(h,0),q.set(F,32);const K=8/(64/8),v=new Uint32Array(K*4/2*2);v.set(l,0),v.set(p,K*4/2);const 
T=this.makeF32Buffer("fang_hin",a),V=this.makeF32Buffer("fang_mlpres",d),G=this.makeF32Buffer("fang_attnres",f),C=this.makeU32Buffer("fang_mqw",q),U=this.makeU32Buffer("fang_msc",v),S=this.makeOutputBuffer("fang_hout",64),O=this.makeOutputBuffer("fang_out",4);this.dispatch("fused_addnorm_gate_up_silu",[T,V,G,C,U,S,O,x],Math.max(1,Math.ceil(4/32)));const Y=await this.readback(O,4),z=await this.readback(S,64),X=this.compare(Y,i,.02),tt=this.compare(z,c,1e-6);return{pass:X.pass&&tt.pass,maxErr:Math.max(X.maxErr,tt.maxErr),errors:[...X.errors||[],...tt.errors||[]]}}async testFusedSplitQKNormKVStore(){const f=new Float32Array(32);for(let M=0;M<f.length;M++)f[M]=Math.sin(M*.5+1)*.3;const w=new Float32Array(8),h=new Float32Array(8);for(let M=0;M<8;M++)w[M]=Math.cos(M*.7+2)*.4,h[M]=Math.sin(M*1.1+3)*.2;const F=new Float32Array(16),n=new Float32Array(8);for(let M=0;M<16;M++)F[M]=(M%3-1)*.05;for(let M=0;M<8;M++)n[M]=(M%4-2)*.03;const B=new Uint32Array(16/2);for(let M=0;M<16;M+=2)B[M/2]=this.f32ToBF16(F[M])|this.f32ToBF16(F[M+1])<<16;const l=new Uint32Array(8/2);for(let M=0;M<8;M+=2)l[M/2]=this.f32ToBF16(n[M])|this.f32ToBF16(n[M+1])<<16;const p=new Float32Array(16),y=new Float32Array(16);for(let M=0;M<2;M++){const H=new Float32Array(8);for(let E=0;E<8;E++)H[E]=f[M*8*2+E],y[M*8+E]=f[M*8*2+8+E];let P=0;for(let E=0;E<8;E++)P+=H[E]*H[E];const Q=1/Math.sqrt(P/8+1e-6);for(let E=0;E<8;E++){const J=this.unpackBF16(B[Math.floor((M*8+E)/2)],(M*8+E)%2);p[M*8+E]=H[E]*Q*(1+J)}}const k=new Float32Array(8),u=new Float32Array(32),m=new Float32Array(32);for(let M=0;M<1;M++){let H=0;for(let Q=0;Q<8;Q++)H+=w[M*8+Q]*w[M*8+Q];const P=1/Math.sqrt(H/8+1e-6);for(let Q=0;Q<8;Q++){const E=this.unpackBF16(l[Math.floor((M*8+Q)/2)],(M*8+Q)%2),J=w[M*8+Q]*P*(1+E);k[M*8+Q]=J,u[24+M*8+Q]=J,m[24+M*8+Q]=h[M*8+Q]}}const g=32+320*16,i=new ArrayBuffer(g),c=new DataView(i);c.setUint32(0,2,!0),c.setUint32(4,1,!0),c.setUint32(8,8,!0),c.setFloat32(12,1e-6,!0),c.setUint32(16,3,!0);for(let M=0;M<16/2;M++){const 
H=Math.floor(M/4),P=M%4;c.setUint32(32+H*16+P*4,B[M],!0)}const _=16/2;for(let M=0;M<8/2;M++){const H=_+M,P=Math.floor(H/4),Q=H%4;c.setUint32(32+P*16+Q*4,l[M],!0)}const b=this.gpu.createBufferFromData("fsqk_params",new Uint8Array(i),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),N=this.makeF32Buffer("fsqk_qpf",f),x=this.makeF32Buffer("fsqk_kp",w),q=this.makeF32Buffer("fsqk_vp",h),K=this.makeOutputBuffer("fsqk_qp",16),v=this.makeOutputBuffer("fsqk_qg",16),T=this.makeOutputBuffer("fsqk_kc",32),V=this.makeOutputBuffer("fsqk_vc",32);this.dispatch("fused_split_qknorm_kvstore",[N,x,q,K,v,T,V,b],3);const G=await this.readback(K,16),C=await this.readback(v,16),U=await this.readback(x,8),S=await this.readback(T,32),O=await this.readback(V,32),Y=this.compare(G,p,1e-5),z=this.compare(C,y,1e-6),X=this.compare(U,k,1e-5),tt=S.slice(24,32),A=O.slice(24,32),R=u.slice(24,32),L=m.slice(24,32),W=this.compare(tt,R,1e-5),Z=this.compare(A,L,1e-6),j=Math.max(Y.maxErr,z.maxErr,X.maxErr,W.maxErr,Z.maxErr);return{pass:Y.pass&&z.pass&&X.pass&&W.pass&&Z.pass,maxErr:j,errors:[...Y.errors||[],...z.errors||[],...X.errors||[],...W.errors||[],...Z.errors||[]]}}async benchmarkGPTQ(e){const t=[{K:2048,N:2048,gs:128,label:"2048→2048 (Z/out_proj)"},{K:2048,N:6144,gs:128,label:"2048→6144 (QKV/gate+up)"},{K:6144,N:2048,gs:128,label:"6144→2048 (down_proj)"}],o=[1,2,4,8],s=50,r=3,a=10,d=[];for(const f of t){const{K:w,N:h,gs:F,label:n}=f,B=w/8,l=B/(F/8),p=new Float32Array(w);for(let b=0;b<w;b++)p[b]=Math.random()*2-1;const y=new Uint32Array(B*h);for(let b=0;b<y.length;b++)y[b]=Math.random()*4294967295>>>0;const k=new Float32Array(l*h);for(let b=0;b<k.length;b++)k[b]=Math.random()*.5;const u=this.cpuF32ToF16Packed(k),m=this.makeF32Buffer("bm_in",p),g=this.makeU32Buffer("bm_qw",y),i=this.makeU32Buffer("bm_sc",u),c=[];for(let b=0;b<s;b++)c.push(this.makeOutputBuffer(`bm_out_${b}`,h));const _=B*h*4+l*h*2;for(const b of o){if(b>1&&l%b!==0)continue;let N;if(b===1){const v=this.makeUniform([w,h,F]);for(let 
G=0;G<r;G++){this.gpu.beginBatch();for(let C=0;C<s;C++)this.dispatch("gptq_matvec",[m,g,i,c[C],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const T=[];for(let G=0;G<a;G++){await this.gpu.device.queue.onSubmittedWorkDone();const C=performance.now();this.gpu.beginBatch();for(let U=0;U<s;U++)this.dispatch("gptq_matvec",[m,g,i,c[U],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),T.push(performance.now()-C)}T.sort((G,C)=>G-C),N=T.slice(1,-1).reduce((G,C)=>G+C,0)/(T.length-2)/s}else{const v=[];for(let U=0;U<s;U++)v.push(this.makeOutputBuffer(`bm_part_${U}`,h*b));const T=this.makeUniform([w,h,F,b]),V=this.makeUniform([h,b]);for(let U=0;U<r;U++){this.gpu.beginBatch();for(let S=0;S<s;S++)this.dispatch("gptq_splitk",[m,g,i,v[S],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[S],c[S],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const G=[];for(let U=0;U<a;U++){await this.gpu.device.queue.onSubmittedWorkDone();const S=performance.now();this.gpu.beginBatch();for(let O=0;O<s;O++)this.dispatch("gptq_splitk",[m,g,i,v[O],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[O],c[O],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),G.push(performance.now()-S)}G.sort((U,S)=>U-S),N=G.slice(1,-1).reduce((U,S)=>U+S,0)/(G.length-2)/s}const x=_/1e9/(N/1e3),q=Math.ceil(h/32)*b,K={label:n,ns:b,wgs:q,avgMs:N.toFixed(4),bwGBs:x.toFixed(1)};d.push(K),e?.(K)}if(this.gpu.hasF16&&this.pipelines.gptq_matvec_f16){const b=this.makeUniform([w,h,F]);for(let T=0;T<r;T++){this.gpu.beginBatch();for(let V=0;V<s;V++)this.dispatch("gptq_matvec_f16",[m,g,i,c[V],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const N=[];for(let T=0;T<a;T++){await this.gpu.device.queue.onSubmittedWorkDone();const V=performance.now();this.gpu.beginBatch();for(let 
G=0;G<s;G++)this.dispatch("gptq_matvec_f16",[m,g,i,c[G],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),N.push(performance.now()-V)}N.sort((T,V)=>T-V);const q=N.slice(1,-1).reduce((T,V)=>T+V,0)/(N.length-2)/s,K=_/1e9/(q/1e3),v={label:n,ns:"f16",wgs:Math.ceil(h/32),avgMs:q.toFixed(4),bwGBs:K.toFixed(1)};d.push(v),e?.(v)}}return d}_makeNormParams(e,t=1e-6){const o=new ArrayBuffer(16),s=new DataView(o);return s.setUint32(0,e,!0),s.setFloat32(4,t,!0),this.gpu.createBufferFromData("norm_params",new Uint8Array(o),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}_cpuRMSNorm1PlusW(e,t,o,s=1e-6){let r=0;for(let f=0;f<o;f++)r+=e[f]*e[f];const a=1/Math.sqrt(r/o+s),d=new Float32Array(o);for(let f=0;f<o;f++){const w=t[Math.floor(f/2)]>>f%2*16&65535,h=this.bf16ToF32(w);d[f]=e[f]*a*(1+h)}return d}async testAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let u=0;u<32;u++)o[u]=Math.sin(u*.7)*2,s[u]=Math.cos(u*.3)*.5;const r=new Float32Array(16);for(let u=0;u<16;u++)r[u]=.1*(u+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(32),f=new Float32Array(32);for(let u=0;u<2;u++){const m=new Float32Array(16);for(let i=0;i<16;i++)m[i]=o[u*16+i]+s[u*16+i];for(let i=0;i<16;i++)f[u*16+i]=m[i];const g=this._cpuRMSNorm1PlusW(m,a,16,1e-6);d.set(g,u*16)}const w=this.makeF32Buffer("b2an_h",o),h=this.makeF32Buffer("b2an_a",s),F=this.makeU32Buffer("b2an_w",a),n=this.makeOutputBuffer("b2an_out",32),B=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_b2",[w,h,F,n,B],2);const l=await this.readback(n,32),p=await this.readback(w,32),y=this.compare(l,d,.001),k=this.compare(p,f,1e-6);return{pass:y.pass&&k.pass,maxErr:Math.max(y.maxErr,k.maxErr),errors:[...y.errors,...k.errors]}}async testAddRMSNormROB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let l=0;l<32;l++)o[l]=Math.sin(l*.5)*3,s[l]=Math.cos(l*.8)*.3;const r=new Float32Array(16);for(let l=0;l<16;l++)r[l]=.05*(l+1);const a=this.cpuF32ToBF16Packed(r),d=new 
Float32Array(32);for(let l=0;l<2;l++){const p=new Float32Array(16);for(let k=0;k<16;k++)p[k]=o[l*16+k]+s[l*16+k];const y=this._cpuRMSNorm1PlusW(p,a,16,1e-6);d.set(y,l*16)}const f=this.makeF32Buffer("b2anro_h",o),w=this.makeF32Buffer("b2anro_a",s),h=this.makeU32Buffer("b2anro_w",a),F=this.makeOutputBuffer("b2anro_out",32),n=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_ro_b2",[f,w,h,F,n],2);const B=await this.readback(F,32);return this.compare(B,d,.001)}async testThreeWayAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32),r=new Float32Array(32);for(let i=0;i<32;i++)o[i]=Math.sin(i*.4)*2,s[i]=Math.cos(i*.6)*.5,r[i]=Math.sin(i*1.1)*.3;const a=new Float32Array(16);for(let i=0;i<16;i++)a[i]=.1*(i+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(32),w=new Float32Array(32);for(let i=0;i<2;i++){const c=new Float32Array(16);for(let b=0;b<16;b++)c[b]=o[i*16+b]+s[i*16+b]+r[i*16+b];f.set(c,i*16);const _=this._cpuRMSNorm1PlusW(c,d,16,1e-6);w.set(_,i*16)}const h=this.makeF32Buffer("b2twa_a",o),F=this.makeF32Buffer("b2twa_b",s),n=this.makeF32Buffer("b2twa_c",r),B=this.makeU32Buffer("b2twa_w",d),l=this.makeOutputBuffer("b2twa_hout",32),p=this.makeOutputBuffer("b2twa_normed",32),y=this._makeNormParams(16,1e-6);this.dispatch("three_way_add_rmsnorm_b2",[h,F,n,B,l,p,y],2);const k=await this.readback(p,32),u=await this.readback(l,32),m=this.compare(k,w,.001),g=this.compare(u,f,1e-6);return{pass:m.pass&&g.pass,maxErr:Math.max(m.maxErr,g.maxErr),errors:[...m.errors,...g.errors]}}_makeGPTQData(e,t,o){const s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let h=0;h<s;h++)for(let F=0;F<t;F++){let n=0;for(let B=0;B<8;B++){const l=h*8+B,p=(l*3+F*7)%15;a[l*t+F]=p-8,n|=p<<B*4}r[h*t+F]=n}const d=e/o,f=new Float32Array(d*t);for(let h=0;h<d*t;h++)f[h]=.3+h*.15;const w=this.cpuF32ToF16Packed(f);return{qweight:r,rawWeights:a,scalesPacked:w}}_cpuGPTQMatvec(e,t,o,s,r,a,d=0){const f=new Float32Array(r);for(let w=0;w<r;w++){let h=0;for(let F=0;F<s;F++){const 
B=Math.floor(F/a)*r+w,l=o[Math.floor(B/2)]>>B%2*16&65535,p=this.f16ToF32(l);h+=e[d+F]*t[F*r+w]*p}f[w]=h}return f}async testFusedSiluGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.3)*2,f[k]=Math.cos(k*.5)*1.5;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=d[k*e+g];u[g]=i/(1+Math.exp(-i))*f[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sl_a",d),F=this.makeF32Buffer("b2sl_b",f),n=this.makeU32Buffer("b2sl_qw",s),B=this.makeU32Buffer("b2sl_sc",a),l=this.makeOutputBuffer("b2sl_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_silu_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}async testFusedSigmoidGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.4)*1.5,f[k]=Math.cos(k*.7)*2;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=1/(1+Math.exp(-f[k*e+g]));u[g]=i*d[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sg_x",d),F=this.makeF32Buffer("b2sg_g",f),n=this.makeU32Buffer("b2sg_qw",s),B=this.makeU32Buffer("b2sg_sc",a),l=this.makeOutputBuffer("b2sg_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_sigmoid_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}async testFusedGateUpSiLUB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let 
N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*3+x*7)%15;a[v*t+x]=T-8,q|=T<<K*4}r[N*t+x]=q}const d=e/o,f=new Float32Array(d*t);for(let N=0;N<d*t;N++)f[N]=.01+N%8*.005;const w=this.cpuF32ToF16Packed(f),h=new Uint32Array(s*t),F=new Float32Array(e*t);for(let N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*5+x*3)%15;F[v*t+x]=T-8,q|=T<<K*4}h[N*t+x]=q}const n=e/o,B=new Float32Array(n*t);for(let N=0;N<n*t;N++)B[N]=.01+N%8*.005;const l=this.cpuF32ToF16Packed(B),p=new Float32Array(2*e);for(let N=0;N<2*e;N++)p[N]=Math.sin(N*.2)*.5;const y=new Float32Array(2*t);for(let N=0;N<2;N++){const x=this._cpuGPTQMatvec(p,a,w,e,t,o,N*e),q=this._cpuGPTQMatvec(p,F,l,e,t,o,N*e);for(let K=0;K<t;K++){const v=x[K];y[N*t+K]=v/(1+Math.exp(-v))*q[K]}}const k=this.makeF32Buffer("b2gus_in",p),u=this.makeU32Buffer("b2gus_gqw",r),m=this.makeU32Buffer("b2gus_gsc",w),g=this.makeU32Buffer("b2gus_uqw",h),i=this.makeU32Buffer("b2gus_usc",l),c=this.makeOutputBuffer("b2gus_out",2*t),_=this.makeUniform([e,t,o]);this.dispatch("fused_gate_up_silu_b2_f16",[k,u,m,g,i,c,_],Math.ceil(t/32));const b=await this.readback(c,2*t);return this.compare(b,y,.05)}_makeGPTQMatmulB2Data(e,t,o){const s=e/8,r=new Float32Array(2*e);for(let n=0;n<e;n++)r[n]=Math.sin(n*.5)*.5;for(let n=0;n<e;n++)r[e+n]=Math.cos(n*.3)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let n=0;n<s;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<8;p++){const y=n*8+p,k=(y*3+B*7)%15;d[y*t+B]=k-8,l|=k<<p*4}a[n*t+B]=l}const f=e/o,w=new Float32Array(f*t);for(let n=0;n<f*t;n++)w[n]=.01+n%8*.005;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(2*t);for(let n=0;n<2;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<e;p++){const k=Math.floor(p/o)*t+B,u=h[Math.floor(k/2)]>>k%2*16&65535,m=this.f16ToF32(u);l+=r[n*e+p]*d[p*t+B]*m}F[n*t+B]=l}return{input:r,qweight:a,scalesPacked:h,expected:F,K:e,N:t,gs:o}}async 
testGPTQMatmulB2(){const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2_input",e),w=this.makeU32Buffer("b2_qw",t),h=this.makeU32Buffer("b2_sc",o),F=this.makeOutputBuffer("b2_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.02)}async testGPTQMatmulB2F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2f16_input",e),w=this.makeU32Buffer("b2f16_qw",t),h=this.makeU32Buffer("b2f16_sc",o),F=this.makeOutputBuffer("b2f16_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_f16",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}async testGPTQMatmulB2_4T_F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(256,16,64),f=this.makeF32Buffer("b2_4t_input",e),w=this.makeU32Buffer("b2_4t_qw",t),h=this.makeU32Buffer("b2_4t_sc",o),F=this.makeOutputBuffer("b2_4t_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_4t_f16",[f,w,h,F,n],Math.ceil(a/8));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}f32ToBF16(e){const t=new ArrayBuffer(4);return new Uint32Array(t)[0]=new Uint32Array(new Float32Array([e]).buffer)[0],new Uint32Array(t)[0]>>>16}unpackBF16(e,t){const o=e>>>t*16&65535;return this.bf16ToF32(o)}}function $(I){const e=document.getElementById("log");e.textContent+=I+`
3
  `,e.scrollTop=e.scrollHeight}document.querySelectorAll(".tab").forEach(I=>{I.addEventListener("click",()=>{document.querySelectorAll(".tab").forEach(e=>e.classList.remove("active")),document.querySelectorAll(".panel").forEach(e=>e.classList.remove("active")),I.classList.add("active"),document.getElementById(I.dataset.panel).classList.add("active")})});document.getElementById("runTests").addEventListener("click",async()=>{const I=document.getElementById("runTests"),e=document.getElementById("testResults"),t=document.getElementById("testSummary");I.disabled=!0,e.innerHTML="",t.innerHTML="",$("Initializing WebGPU for tests...");try{const o=new ot;await o.init(),$(`WebGPU initialized. Running tests...
4
  `);let s=0,r=0;await o.runAll(a=>{const d=a.pass?"✓":"✗",f=a.pass?"pass":"fail";a.pass?s++:r++;const w=document.createElement("div");if(w.className="test-row",w.innerHTML=`
5
  <span class="test-icon ${f}">${d}</span>
@@ -14,7 +14,7 @@ import{G as nt,S as ct,_ as et}from"./gpu-ops-BbLjsC0p.js";class ot{constructor(
14
  </div>
15
  `,$(`
16
  Done: ${s} passed, ${r} failed`),o.destroy()}catch(o){$(`Error: ${o.message}
17
- ${o.stack}`)}finally{I.disabled=!1}});document.getElementById("loadAndProfile").addEventListener("click",async()=>{const I=document.getElementById("loadAndProfile"),e=document.getElementById("profStatus"),t=document.getElementById("profCategories"),o=document.getElementById("profTopOps");I.disabled=!0,t.innerHTML="",o.innerHTML="";try{const s=document.getElementById("profRepo").value.trim();e.textContent="Loading model...",$("Profiler: loading model from "+s);const{GPUContext:r}=await et(async()=>{const{GPUContext:c}=await import("./gpu-ops-BbLjsC0p.js").then(_=>_.g);return{GPUContext:c}},[]),{Qwen35Model:a}=await et(async()=>{const{Qwen35Model:c}=await import("./qwen35-model-7KVn_FLm.js");return{Qwen35Model:c}},__vite__mapDeps([0,1])),{loadModelWeights:d,loadConfig:f,loadQuantConfig:w}=await et(async()=>{const{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}=await import("./safetensors-loader-CwGm5mJX.js");return{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}},[]),{Profiler:h}=await et(async()=>{const{Profiler:c}=await import("./profiler-DYUyiq-B.js");return{Profiler:c}},[]),F=new r;await F.init(),e.textContent="Fetching config...";const n=await f(s),B=await w(s);e.textContent="Downloading weights (this may take a while)...";const l=await d(s,c=>{if(c.phase==="downloading"){const _=c.total?(c.loaded/c.total*100).toFixed(0):"?";e.textContent=`Downloading: ${c.filesLoaded}/${c.filesTotal} shards (${_}%)`}else c.phase==="parsing"&&(e.textContent=`Parsing ${c.file}...`)});e.textContent="Uploading to GPU...";const p=new a(F,n,B);p.compilePipelines(),p.loadWeights(l),p.initBuffers(),e.textContent="Warming up...",await p.forward(1,0);const y=p.textCfg.linear_num_key_heads,k=p.textCfg.linear_key_head_dim,u=p.textCfg.linear_value_head_dim,m=y*(k+k+u);for(let c=0;c<p.numLayers;c++)p.layerTypes[c]==="linear_attention"&&(F.device.queue.writeBuffer(p.linState[c],0,new ArrayBuffer(y*k*u*4)),F.device.queue.writeBuffer(p.linConvHist[c],0,new 
ArrayBuffer(3*m*4)));p.seqLen=0,e.textContent="Profiling forward pass...",$("Profiling single forward pass...");const i=await new h(p).profileForward(1,0);e.textContent=`Done! Estimated: ${i.estimatedTokPerSec} tok/s (sync overhead inflates times)`,$(`
18
  Profile complete: ${i.total.toFixed(1)}ms total (~${i.estimatedTokPerSec} tok/s with sync overhead)`),$(`
19
  Category breakdown:`);for(const c of i.categories){const _=document.createElement("div");_.className="prof-row";const b=c.name.includes("matvec")||c.name==="lm_head"?"var(--orange)":c.name.includes("attention")||c.name.includes("deltanet")?"var(--blue)":"var(--green)";_.innerHTML=`
20
  <span class="prof-name">${c.name}</span>
 
1
+ const __vite__mapDeps=(i,m=__vite__mapDeps,d=(m.f||(m.f=["assets/qwen35-model-BJNcT5Rw.js","assets/gpu-ops-flxI8RuZ.js"])))=>i.map(i=>d[i]);
2
+ import{G as nt,S as ct,_ as et}from"./gpu-ops-flxI8RuZ.js";class ot{constructor(){this.gpu=null,this.results=[],this.pipelines={}}async init(){this.gpu=new nt,await this.gpu.init();for(const[e,t]of Object.entries(ct))this.pipelines[e]=this.gpu.getOrCreatePipeline(e,t);return this}async runAll(e){const t=[["GPTQ MatVec",()=>this.testGPTQMatvec()],["GPTQ MatVec (group boundary)",()=>this.testGPTQMatvecGroups()],["GPTQ MatVec (gs=32)",()=>this.testGPTQMatvecGS32()],["GPTQ Split-K",()=>this.testGPTQSplitK()],["GPTQ F16",()=>this.testGPTQF16()],["BF16 MatVec",()=>this.testBF16Matvec()],["RMSNorm",()=>this.testRMSNorm()],["RMSNorm (1+w formula)",()=>this.testRMSNorm1PlusW()],["SiLU * Mul",()=>this.testSiLUMul()],["Vector Add",()=>this.testAdd()],["Embedding (BF16)",()=>this.testEmbedding()],["Argmax",()=>this.testArgmax()],["Argmax (large)",()=>this.testArgmaxLarge()],["Split interleaved",()=>this.testSplit()],["Sigmoid Mul",()=>this.testSigmoidMul()],["Head RMSNorm (1+w)",()=>this.testHeadRMSNorm()],["Head RMSNorm (nogated, w)",()=>this.testHeadRMSNormNogated()],["Causal Conv1d",()=>this.testCausalConv1d()],["KV Cache Store",()=>this.testKVCacheStore()],["GQA Attention (single pos)",()=>this.testGQAAttention()],["DeltaNet Recurrent",()=>this.testDeltaNetRecurrent()],["Add + RMSNorm (fused)",()=>this.testAddRMSNorm()],["Three-way Add + RMSNorm",()=>this.testThreeWayAddRMSNorm()],["GQA RoPE rotate_half",()=>this.testGQARoPERotateHalf()],["Fused sigmoid + GPTQ",()=>this.testFusedSigmoidGPTQ()],["Fused SiLU + GPTQ",()=>this.testFusedSiluGPTQ()],["Fused AddNorm+GateUp+SiLU",()=>this.testFusedAddNormGateUpSiLU()],["Fused Split+QKNorm+KVStore",()=>this.testFusedSplitQKNormKVStore()],["Add+RMSNorm B=2",()=>this.testAddRMSNormB2()],["Add+RMSNorm RO B=2",()=>this.testAddRMSNormROB2()],["Three-way Add+RMSNorm B=2",()=>this.testThreeWayAddRMSNormB2()],["Fused SiLU+GPTQ B=2",()=>this.testFusedSiluGPTQB2()],["Fused Sigmoid+GPTQ B=2",()=>this.testFusedSigmoidGPTQB2()],["Fused 
GateUp+SiLU B=2",()=>this.testFusedGateUpSiLUB2()],["GPTQ Matmul B=2",()=>this.testGPTQMatmulB2()],["GPTQ Matmul B=2 F16",()=>this.testGPTQMatmulB2F16()],["GPTQ Matmul B=2 4T F16",()=>this.testGPTQMatmulB2_4T_F16()]];for(const[o,s]of t)try{const r=await s();this.results.push({name:o,...r}),e?.({name:o,...r})}catch(r){this.results.push({name:o,pass:!1,error:r.message,maxErr:NaN}),e?.({name:o,pass:!1,error:r.message,maxErr:NaN})}return this.results}destroy(){this.gpu?.destroy()}makeBuffer(e,t,o=GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC){return this.gpu.createBufferFromData(e,new Uint8Array(t.buffer,t.byteOffset,t.byteLength),o)}makeF32Buffer(e,t){return this.makeBuffer(e,new Float32Array(t))}makeU32Buffer(e,t){return this.makeBuffer(e,new Uint32Array(t))}makeOutputBuffer(e,t){return this.gpu.createBuffer(e,t*4,GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST|GPUBufferUsage.COPY_SRC)}makeUniform(e){const t=new ArrayBuffer(Math.max(16,Math.ceil(e.length*4/16)*16)),o=new DataView(t);for(let s=0;s<e.length;s++)typeof e[s]=="number"&&!Number.isInteger(e[s])?o.setFloat32(s*4,e[s],!0):o.setUint32(s*4,e[s],!0);return this.gpu.createBufferFromData("uniform",new Uint8Array(t),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}makeUniformTyped(e,t){const o=e.length+t.length,s=new ArrayBuffer(Math.max(16,Math.ceil(o*4/16)*16)),r=new DataView(s);let a=0;for(const d of e)r.setUint32(a,d,!0),a+=4;for(const d of t)r.setFloat32(a,d,!0),a+=4;return this.gpu.createBufferFromData("uniform_typed",new Uint8Array(s),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}dispatch(e,t,o,s=1){const r=this.pipelines[e],a=this.gpu.createBindGroup(r,0,t);this.gpu.dispatch(r,[a],o,s)}async readback(e,t){return this.gpu.readBuffer(e,t*4)}async readbackU32(e,t){const o=this.gpu.createReadbackBuffer("_rb",t*4),s=this.gpu.device.createCommandEncoder();s.copyBufferToBuffer(e,0,o,0,t*4),this.gpu.device.queue.submit([s.finish()]),await o.mapAsync(GPUMapMode.READ);const r=new 
Uint32Array(o.getMappedRange().slice(0));return o.unmap(),o.destroy(),r}compare(e,t,o=1e-4){let s=0;const r=[];for(let a=0;a<t.length;a++){const d=Math.abs(e[a]-t[a]);s=Math.max(s,d),d>o&&r.push({idx:a,got:e[a],expected:t[a],err:d})}return{pass:r.length===0,maxErr:s,errors:r.slice(0,5)}}cpuF32ToBF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const a=new Uint32Array(new Float32Array([e[s]]).buffer)[0]>>>16,d=Math.floor(s/2);s%2===0?o[d]=a:o[d]|=a<<16}return o}cpuF32ToF16Packed(e){const t=Math.ceil(e.length/2),o=new Uint32Array(t);for(let s=0;s<e.length;s++){const r=this.f32ToF16(e[s]),a=Math.floor(s/2);s%2===0?o[a]=r:o[a]|=r<<16}return o}f32ToF16(e){const t=new ArrayBuffer(4);new Float32Array(t)[0]=e;const o=new Uint32Array(t)[0],s=o>>31&1;let r=o>>23&255,a=o&8388607;return r===0?s<<15:r===255?s<<15|31744|(a?512:0):(r=r-127+15,r>=31?s<<15|31744:r<=0?s<<15:s<<15|r<<10|a>>13)}f16ToF32(e){const t=e>>15&1,o=e>>10&31,s=e&1023;return o===0?s===0?t?-0:0:(t?-1:1)*Math.pow(2,-14)*(s/1024):o===31?s?NaN:t?-1/0:1/0:(t?-1:1)*Math.pow(2,o-15)*(1+s/1024)}bf16ToF32(e){const t=e<<16,o=new ArrayBuffer(4);return new Uint32Array(o)[0]=t,new Float32Array(o)[0]}async testGPTQMatvec(){const r=new Float32Array(64);for(let k=0;k<64;k++)r[k]=(k+1)*.1;const a=new Uint32Array(32),d=new Float32Array(256);for(let k=0;k<8;k++)for(let u=0;u<4;u++){let m=0;for(let g=0;g<8;g++){const i=k*8+g,c=(i+u)%15;d[i*4+u]=c-8,m|=c<<g*4}a[k*4+u]=m}const f=new Float32Array(4);for(let k=0;k<4;k++)f[k]=.5+k*.1;const w=this.cpuF32ToF16Packed(f),h=new Float32Array(4);for(let k=0;k<4;k++){let u=0;for(let m=0;m<64;m++){const i=Math.floor(m/64)*4+k,c=w[Math.floor(i/2)]>>i%2*16&65535,_=this.f16ToF32(c);u+=r[m]*d[m*4+k]*_}h[k]=u}const F=this.makeF32Buffer("gptq_input",r),n=this.makeU32Buffer("gptq_qw",a),B=this.makeU32Buffer("gptq_scales",w),l=this.makeOutputBuffer("gptq_out",4),p=this.makeUniform([64,4,64]);this.dispatch("gptq_matvec",[F,n,B,l,p],Math.ceil(4/32));const 
y=await this.readback(l,4);return this.compare(y,h,.01)}async testGPTQMatvecGroups(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/64,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/64)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq2_in",r),B=this.makeU32Buffer("gptq2_qw",a),l=this.makeU32Buffer("gptq2_sc",h),p=this.makeOutputBuffer("gptq2_out",4),y=this.makeUniform([128,4,64]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQMatvecGS32(){const r=new Float32Array(128);for(let u=0;u<128;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(64),d=new Float32Array(512);for(let u=0;u<16;u++)for(let m=0;m<4;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c*3+m*7)%15;d[c*4+m]=_-8,g|=_<<i*4}a[u*4+m]=g}const f=128/32,w=new Float32Array(f*4);for(let u=0;u<f*4;u++)w[u]=.3+u*.15;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let u=0;u<4;u++){let m=0;for(let g=0;g<128;g++){const c=Math.floor(g/32)*4+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*4+u]*b}F[u]=m}const n=this.makeF32Buffer("gptq32_in",r),B=this.makeU32Buffer("gptq32_qw",a),l=this.makeU32Buffer("gptq32_sc",h),p=this.makeOutputBuffer("gptq32_out",4),y=this.makeUniform([128,4,32]);this.dispatch("gptq_matvec",[n,B,l,p,y],Math.ceil(4/32));const k=await this.readback(p,4);return this.compare(k,F,.01)}async testGPTQSplitK(){const a=new Float32Array(256);for(let i=0;i<256;i++)a[i]=Math.sin(i*.3)*.5;const d=new Uint32Array(1024),f=new Float32Array(256*32);for(let 
i=0;i<32;i++)for(let c=0;c<32;c++){let _=0;for(let b=0;b<8;b++){const N=i*8+b,x=(N*3+c*7)%15;f[N*32+c]=x-8,_|=x<<b*4}d[i*32+c]=_}const w=256/64,h=new Float32Array(w*32);for(let i=0;i<w;i++)for(let c=0;c<32;c++)h[i*32+c]=.3+(i*32+c)*.02;const F=this.cpuF32ToF16Packed(h),n=new Float32Array(32);for(let i=0;i<32;i++){let c=0;for(let _=0;_<256;_++){const N=Math.floor(_/64)*32+i,x=F[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);c+=a[_]*f[_*32+i]*q}n[i]=c}const B=this.makeF32Buffer("sk_in",a),l=this.makeU32Buffer("sk_qw",d),p=this.makeU32Buffer("sk_sc",F),y=this.makeOutputBuffer("sk_partials",128),k=this.makeOutputBuffer("sk_out",32),u=this.makeUniform([256,32,64,4]),m=this.makeUniform([32,4]);this.dispatch("gptq_splitk",[B,l,p,y,u],Math.ceil(32/32),4),this.dispatch("reduce_splitk",[y,k,m],Math.ceil(32/256));const g=await this.readback(k,32);return this.compare(g,n,.01)}async testGPTQF16(){if(!this.gpu.hasF16||!this.pipelines.gptq_matvec_f16)return{pass:!0,maxErr:0,note:"shader-f16 not available, skipped"};const e=128,t=4,o=64,s=e/8,r=new Float32Array(e);for(let u=0;u<e;u++)r[u]=Math.sin(u*.5)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let u=0;u<s;u++)for(let m=0;m<t;m++){let g=0;for(let i=0;i<8;i++){const c=u*8+i,_=(c+m*3)%15;d[c*t+m]=_-8,g|=_<<i*4}a[u*t+m]=g}const f=e/o,w=new Float32Array(f*t);for(let u=0;u<f;u++)for(let m=0;m<t;m++)w[u*t+m]=.5+u*.1;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(t);for(let u=0;u<t;u++){let m=0;for(let g=0;g<e;g++){const c=Math.floor(g/o)*t+u,_=h[Math.floor(c/2)]>>c%2*16&65535,b=this.f16ToF32(_);m+=r[g]*d[g*t+u]*b}F[u]=m}const n=this.makeF32Buffer("f16_in",r),B=this.makeU32Buffer("f16_qw",a),l=this.makeU32Buffer("f16_sc",h),p=this.makeOutputBuffer("f16_out",t),y=this.makeUniform([e,t,o]);this.dispatch("gptq_matvec_f16",[n,B,l,p,y],Math.ceil(t/32));const k=await this.readback(p,t);return this.compare(k,F,.5)}async testBF16Matvec(){const o=new Float32Array(8);for(let n=0;n<8;n++)o[n]=(n+1)*.2;const s=new 
Float32Array(32);for(let n=0;n<4;n++)for(let B=0;B<8;B++)s[n*8+B]=Math.cos(n*.5+B*.3);const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4);for(let n=0;n<4;n++){let B=0;for(let l=0;l<8;l++){const p=n*8+l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);B+=o[l]*u}a[n]=B}const d=this.makeF32Buffer("bf16mv_in",o),f=this.makeU32Buffer("bf16mv_w",r),w=this.makeOutputBuffer("bf16mv_out",4),h=this.makeUniform([8,4]);this.dispatch("bf16_matvec",[d,f,w,h],Math.ceil(4/32));const F=await this.readback(w,4);return this.compare(F,a,.001)}async testRMSNorm(){const o=new Float32Array(16);for(let l=0;l<16;l++)o[l]=(l-8)*.3;const s=new Float32Array(16);for(let l=0;l<16;l++)s[l]=Math.sin(l*.4)*.1;const r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<16;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/16+1e-6),f=new Float32Array(16);for(let l=0;l<16;l++){const p=l,y=r[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);f[l]=o[l]*d*(1+u)}const w=this.makeF32Buffer("rmsn_in",o),h=this.makeU32Buffer("rmsn_w",r),F=this.makeOutputBuffer("rmsn_out",16),n=this.makeUniformTyped([16],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,16);return this.compare(B,f,1e-4)}async testRMSNorm1PlusW(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8).fill(0),r=this.cpuF32ToBF16Packed(s);let a=0;for(let l=0;l<8;l++)a+=o[l]*o[l];const d=1/Math.sqrt(a/8+1e-6),f=new Float32Array(8);for(let l=0;l<8;l++)f[l]=o[l]*d;const w=this.makeF32Buffer("rmsn1_in",o),h=this.makeU32Buffer("rmsn1_w",r),F=this.makeOutputBuffer("rmsn1_out",8),n=this.makeUniformTyped([8],[1e-6]);this.dispatch("rmsnorm",[w,h,F,n],1);const B=await this.readback(F,8);return this.compare(B,f,1e-4)}async testSiLUMul(){const t=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array(8);for(let h=0;h<8;h++){const F=t[h]/(1+Math.exp(-t[h]));s[h]=F*o[h]}const 
r=this.makeF32Buffer("silu_gate",t),a=this.makeF32Buffer("silu_up",o),d=this.makeOutputBuffer("silu_out",8),f=this.makeUniform([8]);this.dispatch("silu_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testAdd(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([.1,.2,.3,.4,.5,.6,.7,.8]),s=new Float32Array(8);for(let w=0;w<8;w++)s[w]=t[w]+o[w];const r=this.makeF32Buffer("add_a",t),a=this.makeF32Buffer("add_b",o),d=this.makeUniform([8]);this.dispatch("add",[r,a,d],Math.ceil(8/32));const f=await this.readback(r,8);return this.compare(f,s,1e-6)}async testEmbedding(){const o=new Float32Array(32);for(let F=0;F<o.length;F++)o[F]=(F+1)*.01;const s=this.cpuF32ToBF16Packed(o),r=2,a=new Float32Array(8);for(let F=0;F<8;F++){const n=r*8+F,B=s[Math.floor(n/2)],l=n%2===0?B&65535:B>>16&65535;a[F]=this.bf16ToF32(l)}const d=this.makeU32Buffer("emb_w",s),f=this.makeOutputBuffer("emb_out",8),w=this.makeUniform([r,8]);this.dispatch("embedding",[d,f,w],Math.ceil(8/256));const h=await this.readback(f,8);return this.compare(h,a,.001)}async testArgmax(){const t=new Float32Array(16);for(let w=0;w<16;w++)t[w]=Math.sin(w*.7)*3;const o=t.indexOf(Math.max(...t)),s=this.makeF32Buffer("argmax_in",t),r=this.makeOutputBuffer("argmax_res",2),a=this.makeUniform([16]);this.dispatch("argmax",[s,r,a],1);const f=(await this.readbackU32(r,2))[0];return{pass:f===o,maxErr:Math.abs(f-o),errors:f!==o?[{idx:0,got:f,expected:o}]:[]}}async testArgmaxLarge(){const t=new Float32Array(1024);for(let d=0;d<1024;d++)t[d]=Math.sin(d*.01)-2;t[773]=99;const o=this.makeF32Buffer("argmax_lg_in",t),s=this.makeOutputBuffer("argmax_lg_res",2),r=this.makeUniform([1024]);this.dispatch("argmax",[o,s,r],1);const a=await this.readbackU32(s,2);return{pass:a[0]===773,maxErr:Math.abs(a[0]-773),errors:a[0]!==773?[{got:a[0],expected:773}]:[]}}async testSplit(){const s=new Float32Array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]),r=new Float32Array([1,2,3,4,9,10,11,12]),a=new 
Float32Array([5,6,7,8,13,14,15,16]),d=this.makeF32Buffer("split_src",s),f=this.makeOutputBuffer("split_a",8),w=this.makeOutputBuffer("split_b",8),h=this.makeUniform([8,2,4]);this.dispatch("split",[d,f,w,h],Math.ceil(8/32));const F=await this.readback(f,8),n=await this.readback(w,8),B=this.compare(F,r,1e-6),l=this.compare(n,a,1e-6);return{pass:B.pass&&l.pass,maxErr:Math.max(B.maxErr,l.maxErr),errors:[...B.errors,...l.errors]}}async testSigmoidMul(){const t=new Float32Array([1,2,3,4,5,6,7,8]),o=new Float32Array([0,1,-1,2,-2,.5,-.5,3]),s=new Float32Array(8);for(let h=0;h<8;h++)s[h]=t[h]/(1+Math.exp(-o[h]));const r=this.makeF32Buffer("sigmul_x",t),a=this.makeF32Buffer("sigmul_g",o),d=this.makeOutputBuffer("sigmul_out",8),f=this.makeUniform([8]);this.dispatch("sigmoid_mul",[r,a,d,f],Math.ceil(8/32));const w=await this.readback(d,8);return this.compare(w,s,1e-5)}async testHeadRMSNorm(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.1*(n+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*(1+u)}}const f=this.makeF32Buffer("hrmsn_x",s),w=this.makeU32Buffer("hrmsn_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testHeadRMSNormNogated(){const s=new Float32Array([1,2,3,4,5,6,7,8]),r=new Float32Array(4);for(let n=0;n<4;n++)r[n]=.5+.1*n;const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(8);for(let n=0;n<2;n++){let B=0;for(let p=0;p<4;p++)B+=s[n*4+p]**2;const l=1/Math.sqrt(B/4+1e-6);for(let p=0;p<4;p++){const y=a[Math.floor(p/2)],k=p%2===0?y&65535:y>>16&65535,u=this.bf16ToF32(k);d[n*4+p]=s[n*4+p]*l*u}}const 
f=this.makeF32Buffer("hrmsng_x",s),w=this.makeU32Buffer("hrmsng_w",a),h=this.makeUniformTyped([2,4],[1e-6]);this.dispatch("head_rmsnorm_nogated",[f,w,h],2);const F=await this.readback(f,8);return this.compare(F,d,.001)}async testCausalConv1d(){const t=new Float32Array([1,2,3,4]),o=new Float32Array(12);for(let y=0;y<o.length;y++)o[y]=(y+1)*.1;const s=new Float32Array(16);for(let y=0;y<s.length;y++)s[y]=.25;const r=this.cpuF32ToBF16Packed(s),a=new Float32Array(4),d=new Float32Array(12);for(let y=0;y<4;y++){const k=o[y],u=o[4+y],m=o[8+y],g=t[y],i=[];for(let _=0;_<4;_++){const b=y*4+_,N=r[Math.floor(b/2)],x=b%2===0?N&65535:N>>16&65535;i.push(this.bf16ToF32(x))}const c=i[0]*k+i[1]*u+i[2]*m+i[3]*g;a[y]=c/(1+Math.exp(-c)),d[y]=u,d[4+y]=m,d[8+y]=g}const f=this.makeF32Buffer("conv_x",t),w=this.makeF32Buffer("conv_hist",o),h=this.makeU32Buffer("conv_w",r),F=this.makeUniform([4]);this.dispatch("causal_conv1d",[f,w,h,F],Math.ceil(4/256));const n=await this.readback(f,4),B=await this.readback(w,12),l=this.compare(n,a,1e-4),p=this.compare(B,d,1e-4);return{pass:l.pass&&p.pass,maxErr:Math.max(l.maxErr,p.maxErr),errors:[...l.errors.map(y=>({...y,note:"conv output"})),...p.errors.map(y=>({...y,note:"history"}))]}}async testKVCacheStore(){const r=new Float32Array(8),a=new Float32Array(8);for(let c=0;c<8;c++)r[c]=c+1,a[c]=(c+1)*10;const f=8*8,w=new Float32Array(f),h=new Float32Array(f),F=new Float32Array(f),n=new Float32Array(f);for(let c=0;c<8;c++)F[24+c]=r[c],n[24+c]=a[c];const B=this.makeF32Buffer("kvs_k",r),l=this.makeF32Buffer("kvs_v",a),p=this.makeF32Buffer("kvs_kc",w),y=this.makeF32Buffer("kvs_vc",h),k=this.makeUniform([3,2,4]);this.dispatch("kv_cache_store",[B,l,p,y,k],Math.ceil(8/256));const u=await this.readback(p,f),m=await this.readback(y,f),g=this.compare(u,F,1e-6),i=this.compare(m,n,1e-6);return{pass:g.pass&&i.pass,maxErr:Math.max(g.maxErr,i.maxErr),errors:[...g.errors,...i.errors]}}async testGQAAttention(){const a=new Float32Array([1,0,0,0]),d=new 
Float32Array(4);d.set([1,0,0,0]);const f=new Float32Array(4);f.set([.5,.6,.7,.8]);const w=new Float32Array([.5,.6,.7,.8]),h=this.makeF32Buffer("gqa_q",a),F=this.makeF32Buffer("gqa_kc",d),n=this.makeF32Buffer("gqa_vc",f),B=this.makeOutputBuffer("gqa_out",4),l=new ArrayBuffer(32),p=new DataView(l);p.setUint32(0,1,!0),p.setUint32(4,4,!0),p.setUint32(8,1,!0),p.setUint32(12,1,!0),p.setUint32(16,1/1,!0),p.setUint32(20,1,!0);const y=this.gpu.createBufferFromData("gqa_params",new Uint8Array(l),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[h,F,n,B,y],1);const k=await this.readback(B,4);return this.compare(k,w,1e-4)}async testDeltaNetRecurrent(){const s=[.5,.5],r=[1,0],a=[.8,.2],d=new Float32Array([...s,...r,...a]),f=new Float32Array(4),w=new Float32Array([0]),h=new Float32Array([0]),F=new Float32Array([0]),n=new Float32Array([0]),B=this.cpuF32ToBF16Packed(F),l=this.cpuF32ToBF16Packed(n),p=1/(1+Math.exp(-0)),y=Math.sqrt(s[0]**2+s[1]**2),k=Math.sqrt(r[0]**2+r[1]**2),u=s.map(U=>U/y),m=r.map(U=>U/k),g=[0,0,0,0],i=[0,0],c=a.map((U,S)=>(U-i[S])*p);for(let U=0;U<2;U++)for(let S=0;S<2;S++)g[U*2+S]+=m[U]*c[S];const _=1/Math.sqrt(2),b=new Float32Array(2);for(let U=0;U<2;U++){let S=0;for(let O=0;O<2;O++)S+=g[O*2+U]*u[O]*_;b[U]=S}const N=this.makeF32Buffer("dn_qkv",d),x=this.makeF32Buffer("dn_state",f),q=this.makeOutputBuffer("dn_out",2),K=this.makeF32Buffer("dn_a",w),v=this.makeF32Buffer("dn_b",h),T=this.makeU32Buffer("dn_alog",B),V=this.makeU32Buffer("dn_dtb",l),G=this.makeUniform([1,2,2]);this.dispatch("deltanet_recurrent",[N,x,q,K,v,T,V,G],1);const C=await this.readback(q,2);return this.compare(C,b,.001)}async testAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array(8);for(let _=0;_<8;_++)r[_]=.1*(_+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(o);for(let _=0;_<8;_++)d[_]+=s[_];let f=0;for(let _=0;_<8;_++)f+=d[_]*d[_];const w=1/Math.sqrt(f/8+1e-6),h=new 
Float32Array(8);for(let _=0;_<8;_++){const b=a[Math.floor(_/2)]>>_%2*16&65535,N=this.bf16ToF32(b);h[_]=d[_]*w*(1+N)}const F=this.makeF32Buffer("addnorm_h",o),n=this.makeF32Buffer("addnorm_a",s),B=this.makeU32Buffer("addnorm_w",a),l=this.makeOutputBuffer("addnorm_out",8);/* NOTE(review): the value returned by the next makeUniform call is discarded; the kernel is given the 16-byte params buffer built right after it (u32 8 at byte 0, f32 1e-6 at byte 4). */this.makeUniform([8]);const p=new ArrayBuffer(16),y=new DataView(p);y.setUint32(0,8,!0),y.setFloat32(4,1e-6,!0);const k=this.gpu.createBufferFromData("addnorm_params",new Uint8Array(p),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("add_rmsnorm",[F,n,B,l,k],1);const u=await this.readback(l,8),m=await this.readback(F,8),g=d,i=this.compare(u,h,.001),c=this.compare(m,g,1e-6);return{pass:i.pass&&c.pass,maxErr:Math.max(i.maxErr,c.maxErr),errors:[...i.errors,...c.errors]}}/* testThreeWayAddRMSNorm: checks "three_way_add_rmsnorm" on 8 elements. Three input vectors are summed; the sum is expected in the twa_hout buffer (tolerance 1e-6) and its RMSNorm (eps 1e-6, scaled by 1 + BF16 weight) in the twa_normed buffer (tolerance 1e-3). */async testThreeWayAddRMSNorm(){const o=new Float32Array([1,2,3,4,5,6,7,8]),s=new Float32Array([.1,-.2,.3,-.4,.5,-.6,.7,-.8]),r=new Float32Array([.5,.5,-.5,-.5,1,1,-1,-1]),a=new Float32Array(8);for(let N=0;N<8;N++)a[N]=.1*(N+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(8);for(let N=0;N<8;N++)f[N]=o[N]+s[N]+r[N];let w=0;for(let N=0;N<8;N++)w+=f[N]*f[N];const h=1/Math.sqrt(w/8+1e-6),F=new Float32Array(8);for(let N=0;N<8;N++){const x=d[Math.floor(N/2)]>>N%2*16&65535,q=this.bf16ToF32(x);F[N]=f[N]*h*(1+q)}const n=this.makeF32Buffer("twa_a",o),B=this.makeF32Buffer("twa_b",s),l=this.makeF32Buffer("twa_c",r),p=this.makeU32Buffer("twa_w",d),y=this.makeOutputBuffer("twa_hout",8),k=this.makeOutputBuffer("twa_normed",8),u=new ArrayBuffer(16),m=new DataView(u);m.setUint32(0,8,!0),m.setFloat32(4,1e-6,!0);const g=this.gpu.createBufferFromData("twa_params",new Uint8Array(u),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("three_way_add_rmsnorm",[n,B,l,p,y,k,g],1);const i=await this.readback(y,8),c=await this.readback(k,8),_=this.compare(i,f,1e-6),b=this.compare(c,F,.001);return{pass:_.pass&&b.pass,maxErr:Math.max(_.maxErr,b.maxErr),errors:[..._.errors,...b.errors]}}/* testGQARoPERotateHalf: runs "gqa_attention_head" on a 256-float query and three 256-float cached key/value rows that were rotated on the CPU beforehand; the expected output is the softmax-weighted sum of the three value rows (tolerance 1e-3). */async testGQARoPERotateHalf(){const 
h=1/Math.sqrt(256),F=[];for(let D=0;D<32;D++)F.push(1/Math.pow(100,2*D/64));/* n(D, M): returns a copy of D in which elements P and P+32 (P < 32) are rotated as a pair by the angle M * F[P]; elements from index 64 on are copied unchanged. */function n(D,M){const H=new Float32Array(D.length);for(let P=0;P<D.length;P++)H[P]=D[P];for(let P=0;P<32;P++){const Q=M*F[P],E=Math.cos(Q),J=Math.sin(Q);H[P]=D[P]*E-D[P+32]*J,H[P+32]=D[P+32]*E+D[P]*J}return H}const B=new Float32Array(256);B[0]=1,B[1]=.5,B[2]=.3,B[32]=.7,B[33]=.2;const l=new Float32Array(256);l[0]=1,l[32]=.5;const p=new Float32Array(256);p[1]=1,p[33]=.3;const y=new Float32Array(256);y[2]=1;const k=new Float32Array(256);k[0]=1;const u=new Float32Array(256);u[0]=0,u[1]=1;const m=new Float32Array(256);m[0]=0,m[2]=1;/* query rotated with M = 2; the three keys rotated with M = 0, 1, 2 */const g=n(B,2),i=n(l,0),c=n(p,1),_=n(y,2);let b=0,N=0,x=0;for(let D=0;D<256;D++)b+=g[D]*i[D],N+=g[D]*c[D],x+=g[D]*_[D];b*=h,N*=h,x*=h;/* softmax over the three scaled scores, then the weighted sum of the value rows */const q=Math.max(b,N,x),K=Math.exp(b-q),v=Math.exp(N-q),T=Math.exp(x-q),V=K+v+T,G=K/V,C=v/V,U=T/V,S=new Float32Array(256);for(let D=0;D<256;D++)S[D]=G*k[D]+C*u[D]+U*m[D];/* the three rotated keys and the three value rows are laid out as consecutive 256-float rows */const O=1*256,Y=new Float32Array(3*O),z=new Float32Array(3*O);Y.set(i,0),Y.set(c,O),Y.set(_,2*O),z.set(k,0),z.set(u,O),z.set(m,2*O);const X=this.makeF32Buffer("rope2_q",g),tt=this.makeF32Buffer("rope2_kc",Y),A=this.makeF32Buffer("rope2_vc",z),R=this.makeOutputBuffer("rope2_out",1*256),L=new ArrayBuffer(32),W=new DataView(L);W.setUint32(0,3,!0),W.setUint32(4,256,!0),W.setUint32(8,1,!0),W.setUint32(12,1,!0),W.setUint32(16,1/1,!0),W.setUint32(20,1,!0);const Z=this.gpu.createBufferFromData("rope2_params",new Uint8Array(L),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST);this.dispatch("gqa_attention_head",[X,tt,A,R,Z],1);const j=await this.readback(R,1*256);return this.compare(j,S,.001)}/* testFusedSigmoidGPTQ: checks "fused_sigmoid_gptq" with 64 inputs and 4 outputs (uniform [64,4,64]). Each u32 of the weight buffer packs eight 4-bit values; the CPU copy stores them as (value - 8). The reference multiplies x by sigmoid(gate), then by the dequantised weight and an F16-packed per-column scale. Tolerance 0.01. */async testFusedSigmoidGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m+1)*.1,a[m]=(m-8)*.3;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.5+m*.1;const 
h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);/* CPU reference: sum over the 64 inputs of x * sigmoid(gate) * dequantised weight * F16 scale, per output column */for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const c=r[i]/(1+Math.exp(-a[i])),b=Math.floor(i/64)*4+m,N=h[Math.floor(b/2)]>>b%2*16&65535,x=this.f16ToF32(N);g+=c*f[i*4+m]*x}F[m]=g}const n=this.makeF32Buffer("fsg_x",r),B=this.makeF32Buffer("fsg_g",a),l=this.makeU32Buffer("fsg_qw",d),p=this.makeU32Buffer("fsg_sc",h),y=this.makeOutputBuffer("fsg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_sigmoid_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}/* testFusedSiluGPTQ: checks "fused_silu_gptq" with 64 inputs and 4 outputs (uniform [64,4,64]). The matvec input is a / (1 + exp(-a)) * b; weights are eight 4-bit values per u32 stored on the CPU as (value - 8), with four F16-packed scales. Tolerance 0.01. */async testFusedSiluGPTQ(){const r=new Float32Array(64),a=new Float32Array(64);for(let m=0;m<64;m++)r[m]=(m-8)*.2,a[m]=(m+1)*.15;const d=new Uint32Array(32),f=new Float32Array(256);for(let m=0;m<8;m++)for(let g=0;g<4;g++){let i=0;for(let c=0;c<8;c++){const _=m*8+c,b=(_*2+g)%15;f[_*4+g]=b-8,i|=b<<c*4}d[m*4+g]=i}const w=new Float32Array(4);for(let m=0;m<4;m++)w[m]=.4+m*.2;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(4);for(let m=0;m<4;m++){let g=0;for(let i=0;i<64;i++){const _=r[i]/(1+Math.exp(-r[i]))*a[i],N=Math.floor(i/64)*4+m,x=h[Math.floor(N/2)]>>N%2*16&65535,q=this.f16ToF32(x);g+=_*f[i*4+m]*q}F[m]=g}const n=this.makeF32Buffer("fslg_a",r),B=this.makeF32Buffer("fslg_b",a),l=this.makeU32Buffer("fslg_qw",d),p=this.makeU32Buffer("fslg_sc",h),y=this.makeOutputBuffer("fslg_out",4),k=this.makeUniform([64,4,64]);this.dispatch("fused_silu_gptq",[n,B,l,p,y,k],Math.ceil(4/32));const u=await this.readback(y,4);return this.compare(u,F,.01)}/* testFusedAddNormGateUpSiLU: checks "fused_addnorm_gate_up_silu" with 64 inputs and 4 outputs. This part builds three 64-float input vectors, 64 norm weights, and float gate/up weight matrices (sin/cos patterns) that are then quantised per output column to 4 bits. */async testFusedAddNormGateUpSiLU(){const a=new Float32Array(64),d=new Float32Array(64),f=new Float32Array(64),w=new Float32Array(64);for(let A=0;A<64;A++)a[A]=(A+1)*.1,d[A]=(A-5)*.05,f[A]=(A%7-3)*.03,w[A]=(A%3-1)*.1;const h=new Uint32Array(32),F=new Uint32Array(32),n=new Float32Array(256),B=new Float32Array(256);for(let A=0;A<64;A++)for(let R=0;R<4;R++)n[A*4+R]=Math.sin(A*.7+R*1.3)*.5,B[A*4+R]=Math.cos(A*.3+R*.9)*.5;const l=new Uint32Array(4/2),p=new Uint32Array(4/2);for(let 
A=0;A<4;A++){let R=0,L=0;for(let P=0;P<64;P++)R=Math.max(R,Math.abs(n[P*4+A])),L=Math.max(L,Math.abs(B[P*4+A]));/* per-column scale = max |w| / 7, round-tripped through F16 so quantisation uses the value that is actually stored */const W=R/7,Z=L/7,j=this.f32ToF16(W),D=this.f32ToF16(Z),M=this.f16ToF32(j),H=this.f16ToF32(D);A%2===0?(l[A/2]=j,p[A/2]=D):(l[(A-1)/2]|=j<<16,p[(A-1)/2]|=D<<16);for(let P=0;P<64;P++){const Q=Math.round(n[P*4+A]/M)+8,E=Math.round(B[P*4+A]/H)+8,J=Math.max(0,Math.min(15,Q)),rt=Math.max(0,Math.min(15,E)),st=Math.floor(P/8),at=P%8*4;h[st*4+A]|=J<<at,F[st*4+A]|=rt<<at}}/* pack the 64 norm weights two per u32 as BF16 */const y=new Uint32Array(64/2);for(let A=0;A<64;A+=2){const R=this.f32ToBF16(w[A]),L=this.f32ToBF16(w[A+1]);y[A/2]=R|L<<16}/* CPU reference: k = sum of the three inputs, g = RMSNorm(k) with eps 1e-6 scaled by (1 + w) */const k=new Float32Array(64);let u=0;for(let A=0;A<64;A++)k[A]=a[A]+d[A]+f[A],u+=k[A]*k[A];const m=1/Math.sqrt(u/64+1e-6),g=new Float32Array(64);for(let A=0;A<64;A++){const R=this.unpackBF16(y[Math.floor(A/2)],A%2);g[A]=k[A]*m*(1+R)}/* CPU reference output: R / (1 + exp(-R)) * L, where R and L are the dequantised gate and up matvecs of g */const i=new Float32Array(4);for(let A=0;A<4;A++){let R=0,L=0;const W=this.f16ToF32(A%2===0?l[Math.floor(A/2)]&65535:l[Math.floor(A/2)]>>>16),Z=this.f16ToF32(A%2===0?p[Math.floor(A/2)]&65535:p[Math.floor(A/2)]>>>16);for(let j=0;j<64;j++){const D=Math.floor(j/8),M=j%8*4,H=h[D*4+A]>>>M&15,P=F[D*4+A]>>>M&15;R+=g[j]*W*(H-8),L+=g[j]*Z*(P-8)}i[A]=R/(1+Math.exp(-R))*L}/* params buffer: u32 64, 4, 64 and f32 1e-6 in the first 16 bytes, then the packed norm weights starting at byte 16 */const c=k,_=16+640*16,b=new ArrayBuffer(_),N=new DataView(b);N.setUint32(0,64,!0),N.setUint32(4,4,!0),N.setUint32(8,64,!0),N.setFloat32(12,1e-6,!0);for(let A=0;A<64/2;A++){const R=Math.floor(A/4),L=A%4;N.setUint32(16+R*16+L*4,y[A],!0)}const x=this.gpu.createBufferFromData("fang_params",new Uint8Array(b),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),q=new Uint32Array(64);/* merged buffers: gate words/scales first, up words/scales after them */q.set(h,0),q.set(F,32);const K=8/(64/8),v=new Uint32Array(K*4/2*2);v.set(l,0),v.set(p,K*4/2);const 
T=this.makeF32Buffer("fang_hin",a),V=this.makeF32Buffer("fang_mlpres",d),G=this.makeF32Buffer("fang_attnres",f),C=this.makeU32Buffer("fang_mqw",q),U=this.makeU32Buffer("fang_msc",v),S=this.makeOutputBuffer("fang_hout",64),O=this.makeOutputBuffer("fang_out",4);this.dispatch("fused_addnorm_gate_up_silu",[T,V,G,C,U,S,O,x],Math.max(1,Math.ceil(4/32)));/* two checks: the 4 fused outputs (tolerance 0.02) and the 64-float summed hidden state in fang_hout (tolerance 1e-6) */const Y=await this.readback(O,4),z=await this.readback(S,64),X=this.compare(Y,i,.02),tt=this.compare(z,c,1e-6);return{pass:X.pass&&tt.pass,maxErr:Math.max(X.maxErr,tt.maxErr),errors:[...X.errors||[],...tt.errors||[]]}}/* testFusedSplitQKNormKVStore: checks "fused_split_qknorm_kvstore". Input f holds 2 heads x 16 floats: the first 8 of each head are the query and the next 8 are expected to pass through unchanged as the gate. The query heads and the single 8-float K head are RMS-normalised (eps 1e-6, scaled by 1 + BF16 weight); the normalised K and the raw V are expected at float offset 24 of the 32-float caches. The params buffer holds u32 2, 1, 8, f32 1e-6, u32 3, then the packed norm weights from byte 32. */async testFusedSplitQKNormKVStore(){const f=new Float32Array(32);for(let M=0;M<f.length;M++)f[M]=Math.sin(M*.5+1)*.3;const w=new Float32Array(8),h=new Float32Array(8);for(let M=0;M<8;M++)w[M]=Math.cos(M*.7+2)*.4,h[M]=Math.sin(M*1.1+3)*.2;const F=new Float32Array(16),n=new Float32Array(8);for(let M=0;M<16;M++)F[M]=(M%3-1)*.05;for(let M=0;M<8;M++)n[M]=(M%4-2)*.03;const B=new Uint32Array(16/2);for(let M=0;M<16;M+=2)B[M/2]=this.f32ToBF16(F[M])|this.f32ToBF16(F[M+1])<<16;const l=new Uint32Array(8/2);for(let M=0;M<8;M+=2)l[M/2]=this.f32ToBF16(n[M])|this.f32ToBF16(n[M+1])<<16;/* expected query (p) and gate (y) outputs */const p=new Float32Array(16),y=new Float32Array(16);for(let M=0;M<2;M++){const H=new Float32Array(8);for(let E=0;E<8;E++)H[E]=f[M*8*2+E],y[M*8+E]=f[M*8*2+8+E];let P=0;for(let E=0;E<8;E++)P+=H[E]*H[E];const Q=1/Math.sqrt(P/8+1e-6);for(let E=0;E<8;E++){const J=this.unpackBF16(B[Math.floor((M*8+E)/2)],(M*8+E)%2);p[M*8+E]=H[E]*Q*(1+J)}}/* expected normalised K (k) and the expected K/V cache contents (u, m) */const k=new Float32Array(8),u=new Float32Array(32),m=new Float32Array(32);for(let M=0;M<1;M++){let H=0;for(let Q=0;Q<8;Q++)H+=w[M*8+Q]*w[M*8+Q];const P=1/Math.sqrt(H/8+1e-6);for(let Q=0;Q<8;Q++){const E=this.unpackBF16(l[Math.floor((M*8+Q)/2)],(M*8+Q)%2),J=w[M*8+Q]*P*(1+E);k[M*8+Q]=J,u[24+M*8+Q]=J,m[24+M*8+Q]=h[M*8+Q]}}const g=32+320*16,i=new ArrayBuffer(g),c=new DataView(i);c.setUint32(0,2,!0),c.setUint32(4,1,!0),c.setUint32(8,8,!0),c.setFloat32(12,1e-6,!0),c.setUint32(16,3,!0);for(let M=0;M<16/2;M++){const 
H=Math.floor(M/4),P=M%4;c.setUint32(32+H*16+P*4,B[M],!0)}/* the K norm weights follow the 8 query-weight words in the same params region */const _=16/2;for(let M=0;M<8/2;M++){const H=_+M,P=Math.floor(H/4),Q=H%4;c.setUint32(32+P*16+Q*4,l[M],!0)}const b=this.gpu.createBufferFromData("fsqk_params",new Uint8Array(i),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST),N=this.makeF32Buffer("fsqk_qpf",f),x=this.makeF32Buffer("fsqk_kp",w),q=this.makeF32Buffer("fsqk_vp",h),K=this.makeOutputBuffer("fsqk_qp",16),v=this.makeOutputBuffer("fsqk_qg",16),T=this.makeOutputBuffer("fsqk_kc",32),V=this.makeOutputBuffer("fsqk_vc",32);this.dispatch("fused_split_qknorm_kvstore",[N,x,q,K,v,T,V,b],3);/* five checks: normalised query, gate, the K input buffer (expected to hold the normalised K after the kernel), and floats 24..31 of each cache */const G=await this.readback(K,16),C=await this.readback(v,16),U=await this.readback(x,8),S=await this.readback(T,32),O=await this.readback(V,32),Y=this.compare(G,p,1e-5),z=this.compare(C,y,1e-6),X=this.compare(U,k,1e-5),tt=S.slice(24,32),A=O.slice(24,32),R=u.slice(24,32),L=m.slice(24,32),W=this.compare(tt,R,1e-5),Z=this.compare(A,L,1e-6),j=Math.max(Y.maxErr,z.maxErr,X.maxErr,W.maxErr,Z.maxErr);return{pass:Y.pass&&z.pass&&X.pass&&W.pass&&Z.pass,maxErr:j,errors:[...Y.errors||[],...z.errors||[],...X.errors||[],...W.errors||[],...Z.errors||[]]}}/* benchmarkGPTQ(e): times the GPTQ matvec kernels for three K x N shapes (group size 128) using random inputs, packed weights and scales. For each split count in [1,2,4,8] (skipped when greater than 1 and not a divisor of the number of groups) it runs 3 warm-up batches and 10 timed batches of 50 dispatches each. e, when provided, is called with every result row; the rows are also returned as an array. */async benchmarkGPTQ(e){const t=[{K:2048,N:2048,gs:128,label:"2048→2048 (Z/out_proj)"},{K:2048,N:6144,gs:128,label:"2048→6144 (QKV/gate+up)"},{K:6144,N:2048,gs:128,label:"6144→2048 (down_proj)"}],o=[1,2,4,8],s=50,r=3,a=10,d=[];for(const f of t){const{K:w,N:h,gs:F,label:n}=f,B=w/8,l=B/(F/8),p=new Float32Array(w);for(let b=0;b<w;b++)p[b]=Math.random()*2-1;const y=new Uint32Array(B*h);for(let b=0;b<y.length;b++)y[b]=Math.random()*4294967295>>>0;const k=new Float32Array(l*h);for(let b=0;b<k.length;b++)k[b]=Math.random()*.5;const u=this.cpuF32ToF16Packed(k),m=this.makeF32Buffer("bm_in",p),g=this.makeU32Buffer("bm_qw",y),i=this.makeU32Buffer("bm_sc",u),c=[];for(let b=0;b<s;b++)c.push(this.makeOutputBuffer(`bm_out_${b}`,h));/* bytes counted per dispatch: 4 per packed weight word plus 2 per scale */const _=B*h*4+l*h*2;for(const b of o){if(b>1&&l%b!==0)continue;let N;if(b===1){const v=this.makeUniform([w,h,F]);for(let 
G=0;G<r;G++){this.gpu.beginBatch();for(let C=0;C<s;C++)this.dispatch("gptq_matvec",[m,g,i,c[C],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}/* timed iterations: each one waits for the queue to drain, submits s dispatches in one batch and waits again */const T=[];for(let G=0;G<a;G++){await this.gpu.device.queue.onSubmittedWorkDone();const C=performance.now();this.gpu.beginBatch();for(let U=0;U<s;U++)this.dispatch("gptq_matvec",[m,g,i,c[U],v],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),T.push(performance.now()-C)}/* drop the fastest and slowest sample, average the rest, divide by s to get ms per dispatch */T.sort((G,C)=>G-C),N=T.slice(1,-1).reduce((G,C)=>G+C,0)/(T.length-2)/s}else{/* split path: "gptq_splitk" writes b partial results per output, then "reduce_splitk" combines them */const v=[];for(let U=0;U<s;U++)v.push(this.makeOutputBuffer(`bm_part_${U}`,h*b));const T=this.makeUniform([w,h,F,b]),V=this.makeUniform([h,b]);for(let U=0;U<r;U++){this.gpu.beginBatch();for(let S=0;S<s;S++)this.dispatch("gptq_splitk",[m,g,i,v[S],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[S],c[S],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const G=[];for(let U=0;U<a;U++){await this.gpu.device.queue.onSubmittedWorkDone();const S=performance.now();this.gpu.beginBatch();for(let O=0;O<s;O++)this.dispatch("gptq_splitk",[m,g,i,v[O],T],Math.ceil(h/32),b),this.dispatch("reduce_splitk",[v[O],c[O],V],Math.ceil(h/256));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),G.push(performance.now()-S)}G.sort((U,S)=>U-S),N=G.slice(1,-1).reduce((U,S)=>U+S,0)/(G.length-2)/s}/* GB/s = counted bytes per dispatch divided by seconds per dispatch */const x=_/1e9/(N/1e3),q=Math.ceil(h/32)*b,K={label:n,ns:b,wgs:q,avgMs:N.toFixed(4),bwGBs:x.toFixed(1)};d.push(K),e?.(K)}/* extra row for the f16 kernel, only when the device reports f16 support and the pipeline exists */if(this.gpu.hasF16&&this.pipelines.gptq_matvec_f16){const b=this.makeUniform([w,h,F]);for(let T=0;T<r;T++){this.gpu.beginBatch();for(let V=0;V<s;V++)this.dispatch("gptq_matvec_f16",[m,g,i,c[V],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone()}const N=[];for(let T=0;T<a;T++){await this.gpu.device.queue.onSubmittedWorkDone();const V=performance.now();this.gpu.beginBatch();for(let 
G=0;G<s;G++)this.dispatch("gptq_matvec_f16",[m,g,i,c[G],b],Math.ceil(h/32));this.gpu.endBatch(),await this.gpu.device.queue.onSubmittedWorkDone(),N.push(performance.now()-V)}N.sort((T,V)=>T-V);const q=N.slice(1,-1).reduce((T,V)=>T+V,0)/(N.length-2)/s,K=_/1e9/(q/1e3),v={label:n,ns:"f16",wgs:Math.ceil(h/32),avgMs:q.toFixed(4),bwGBs:K.toFixed(1)};d.push(v),e?.(v)}}return d}/* _makeNormParams(e, t = 1e-6): builds a 16-byte uniform buffer holding u32 e at byte 0 and f32 t at byte 4, both little-endian. */_makeNormParams(e,t=1e-6){const o=new ArrayBuffer(16),s=new DataView(o);return s.setUint32(0,e,!0),s.setFloat32(4,t,!0),this.gpu.createBufferFromData("norm_params",new Uint8Array(o),GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST)}/* _cpuRMSNorm1PlusW(e, t, o, s = 1e-6): CPU RMSNorm over the first o elements of e. Returns e[i] / sqrt(mean(e^2) + s) * (1 + w[i]), where w[i] is the i-th 16-bit BF16 half-word of the packed u32 array t. */_cpuRMSNorm1PlusW(e,t,o,s=1e-6){let r=0;for(let f=0;f<o;f++)r+=e[f]*e[f];const a=1/Math.sqrt(r/o+s),d=new Float32Array(o);for(let f=0;f<o;f++){const w=t[Math.floor(f/2)]>>f%2*16&65535,h=this.bf16ToF32(w);d[f]=e[f]*a*(1+h)}return d}/* testAddRMSNormB2: checks "add_rmsnorm_b2" on two 16-float rows sharing one BF16 weight vector (dispatch count 2). Expects the normalised rows in the output buffer (tolerance 1e-3) and the per-row sums written back into the hidden buffer (tolerance 1e-6). */async testAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let u=0;u<32;u++)o[u]=Math.sin(u*.7)*2,s[u]=Math.cos(u*.3)*.5;const r=new Float32Array(16);for(let u=0;u<16;u++)r[u]=.1*(u+1);const a=this.cpuF32ToBF16Packed(r),d=new Float32Array(32),f=new Float32Array(32);for(let u=0;u<2;u++){const m=new Float32Array(16);for(let i=0;i<16;i++)m[i]=o[u*16+i]+s[u*16+i];for(let i=0;i<16;i++)f[u*16+i]=m[i];const g=this._cpuRMSNorm1PlusW(m,a,16,1e-6);d.set(g,u*16)}const w=this.makeF32Buffer("b2an_h",o),h=this.makeF32Buffer("b2an_a",s),F=this.makeU32Buffer("b2an_w",a),n=this.makeOutputBuffer("b2an_out",32),B=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_b2",[w,h,F,n,B],2);const l=await this.readback(n,32),p=await this.readback(w,32),y=this.compare(l,d,.001),k=this.compare(p,f,1e-6);return{pass:y.pass&&k.pass,maxErr:Math.max(y.maxErr,k.maxErr),errors:[...y.errors,...k.errors]}}/* testAddRMSNormROB2: checks "add_rmsnorm_ro_b2" on two 16-float rows; only the normalised output is read back and compared (tolerance 1e-3). NOTE(review): "ro" presumably means the hidden buffer is left unmodified - this test does not verify that. */async testAddRMSNormROB2(){const o=new Float32Array(32),s=new Float32Array(32);for(let l=0;l<32;l++)o[l]=Math.sin(l*.5)*3,s[l]=Math.cos(l*.8)*.3;const r=new Float32Array(16);for(let l=0;l<16;l++)r[l]=.05*(l+1);const a=this.cpuF32ToBF16Packed(r),d=new 
Float32Array(32);for(let l=0;l<2;l++){const p=new Float32Array(16);for(let k=0;k<16;k++)p[k]=o[l*16+k]+s[l*16+k];const y=this._cpuRMSNorm1PlusW(p,a,16,1e-6);d.set(y,l*16)}const f=this.makeF32Buffer("b2anro_h",o),w=this.makeF32Buffer("b2anro_a",s),h=this.makeU32Buffer("b2anro_w",a),F=this.makeOutputBuffer("b2anro_out",32),n=this._makeNormParams(16,1e-6);this.dispatch("add_rmsnorm_ro_b2",[f,w,h,F,n],2);const B=await this.readback(F,32);return this.compare(B,d,.001)}/* testThreeWayAddRMSNormB2: checks "three_way_add_rmsnorm_b2" on two 16-float rows. Three inputs are summed per row; the sums are expected in b2twa_hout (tolerance 1e-6) and their RMSNorm in b2twa_normed (tolerance 1e-3). */async testThreeWayAddRMSNormB2(){const o=new Float32Array(32),s=new Float32Array(32),r=new Float32Array(32);for(let i=0;i<32;i++)o[i]=Math.sin(i*.4)*2,s[i]=Math.cos(i*.6)*.5,r[i]=Math.sin(i*1.1)*.3;const a=new Float32Array(16);for(let i=0;i<16;i++)a[i]=.1*(i+1);const d=this.cpuF32ToBF16Packed(a),f=new Float32Array(32),w=new Float32Array(32);for(let i=0;i<2;i++){const c=new Float32Array(16);for(let b=0;b<16;b++)c[b]=o[i*16+b]+s[i*16+b]+r[i*16+b];f.set(c,i*16);const _=this._cpuRMSNorm1PlusW(c,d,16,1e-6);w.set(_,i*16)}const h=this.makeF32Buffer("b2twa_a",o),F=this.makeF32Buffer("b2twa_b",s),n=this.makeF32Buffer("b2twa_c",r),B=this.makeU32Buffer("b2twa_w",d),l=this.makeOutputBuffer("b2twa_hout",32),p=this.makeOutputBuffer("b2twa_normed",32),y=this._makeNormParams(16,1e-6);this.dispatch("three_way_add_rmsnorm_b2",[h,F,n,B,l,p,y],2);const k=await this.readback(p,32),u=await this.readback(l,32),m=this.compare(k,w,.001),g=this.compare(u,f,1e-6);return{pass:m.pass&&g.pass,maxErr:Math.max(m.maxErr,g.maxErr),errors:[...m.errors,...g.errors]}}/* _makeGPTQData(e, t, o): builds deterministic 4-bit test weights for e rows, t columns and group size o. Returns qweight (eight 4-bit values per u32, value = (row*3 + col*7) % 15), rawWeights (the same values minus 8, as floats) and scalesPacked ((e/o)*t scales 0.3 + 0.15*i passed through cpuF32ToF16Packed). */_makeGPTQData(e,t,o){const s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let h=0;h<s;h++)for(let F=0;F<t;F++){let n=0;for(let B=0;B<8;B++){const l=h*8+B,p=(l*3+F*7)%15;a[l*t+F]=p-8,n|=p<<B*4}r[h*t+F]=n}const d=e/o,f=new Float32Array(d*t);for(let h=0;h<d*t;h++)f[h]=.3+h*.15;const w=this.cpuF32ToF16Packed(f);return{qweight:r,rawWeights:a,scalesPacked:w}}/* _cpuGPTQMatvec(e, t, o, s, r, a, d = 0): CPU reference matvec. For each of r output columns it sums e[d + row] * t[row*r + col] * scale over s rows, where the scale is the F16 half-word at index floor(row / a) * r + col of the packed array o. Returns a Float32Array of length r. */_cpuGPTQMatvec(e,t,o,s,r,a,d=0){const f=new Float32Array(r);for(let w=0;w<r;w++){let h=0;for(let F=0;F<s;F++){const 
B=Math.floor(F/a)*r+w,l=o[Math.floor(B/2)]>>B%2*16&65535,p=this.f16ToF32(l);h+=e[d+F]*t[F*r+w]*p}f[w]=h}return f}/* testFusedSiluGPTQB2: checks "fused_silu_gptq_b2_f16" (128 inputs, 8 outputs, group size 64) on two input rows; returns a passing "skipped" result when the device has no f16 support. The reference feeds a / (1 + exp(-a)) * b per row through _cpuGPTQMatvec. Tolerance 0.05. */async testFusedSiluGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.3)*2,f[k]=Math.cos(k*.5)*1.5;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=d[k*e+g];u[g]=i/(1+Math.exp(-i))*f[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sl_a",d),F=this.makeF32Buffer("b2sl_b",f),n=this.makeU32Buffer("b2sl_qw",s),B=this.makeU32Buffer("b2sl_sc",a),l=this.makeOutputBuffer("b2sl_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_silu_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}/* testFusedSigmoidGPTQB2: checks "fused_sigmoid_gptq_b2_f16" (128 inputs, 8 outputs, group size 64) on two rows; skipped without f16 support. The reference feeds sigmoid(gate) * x per row through _cpuGPTQMatvec. Tolerance 0.05. */async testFusedSigmoidGPTQB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,{qweight:s,rawWeights:r,scalesPacked:a}=this._makeGPTQData(e,t,o),d=new Float32Array(2*e),f=new Float32Array(2*e);for(let k=0;k<2*e;k++)d[k]=Math.sin(k*.4)*1.5,f[k]=Math.cos(k*.7)*2;const w=new Float32Array(2*t);for(let k=0;k<2;k++){const u=new Float32Array(e);for(let g=0;g<e;g++){const i=1/(1+Math.exp(-f[k*e+g]));u[g]=i*d[k*e+g]}const m=this._cpuGPTQMatvec(u,r,a,e,t,o);w.set(m,k*t)}const h=this.makeF32Buffer("b2sg_x",d),F=this.makeF32Buffer("b2sg_g",f),n=this.makeU32Buffer("b2sg_qw",s),B=this.makeU32Buffer("b2sg_sc",a),l=this.makeOutputBuffer("b2sg_out",2*t),p=this.makeUniform([e,t,o]);this.dispatch("fused_sigmoid_gptq_b2_f16",[h,F,n,B,l,p],Math.ceil(t/32));const y=await this.readback(l,2*t);return this.compare(y,w,.05)}/* testFusedGateUpSiLUB2: checks "fused_gate_up_silu_b2_f16" (128 inputs, 8 outputs, group size 64) on two rows; skipped without f16 support. Separate 4-bit gate and up weight sets are built inline, and the expected value is g / (1 + exp(-g)) * up per output. Tolerance 0.05. */async testFusedGateUpSiLUB2(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const e=128,t=8,o=64,s=e/8,r=new Uint32Array(s*t),a=new Float32Array(e*t);for(let 
N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*3+x*7)%15;a[v*t+x]=T-8,q|=T<<K*4}r[N*t+x]=q}const d=e/o,f=new Float32Array(d*t);for(let N=0;N<d*t;N++)f[N]=.01+N%8*.005;const w=this.cpuF32ToF16Packed(f),/* up weights: same packing as the gate weights but with the value pattern (row*5 + col*3) % 15 */h=new Uint32Array(s*t),F=new Float32Array(e*t);for(let N=0;N<s;N++)for(let x=0;x<t;x++){let q=0;for(let K=0;K<8;K++){const v=N*8+K,T=(v*5+x*3)%15;F[v*t+x]=T-8,q|=T<<K*4}h[N*t+x]=q}const n=e/o,B=new Float32Array(n*t);for(let N=0;N<n*t;N++)B[N]=.01+N%8*.005;const l=this.cpuF32ToF16Packed(B),p=new Float32Array(2*e);for(let N=0;N<2*e;N++)p[N]=Math.sin(N*.2)*.5;/* CPU reference: gate and up matvecs per row (row selected through the offset argument), then g / (1 + exp(-g)) * up */const y=new Float32Array(2*t);for(let N=0;N<2;N++){const x=this._cpuGPTQMatvec(p,a,w,e,t,o,N*e),q=this._cpuGPTQMatvec(p,F,l,e,t,o,N*e);for(let K=0;K<t;K++){const v=x[K];y[N*t+K]=v/(1+Math.exp(-v))*q[K]}}const k=this.makeF32Buffer("b2gus_in",p),u=this.makeU32Buffer("b2gus_gqw",r),m=this.makeU32Buffer("b2gus_gsc",w),g=this.makeU32Buffer("b2gus_uqw",h),i=this.makeU32Buffer("b2gus_usc",l),c=this.makeOutputBuffer("b2gus_out",2*t),_=this.makeUniform([e,t,o]);this.dispatch("fused_gate_up_silu_b2_f16",[k,u,m,g,i,c,_],Math.ceil(t/32));const b=await this.readback(c,2*t);return this.compare(b,y,.05)}/* _makeGPTQMatmulB2Data(e, t, o): builds a two-row matmul fixture for e inputs, t outputs and group size o. Returns the 2*e input (sin pattern for row 0, cos pattern for row 1), the packed 4-bit qweight, the F16-packed scales, the CPU-computed expected 2*t outputs, and K, N, gs echoing e, t, o. */_makeGPTQMatmulB2Data(e,t,o){const s=e/8,r=new Float32Array(2*e);for(let n=0;n<e;n++)r[n]=Math.sin(n*.5)*.5;for(let n=0;n<e;n++)r[e+n]=Math.cos(n*.3)*.5;const a=new Uint32Array(s*t),d=new Float32Array(e*t);for(let n=0;n<s;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<8;p++){const y=n*8+p,k=(y*3+B*7)%15;d[y*t+B]=k-8,l|=k<<p*4}a[n*t+B]=l}const f=e/o,w=new Float32Array(f*t);for(let n=0;n<f*t;n++)w[n]=.01+n%8*.005;const h=this.cpuF32ToF16Packed(w),F=new Float32Array(2*t);for(let n=0;n<2;n++)for(let B=0;B<t;B++){let l=0;for(let p=0;p<e;p++){const k=Math.floor(p/o)*t+B,u=h[Math.floor(k/2)]>>k%2*16&65535,m=this.f16ToF32(u);l+=r[n*e+p]*d[p*t+B]*m}F[n*t+B]=l}return{input:r,qweight:a,scalesPacked:h,expected:F,K:e,N:t,gs:o}}async 
testGPTQMatmulB2(){const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2_input",e),w=this.makeU32Buffer("b2_qw",t),h=this.makeU32Buffer("b2_sc",o),F=this.makeOutputBuffer("b2_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.02)}async testGPTQMatmulB2F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(128,8,64),f=this.makeF32Buffer("b2f16_input",e),w=this.makeU32Buffer("b2f16_qw",t),h=this.makeU32Buffer("b2f16_sc",o),F=this.makeOutputBuffer("b2f16_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_f16",[f,w,h,F,n],Math.ceil(a/32));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}async testGPTQMatmulB2_4T_F16(){if(!this.gpu.hasF16)return{pass:!0,maxErr:0,note:"skipped (no f16)"};const{input:e,qweight:t,scalesPacked:o,expected:s,K:r,N:a,gs:d}=this._makeGPTQMatmulB2Data(256,16,64),f=this.makeF32Buffer("b2_4t_input",e),w=this.makeU32Buffer("b2_4t_qw",t),h=this.makeU32Buffer("b2_4t_sc",o),F=this.makeOutputBuffer("b2_4t_out",2*a),n=this.makeUniform([r,a,d]);this.dispatch("gptq_matmul_b2_4t_f16",[f,w,h,F,n],Math.ceil(a/8));const B=await this.readback(F,2*a);return this.compare(B,s,.05)}f32ToBF16(e){const t=new ArrayBuffer(4);return new Uint32Array(t)[0]=new Uint32Array(new Float32Array([e]).buffer)[0],new Uint32Array(t)[0]>>>16}unpackBF16(e,t){const o=e>>>t*16&65535;return this.bf16ToF32(o)}}function $(I){const e=document.getElementById("log");e.textContent+=I+`
3
  `,e.scrollTop=e.scrollHeight}document.querySelectorAll(".tab").forEach(I=>{I.addEventListener("click",()=>{document.querySelectorAll(".tab").forEach(e=>e.classList.remove("active")),document.querySelectorAll(".panel").forEach(e=>e.classList.remove("active")),I.classList.add("active"),document.getElementById(I.dataset.panel).classList.add("active")})});document.getElementById("runTests").addEventListener("click",async()=>{const I=document.getElementById("runTests"),e=document.getElementById("testResults"),t=document.getElementById("testSummary");I.disabled=!0,e.innerHTML="",t.innerHTML="",$("Initializing WebGPU for tests...");try{const o=new ot;await o.init(),$(`WebGPU initialized. Running tests...
4
  `);let s=0,r=0;await o.runAll(a=>{const d=a.pass?"✓":"✗",f=a.pass?"pass":"fail";a.pass?s++:r++;const w=document.createElement("div");if(w.className="test-row",w.innerHTML=`
5
  <span class="test-icon ${f}">${d}</span>
 
14
  </div>
15
  `,$(`
16
  Done: ${s} passed, ${r} failed`),o.destroy()}catch(o){$(`Error: ${o.message}
17
+ ${o.stack}`)}finally{I.disabled=!1}});document.getElementById("loadAndProfile").addEventListener("click",async()=>{const I=document.getElementById("loadAndProfile"),e=document.getElementById("profStatus"),t=document.getElementById("profCategories"),o=document.getElementById("profTopOps");I.disabled=!0,t.innerHTML="",o.innerHTML="";try{const s=document.getElementById("profRepo").value.trim();e.textContent="Loading model...",$("Profiler: loading model from "+s);const{GPUContext:r}=await et(async()=>{const{GPUContext:c}=await import("./gpu-ops-flxI8RuZ.js").then(_=>_.g);return{GPUContext:c}},[]),{Qwen35Model:a}=await et(async()=>{const{Qwen35Model:c}=await import("./qwen35-model-BJNcT5Rw.js");return{Qwen35Model:c}},__vite__mapDeps([0,1])),{loadModelWeights:d,loadConfig:f,loadQuantConfig:w}=await et(async()=>{const{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}=await import("./safetensors-loader-CwGm5mJX.js");return{loadModelWeights:c,loadConfig:_,loadQuantConfig:b}},[]),{Profiler:h}=await et(async()=>{const{Profiler:c}=await import("./profiler-DYUyiq-B.js");return{Profiler:c}},[]),F=new r;await F.init(),e.textContent="Fetching config...";const n=await f(s),B=await w(s);e.textContent="Downloading weights (this may take a while)...";const l=await d(s,c=>{if(c.phase==="downloading"){const _=c.total?(c.loaded/c.total*100).toFixed(0):"?";e.textContent=`Downloading: ${c.filesLoaded}/${c.filesTotal} shards (${_}%)`}else c.phase==="parsing"&&(e.textContent=`Parsing ${c.file}...`)});e.textContent="Uploading to GPU...";const p=new a(F,n,B);p.compilePipelines(),p.loadWeights(l),p.initBuffers(),e.textContent="Warming up...",await p.forward(1,0);const y=p.textCfg.linear_num_key_heads,k=p.textCfg.linear_key_head_dim,u=p.textCfg.linear_value_head_dim,m=y*(k+k+u);for(let c=0;c<p.numLayers;c++)p.layerTypes[c]==="linear_attention"&&(F.device.queue.writeBuffer(p.linState[c],0,new ArrayBuffer(y*k*u*4)),F.device.queue.writeBuffer(p.linConvHist[c],0,new 
ArrayBuffer(3*m*4)));p.seqLen=0,e.textContent="Profiling forward pass...",$("Profiling single forward pass...");const i=await new h(p).profileForward(1,0);e.textContent=`Done! Estimated: ${i.estimatedTokPerSec} tok/s (sync overhead inflates times)`,$(`
18
  Profile complete: ${i.total.toFixed(1)}ms total (~${i.estimatedTokPerSec} tok/s with sync overhead)`),$(`
19
  Category breakdown:`);for(const c of i.categories){const _=document.createElement("div");_.className="prof-row";const b=c.name.includes("matvec")||c.name==="lm_head"?"var(--orange)":c.name.includes("attention")||c.name.includes("deltanet")?"var(--blue)":"var(--green)";_.innerHTML=`
20
  <span class="prof-name">${c.name}</span>
index.html CHANGED
@@ -138,9 +138,9 @@
138
  .toast-error { border-color: #ef4444; color: #ef4444; }
139
  .toast-success { border-color: var(--accent); color: var(--accent); }
140
  </style>
141
- <script type="module" crossorigin src="/assets/main-p04e1WzX.js"></script>
142
- <link rel="modulepreload" crossorigin href="/assets/gpu-ops-BbLjsC0p.js">
143
- <link rel="modulepreload" crossorigin href="/assets/qwen35-model-7KVn_FLm.js">
144
  <link rel="modulepreload" crossorigin href="/assets/safetensors-loader-CwGm5mJX.js">
145
  </head>
146
  <body>
 
138
  .toast-error { border-color: #ef4444; color: #ef4444; }
139
  .toast-success { border-color: var(--accent); color: var(--accent); }
140
  </style>
141
+ <script type="module" crossorigin src="/assets/main-Cji2l4fL.js"></script>
142
+ <link rel="modulepreload" crossorigin href="/assets/gpu-ops-flxI8RuZ.js">
143
+ <link rel="modulepreload" crossorigin href="/assets/qwen35-model-BJNcT5Rw.js">
144
  <link rel="modulepreload" crossorigin href="/assets/safetensors-loader-CwGm5mJX.js">
145
  </head>
146
  <body>
test.html CHANGED
@@ -41,8 +41,8 @@
41
  button.run-btn:hover:not(:disabled) { filter: brightness(1.15); }
42
  #log { margin-top: 12px; padding: 12px; background: var(--bg2); border: 1px solid var(--border); border-radius: 8px; max-height: 300px; overflow-y: auto; white-space: pre-wrap; font-size: 0.7rem; color: var(--dim); }
43
  </style>
44
- <script type="module" crossorigin src="/assets/test-vukVrAzP.js"></script>
45
- <link rel="modulepreload" crossorigin href="/assets/gpu-ops-BbLjsC0p.js">
46
  </head>
47
  <body>
48
  <h1>TensorBend Shader Tests & Profiler</h1>
 
41
  button.run-btn:hover:not(:disabled) { filter: brightness(1.15); }
42
  #log { margin-top: 12px; padding: 12px; background: var(--bg2); border: 1px solid var(--border); border-radius: 8px; max-height: 300px; overflow-y: auto; white-space: pre-wrap; font-size: 0.7rem; color: var(--dim); }
43
  </style>
44
+ <script type="module" crossorigin src="/assets/test-DQDfUwQY.js"></script>
45
+ <link rel="modulepreload" crossorigin href="/assets/gpu-ops-flxI8RuZ.js">
46
  </head>
47
  <body>
48
  <h1>TensorBend Shader Tests & Profiler</h1>