diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..91a10c2cf39fe5863636d2d499e2a458f3a6428a 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,607 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/tok_embeddings.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/config.ini filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.55.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.41.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.40.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.7.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.36.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.21.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.23.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.0.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.51.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.50.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.19.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.13.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.8.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.9.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.44.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.4.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.12.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.52.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.58.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.2.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.42.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/output.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.29.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.47.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.54.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.20.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.39.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.18.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.3.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.56.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.25.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.28.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.30.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.38.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.10.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.1.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.27.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.37.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.11.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.43.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.49.feed_forward.w2.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.6.feed_forward.w13.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.14.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.24.ffn_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.34.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.45.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.22.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.26.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.33.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.53.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.16.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.31.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.feed_forward.w13.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.46.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.48.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.15.attention.wo.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.35.attention_norm.weight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.57.attention.w_qkv.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.59.feed_forward.w2.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.5.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.17.attention.w_qkv.0.qweight filter=lfs diff=lfs merge=lfs -text
+triton_models/weights/layers.32.attention.wo.0.scales_zeros filter=lfs diff=lfs merge=lfs -text
diff --git a/model_repository/postprocessing b/model_repository/postprocessing
new file mode 120000
index 0000000000000000000000000000000000000000..6aece3aeef8f34d1e2d82489220a7eeb7d7c58d5
--- /dev/null
+++ b/model_repository/postprocessing
@@ -0,0 +1 @@
+../triton_models/postprocessing
\ No newline at end of file
diff --git a/model_repository/preprocessing b/model_repository/preprocessing
new file mode 120000
index 0000000000000000000000000000000000000000..11ce1acf1c1baf40cb9cc89981d475a746754df8
--- /dev/null
+++ b/model_repository/preprocessing
@@ -0,0 +1 @@
+../triton_models/preprocessing
\ No newline at end of file
diff --git a/model_repository/turbomind b/model_repository/turbomind
new file mode 120000
index 0000000000000000000000000000000000000000..9e80edcdf7f8d985a32da609fc519b408764ae1e
--- /dev/null
+++ b/model_repository/turbomind
@@ -0,0 +1 @@
+../triton_models/interactive
\ No newline at end of file
diff --git a/service_docker_up.sh b/service_docker_up.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d45345e6165857b5bf210cb4ad261029143508a9
--- /dev/null
+++ b/service_docker_up.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+show_help() {
+ echo "Usage: $0 [-h] [--help] [-l] [--lib-dir]"
+ echo
+ echo "Options:"
+ echo " -h, --help Show this help message and exit"
+ echo " --lib-dir Specify the directory of turbomind libraries"
+}
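+
+# Example invocations (illustrative; the --lib-dir path is a placeholder):
+#   bash service_docker_up.sh
+#   bash service_docker_up.sh --lib-dir /path/to/turbomind/backend/libs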
+
+# check if '-h' or '--help' in the arguments
+for arg in "$@"
+do
+ if [ "$arg" == "-h" ] || [ "$arg" == "--help" ]; then
+ show_help
+ exit 0
+ fi
+done
+
+
+TP=1
+DEVICES="0"
+for ((i = 1; i < ${TP}; ++i)); do
+ DEVICES="${DEVICES},$i"
+done
+DEVICES="\"device=${DEVICES}\""
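+# e.g. TP=1 yields DEVICES="device=0"; TP=2 would yield DEVICES="device=0,1"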
+
+
+SCRIPT_DIR="$(dirname "$0")"
+SCRIPT_ABS_DIR="$(realpath "$SCRIPT_DIR")"
+
+
+if [ -z "$1" ]; then
+ docker run \
+ --gpus $DEVICES \
+ --rm \
+ -v "${SCRIPT_ABS_DIR}":/workspace/models \
+ --shm-size 16g \
+ -p 33336:22 \
+ -p 33337-33400:33337-33400 \
+ --cap-add=SYS_PTRACE \
+ --cap-add=SYS_ADMIN \
+ --security-opt seccomp=unconfined \
+ --name lmdeploy \
+ -it --env NCCL_LAUNCH_MODE=GROUP openmmlab/lmdeploy:latest \
+ tritonserver \
+ --model-repository=/workspace/models/model_repository \
+ --allow-http=0 \
+ --allow-grpc=1 \
+ --grpc-port=33337 \
+ --log-verbose=0 \
+ --allow-metrics=1
+fi
+
+for ((i = 1; i <= $#; i++)); do
+ arg=${!i}
+ case "$arg" in
+ --lib-dir)
+ if [ "$i" -eq "$#" ]; then
+ show_help
+ exit 1
+ fi
+ LIB_PATH="${@:i+1:1}"
+ docker run \
+ --gpus $DEVICES \
+ --rm \
+ -v "${LIB_PATH}":/opt/tritonserver/backends/turbomind \
+ -v ""${SCRIPT_ABS_DIR}"":/workspace/models \
+ --shm-size 16g \
+ -p 33336:22 \
+ -p 33337-33400:33337-33400 \
+ --cap-add=SYS_PTRACE \
+ --cap-add=SYS_ADMIN \
+ --security-opt seccomp=unconfined \
+ --name lmdeploy \
+ -it --env NCCL_LAUNCH_MODE=GROUP openmmlab/lmdeploy:latest \
+ tritonserver \
+ --model-repository=/workspace/models/model_repository \
+ --allow-http=0 \
+ --allow-grpc=1 \
+ --grpc-port=33337 \
+ --log-verbose=0 \
+ --allow-metrics=1
+ break
+ ;;
+ esac
+done
diff --git a/triton_models/interactive/1/placeholder b/triton_models/interactive/1/placeholder
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/triton_models/interactive/1/weights b/triton_models/interactive/1/weights
new file mode 120000
index 0000000000000000000000000000000000000000..05546b6f3227d07ee91d76d14c794fe34bc3aac2
--- /dev/null
+++ b/triton_models/interactive/1/weights
@@ -0,0 +1 @@
+../../weights
\ No newline at end of file
diff --git a/triton_models/interactive/config.pbtxt b/triton_models/interactive/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..ae0423c7d4905b4fc058e72ba23aaf81391316d5
--- /dev/null
+++ b/triton_models/interactive/config.pbtxt
@@ -0,0 +1,286 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "turbomind"
+backend: "turbomind"
+default_model_filename: "weights"
+max_batch_size: 1
+
+model_transaction_policy {
+ decoupled: True
+}
+
+instance_group [
+ {
+ # max concurrent instances
+ count: 48
+ kind: KIND_CPU
+ }
+]
+
+input [
+ {
+ name: "input_ids"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ # allow_ragged_batch: true
+ },
+ {
+ name: "input_lengths"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ },
+ {
+ name: "request_output_len"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ },
+ {
+ name: "step"
+ data_type: TYPE_INT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "session_len"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "runtime_top_k"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "runtime_top_p"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "beam_search_diversity_rate"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "temperature"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "len_penalty"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "repetition_penalty"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "random_seed"
+ data_type: TYPE_UINT64
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "is_return_log_probs"
+ data_type: TYPE_BOOL
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "beam_width"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "start_id"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "end_id"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "bad_words_list"
+ data_type: TYPE_INT32
+ dims: [ 2, -1 ]
+ optional: true
+ },
+ {
+ name: "stop_words_list"
+ data_type: TYPE_INT32
+ dims: [ 2, -1 ]
+ optional: true
+ },
+ {
+ name: "prompt_learning_task_name_ids"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "top_p_decay"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "top_p_min"
+ data_type: TYPE_FP32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "top_p_reset_ids"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "START"
+ data_type: TYPE_INT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "END"
+ data_type: TYPE_INT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "STOP"
+ data_type: TYPE_INT32
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ },
+ {
+ name: "CORRID"
+ data_type: TYPE_UINT64
+ dims: [ 1 ]
+ reshape: { shape: [ ] }
+ optional: true
+ }
+]
+output [
+ {
+ name: "output_ids"
+ data_type: TYPE_UINT32
+ dims: [ -1, -1 ]
+ },
+ {
+ name: "sequence_length"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ },
+ {
+ name: "cum_log_probs"
+ data_type: TYPE_FP32
+ dims: [ -1 ]
+ },
+ {
+ name: "output_log_probs"
+ data_type: TYPE_FP32
+ dims: [ -1, -1 ]
+ }
+]
+
+parameters {
+ key: "pipeline_para_size"
+ value: {
+ string_value: "1"
+ }
+}
+parameters {
+ key: "data_type"
+ value: {
+ string_value: "fp16"
+ }
+}
+parameters {
+ key: "model_type"
+ value: {
+ string_value: "Llama"
+ }
+}
+
+parameters {
+ key: "enable_custom_all_reduce"
+ value: {
+ string_value: "0"
+ }
+}
+parameters {
+ key: "tensor_para_size"
+ value: {
+ string_value: "1"
+ }
+}
+parameters {
+ key: "model_name"
+ value: {
+ string_value: "internlm-chat-7b"
+ }
+}
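+
+# Usage note: only "input_ids", "input_lengths" and "request_output_len" are
+# required request inputs; every other input is an optional sampling or
+# session control. "START", "END", "STOP" and "CORRID" manage interactive
+# (stateful) sessions, and the decoupled policy above allows streaming responses.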
diff --git a/triton_models/postprocessing/1/model.py b/triton_models/postprocessing/1/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..20de97595195da5dedc044a31c6086c1f49892da
--- /dev/null
+++ b/triton_models/postprocessing/1/model.py
@@ -0,0 +1,134 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os.path as osp
+from pathlib import Path
+
+import numpy as np
+import triton_python_backend_utils as pb_utils
+
+# This tokenizer is `lmdeploy/turbomind/tokenizer.py`. When an LLM is served
+# by triton inference server, it has to be converted first by running
+# `python lmdeploy/serve/turbomind/deploy.py`. Then
+# `lmdeploy/turbomind/tokenizer.py` will be copied to `tokenizer/tokenizer.py`
+from .tokenizer.tokenizer import Tokenizer
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name.
+
+ Every Python model that is created must have "TritonPythonModel" as the
+ class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+ the model to initialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device
+ ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # Parse model configs
+ self.model_config = model_config = json.loads(args['model_config'])
+
+ # Parse model output configs
+ output_config = pb_utils.get_output_config_by_name(
+ model_config, 'OUTPUT')
+
+ # Convert Triton types to numpy types
+ self.output_dtype = pb_utils.triton_string_to_numpy(
+ output_config['data_type'])
+
+ cur_folder = Path(__file__).parent
+
+ self.tokenizer = Tokenizer(
+ osp.join(
+ cur_folder, self.model_config['parameters']['tokenizer_path']
+ ['string_value']))
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+
+ responses = []
+
+ # Every Python backend must iterate over every one of the requests
+ # and create a pb_utils.InferenceResponse for each of them.
+ for idx, request in enumerate(requests):
+ # Get input tensors
+ tokens_batch = pb_utils.get_input_tensor_by_name(
+ request, 'TOKENS_BATCH').as_numpy()
+ sequence_length = pb_utils.get_input_tensor_by_name(
+ request, 'sequence_length').as_numpy()
+
+ # Postprocessing output data.
+ outputs = self._postprocessing(tokens_batch.tolist(),
+ sequence_length)
+
+ # Create output tensors. You need pb_utils.Tensor
+ # objects to create pb_utils.InferenceResponse.
+ output_tensor = pb_utils.Tensor(
+ 'OUTPUT',
+ np.array(outputs).astype(self.output_dtype))
+
+ # Create InferenceResponse. You can set an error here in case
+ # there was a problem with handling this inference request.
+ # Below is an example of how you can set errors in inference
+ # response:
+ #
+ # pb_utils.InferenceResponse(
+ # output_tensors=..., error=pb_utils.TritonError("An error occurred"))
+ inference_response = pb_utils.InferenceResponse(
+ output_tensors=[output_tensor])
+ responses.append(inference_response)
+
+ # You should return a list of pb_utils.InferenceResponse. Length
+ # of this list must match the length of `requests` list.
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+
+ Implementing `finalize` function is optional. This function allows the
+ model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
+
+ def _postprocessing(self, tokens_batch, sequence_length):
+ """decode token ids into texts."""
+ outputs = []
+ for beam_tokens, beam_len in zip(tokens_batch, sequence_length):
+ for tokens, _len in zip(beam_tokens, beam_len):
+ output = self.tokenizer.decode(tokens, _len)
+ output = output.encode('utf8')
+ outputs.append(output)
+ return outputs
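+
+# Shape note: TOKENS_BATCH is expected as [batch, beam, seq_len] and
+# sequence_length as [batch, beam], so `_postprocessing` yields one UTF-8
+# string per (batch, beam) pair, matching the [-1, -1] OUTPUT dims declared
+# in config.pbtxt.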
diff --git a/triton_models/postprocessing/1/tokenizer b/triton_models/postprocessing/1/tokenizer
new file mode 120000
index 0000000000000000000000000000000000000000..a92be7faecb29307d691683e022915639ca09f22
--- /dev/null
+++ b/triton_models/postprocessing/1/tokenizer
@@ -0,0 +1 @@
+../../tokenizer
\ No newline at end of file
diff --git a/triton_models/postprocessing/config.pbtxt b/triton_models/postprocessing/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..a4c3fd1041dcd03dc5c18b3fc28533cb82ac5653
--- /dev/null
+++ b/triton_models/postprocessing/config.pbtxt
@@ -0,0 +1,36 @@
+name: "postprocessing"
+backend: "python"
+max_batch_size: 1
+input [
+ {
+ name: "TOKENS_BATCH"
+ data_type: TYPE_UINT32
+ dims: [ -1, -1 ]
+ },
+ {
+ name: "sequence_length"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ }
+]
+output [
+ {
+ name: "OUTPUT"
+ data_type: TYPE_STRING
+ dims: [ -1, -1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 16
+ kind: KIND_CPU
+ }
+]
+
+parameters {
+ key: "tokenizer_path"
+ value: {
+ string_value: "tokenizer/tokenizer.model"
+ }
+}
diff --git a/triton_models/preprocessing/1/model.py b/triton_models/preprocessing/1/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..77f51bfb3d03e4ccd1eee656eada1744ae19805a
--- /dev/null
+++ b/triton_models/preprocessing/1/model.py
@@ -0,0 +1,159 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os.path as osp
+from pathlib import Path
+
+import numpy as np
+import torch
+import triton_python_backend_utils as pb_utils
+from torch.nn.utils.rnn import pad_sequence
+
+# This tokenizer is `lmdeploy/turbomind/tokenizer.py`. When an LLM is served
+# by triton inference server, it has to be converted first by running
+# `python lmdeploy/serve/turbomind/deploy.py`. Then
+# `lmdeploy/turbomind/tokenizer.py` will be copied to `tokenizer/tokenizer.py`
+from .tokenizer.tokenizer import Tokenizer
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name.
+
+ Every Python model that is created must have "TritonPythonModel" as the
+ class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+ the model to initialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device
+ ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # Parse model configs
+ self.model_config = model_config = json.loads(args['model_config'])
+
+ # Parse model output configs and convert Triton types to numpy types
+ input_names = [
+ 'INPUT_ID', 'REQUEST_INPUT_LEN', 'BAD_WORDS_IDS', 'STOP_WORDS_IDS'
+ ]
+ for input_name in input_names:
+ setattr(
+ self,
+ input_name.lower() + '_dtype',
+ pb_utils.triton_string_to_numpy(
+ pb_utils.get_output_config_by_name(
+ model_config, input_name)['data_type']))
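+
+ # Note: the BAD_WORDS_IDS / STOP_WORDS_IDS dtypes are resolved above, but
+ # this implementation does not actually emit those tensors in `execute`;
+ # only INPUT_ID, REQUEST_INPUT_LEN and REQUEST_OUTPUT_LEN are produced.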
+
+ cur_folder = Path(__file__).parent
+ self.tokenizer = Tokenizer(
+ osp.join(
+ cur_folder, self.model_config['parameters']['tokenizer_path']
+ ['string_value']))
+ self.start_id = self.tokenizer.bos_token_id
+ self.end_id = self.tokenizer.eos_token_id
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+
+ responses = []
+
+ # Every Python backend must iterate over every one of the requests
+ # and create a pb_utils.InferenceResponse for each of them.
+ for idx, request in enumerate(requests):
+ # Get input tensors
+ query = pb_utils.get_input_tensor_by_name(request,
+ 'QUERY').as_numpy()
+ request_output_len = pb_utils.get_input_tensor_by_name(
+ request, 'REQUEST_OUTPUT_LEN').as_numpy()
+
+ # Preprocessing input data.
+ input_id, request_input_len = self._create_request(query)
+
+ # Create output tensors. You need pb_utils.Tensor
+ # objects to create pb_utils.InferenceResponse.
+ input_id_tensor = pb_utils.Tensor(
+ 'INPUT_ID',
+ np.array(input_id).astype(self.input_id_dtype))
+ request_input_len_tensor = pb_utils.Tensor(
+ 'REQUEST_INPUT_LEN',
+ np.array(request_input_len).astype(
+ self.request_input_len_dtype))
+ request_output_len_tensor = pb_utils.Tensor(
+ 'REQUEST_OUTPUT_LEN', request_output_len)
+
+ # Create InferenceResponse. You can set an error here in case
+ # there was a problem with handling this inference request.
+ # Below is an example of how you can set errors in inference
+ # response:
+ #
+ # pb_utils.InferenceResponse(
+ # output_tensors=..., error=pb_utils.TritonError("An error occurred"))
+ inference_response = pb_utils.InferenceResponse(output_tensors=[
+ input_id_tensor, request_input_len_tensor,
+ request_output_len_tensor
+ ])
+ responses.append(inference_response)
+
+ # You should return a list of pb_utils.InferenceResponse. Length
+ # of this list must match the length of `requests` list.
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+
+ Implementing `finalize` function is optional. This function allows the
+ model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
+
+ def _create_request(self, query):
+ """Tokenize prompts and return the token ids and their length.
+
+ Args:
+ query (List[str]): a list of prompt
+ Returns:
+ tuple: token ids and their length
+ """
+ start_ids = [
+ torch.IntTensor(self.tokenizer.encode(s[0].decode()))
+ for s in query
+ ]
+ start_lengths = torch.IntTensor([[len(ids)] for ids in start_ids])
+ start_ids = pad_sequence(start_ids,
+ batch_first=True,
+ padding_value=self.end_id)
+ return start_ids, start_lengths
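+
+# Padding note: shorter prompts in a batch are right-padded with the
+# tokenizer's eos id (self.end_id); REQUEST_INPUT_LEN keeps each prompt's
+# true, unpadded length so downstream stages can ignore the padding.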
diff --git a/triton_models/preprocessing/1/tokenizer b/triton_models/preprocessing/1/tokenizer
new file mode 120000
index 0000000000000000000000000000000000000000..a92be7faecb29307d691683e022915639ca09f22
--- /dev/null
+++ b/triton_models/preprocessing/1/tokenizer
@@ -0,0 +1 @@
+../../tokenizer
\ No newline at end of file
diff --git a/triton_models/preprocessing/config.pbtxt b/triton_models/preprocessing/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..a87abd98df1e193849122f0b1f3979f20eef3bbf
--- /dev/null
+++ b/triton_models/preprocessing/config.pbtxt
@@ -0,0 +1,74 @@
+name: "preprocessing"
+backend: "python"
+max_batch_size: 1
+
+input [
+ {
+ name: "QUERY"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ },
+ {
+ name: "BAD_WORDS_DICT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ optional: true
+ },
+ {
+ name: "STOP_WORDS_DICT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ optional: true
+ },
+ {
+ name: "REQUEST_OUTPUT_LEN"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ }
+]
+output [
+ {
+ name: "INPUT_ID"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ },
+ {
+ name: "REQUEST_INPUT_LEN"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ },
+ {
+ name: "BAD_WORDS_IDS"
+ data_type: TYPE_INT32
+ dims: [ 2, -1 ]
+ },
+ {
+ name: "STOP_WORDS_IDS"
+ data_type: TYPE_INT32
+ dims: [ 2, -1 ]
+ },
+ {
+ name: "REQUEST_OUTPUT_LEN"
+ data_type: TYPE_UINT32
+ dims: [ -1 ]
+ },
+ {
+ name: "PROMPT_LEARNING_TASK_NAME_IDS"
+ data_type: TYPE_UINT32
+ dims: [ 1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 4
+ kind: KIND_CPU
+ }
+]
+
+parameters {
+ key: "tokenizer_path"
+ value: {
+ string_value: "tokenizer/tokenizer.model"
+ }
+}
diff --git a/triton_models/tokenizer/config.json b/triton_models/tokenizer/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f835bb376128703da2af4a0c9472705c94366bd2
--- /dev/null
+++ b/triton_models/tokenizer/config.json
@@ -0,0 +1,33 @@
+{
+ "_name_or_path": "/nvme/shared_data/InternLM/20B/internlm-20b-chat",
+ "architectures": [
+ "InternLMForCausalLM"
+ ],
+ "auto_map": {
+ "AutoConfig": "configuration_internlm.InternLMConfig",
+ "AutoModel": "modeling_internlm.InternLMForCausalLM",
+ "AutoModelForCausalLM": "modeling_internlm.InternLMForCausalLM"
+ },
+ "bias": false,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 13824,
+ "max_position_embeddings": 2048,
+ "model_type": "internlm",
+ "num_attention_heads": 40,
+ "num_hidden_layers": 60,
+ "num_key_value_heads": 40,
+ "pad_token_id": 0,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.33.1",
+ "use_cache": false,
+ "vocab_size": 103168
+}
diff --git a/triton_models/tokenizer/configuration_internlm.py b/triton_models/tokenizer/configuration_internlm.py
new file mode 100644
index 0000000000000000000000000000000000000000..298f91319529e9b3034bcb74bb428d610534a0ba
--- /dev/null
+++ b/triton_models/tokenizer/configuration_internlm.py
@@ -0,0 +1,120 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" InternLM model configuration"""
+
+from transformers.utils import logging
+from transformers.configuration_utils import PretrainedConfig
+
+
+logger = logging.get_logger(__name__)
+
+INTERNLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
+
+
+class InternLMConfig(PretrainedConfig):
+ r"""
+ This is the configuration class to store the configuration of an [`InternLMModel`]. It is used to instantiate an InternLM
+ model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+ defaults will yield a similar configuration to that of the InternLM-7B.
+
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+ documentation from [`PretrainedConfig`] for more information.
+
+
+ Args:
+ vocab_size (`int`, *optional*, defaults to 32000):
+ Vocabulary size of the InternLM model. Defines the number of different tokens that can be represented by the
+ `input_ids` passed when calling [`InternLMModel`]
+ hidden_size (`int`, *optional*, defaults to 4096):
+ Dimension of the hidden representations.
+ intermediate_size (`int`, *optional*, defaults to 11008):
+ Dimension of the MLP representations.
+ num_hidden_layers (`int`, *optional*, defaults to 32):
+ Number of hidden layers in the Transformer encoder.
+ num_attention_heads (`int`, *optional*, defaults to 32):
+ Number of attention heads for each attention layer in the Transformer encoder.
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
+ The non-linear activation function (function or string) in the decoder.
+ max_position_embeddings (`int`, *optional*, defaults to 2048):
+ The maximum sequence length that this model might ever be used with. Typically set this to something large
+ just in case (e.g., 512 or 1024 or 2048).
+ initializer_range (`float`, *optional*, defaults to 0.02):
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+ rms_norm_eps (`float`, *optional*, defaults to 1e-6):
+ The epsilon used by the rms normalization layers.
+ use_cache (`bool`, *optional*, defaults to `True`):
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
+ relevant if `config.is_decoder=True`.
+ tie_word_embeddings(`bool`, *optional*, defaults to `False`):
+ Whether to tie weight embeddings
+ Example:
+
+ ```python
+ >>> from transformers import InternLMModel, InternLMConfig
+
+ >>> # Initializing an InternLM internlm-7b style configuration
+ >>> configuration = InternLMConfig()
+
+ >>> # Initializing a model from the internlm-7b style configuration
+ >>> model = InternLMModel(configuration)
+
+ >>> # Accessing the model configuration
+ >>> configuration = model.config
+ ```"""
+ model_type = "internlm"
+ _auto_class = "AutoConfig"
+
+ def __init__(
+ self,
+ vocab_size=103168,
+ hidden_size=4096,
+ intermediate_size=11008,
+ num_hidden_layers=32,
+ num_attention_heads=32,
+ hidden_act="silu",
+ max_position_embeddings=2048,
+ initializer_range=0.02,
+ rms_norm_eps=1e-6,
+ use_cache=True,
+ pad_token_id=0,
+ bos_token_id=1,
+ eos_token_id=2,
+ tie_word_embeddings=False,
+ bias=True,
+ **kwargs,
+ ):
+ self.vocab_size = vocab_size
+ self.max_position_embeddings = max_position_embeddings
+ self.hidden_size = hidden_size
+ self.intermediate_size = intermediate_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.hidden_act = hidden_act
+ self.initializer_range = initializer_range
+ self.rms_norm_eps = rms_norm_eps
+ self.use_cache = use_cache
+ self.bias = bias
+ super().__init__(
+ pad_token_id=pad_token_id,
+ bos_token_id=bos_token_id,
+ eos_token_id=eos_token_id,
+ tie_word_embeddings=tie_word_embeddings,
+ **kwargs,
+ )
diff --git a/triton_models/tokenizer/generation_config.json b/triton_models/tokenizer/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a7a7f642937e35b7bb1289294526ff16a4e08ef3
--- /dev/null
+++ b/triton_models/tokenizer/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.33.1"
+}
diff --git a/triton_models/tokenizer/modeling_internlm.py b/triton_models/tokenizer/modeling_internlm.py
new file mode 100644
index 0000000000000000000000000000000000000000..66acd06bffe503e12458ed2f7ad365a612de86d9
--- /dev/null
+++ b/triton_models/tokenizer/modeling_internlm.py
@@ -0,0 +1,1007 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch InternLM model."""
+import math
+from typing import List, Optional, Tuple, Union
+import queue
+import threading
+
+import torch
+import torch.utils.checkpoint
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+
+from transformers.activations import ACT2FN
+from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
+from transformers.modeling_utils import PreTrainedModel
+from transformers.generation.streamers import BaseStreamer
+from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
+from .configuration_internlm import InternLMConfig
+
+
+logger = logging.get_logger(__name__)
+
+_CONFIG_FOR_DOC = "InternLMConfig"
+
+# Copied from transformers.models.bart.modeling_bart._make_causal_mask
+def _make_causal_mask(
+ input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
+):
+ """
+ Make causal mask used for bi-directional self-attention.
+ """
+ bsz, tgt_len = input_ids_shape
+ mask = torch.full((tgt_len, tgt_len), torch.tensor(torch.finfo(dtype).min, device=device), device=device)
+ mask_cond = torch.arange(mask.size(-1), device=device)
+ mask.masked_fill_(mask_cond < (mask_cond + 1).view(mask.size(-1), 1), 0)
+ mask = mask.to(dtype)
+
+ if past_key_values_length > 0:
+ mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1)
+ return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)
+
+
+# Copied from transformers.models.bart.modeling_bart._expand_mask
+def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None):
+ """
+ Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
+ """
+ bsz, src_len = mask.size()
+ tgt_len = tgt_len if tgt_len is not None else src_len
+
+ expanded_mask = mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
+
+ inverted_mask = 1.0 - expanded_mask
+
+ return inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(dtype).min)
+
+
+class InternLMRMSNorm(nn.Module):
+ def __init__(self, hidden_size, eps=1e-6):
+ """
+ InternLMRMSNorm is equivalent to T5LayerNorm
+ """
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(hidden_size))
+ self.variance_epsilon = eps
+
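+ # In effect: y = weight * x / sqrt(mean(x**2, dim=-1) + eps); the variance
+ # is computed in fp32 and the result is cast back to the weight dtype below.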
+ def forward(self, hidden_states):
+ variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
+ hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
+
+ # convert into half-precision if necessary
+ if self.weight.dtype in [torch.float16, torch.bfloat16]:
+ hidden_states = hidden_states.to(self.weight.dtype)
+
+ return self.weight * hidden_states
+
+
+class InternLMRotaryEmbedding(torch.nn.Module):
+ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
+ super().__init__()
+ inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float().to(device) / dim))
+ self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+ # Build here to make `torch.jit.trace` work.
+ self.max_seq_len_cached = max_position_embeddings
+ t = torch.arange(self.max_seq_len_cached, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
+ freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+ # Different from paper, but it uses a different permutation in order to obtain the same calculation
+ emb = torch.cat((freqs, freqs), dim=-1)
+ self.register_buffer("cos_cached", emb.cos()[None, None, :, :], persistent=False)
+ self.register_buffer("sin_cached", emb.sin()[None, None, :, :], persistent=False)
+
+ def forward(self, x, seq_len=None):
+ # x: [bs, num_attention_heads, seq_len, head_size]
+ # This `if` block is unlikely to be run after we build sin/cos in `__init__`. Keep the logic here just in case.
+ if seq_len > self.max_seq_len_cached:
+ self.max_seq_len_cached = seq_len
+ t = torch.arange(self.max_seq_len_cached, device=x.device, dtype=self.inv_freq.dtype)
+ freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+ # Different from paper, but it uses a different permutation in order to obtain the same calculation
+ emb = torch.cat((freqs, freqs), dim=-1).to(x.device)
+ self.register_buffer("cos_cached", emb.cos()[None, None, :, :], persistent=False)
+ self.register_buffer("sin_cached", emb.sin()[None, None, :, :], persistent=False)
+ return (
+ self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
+ self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
+ )
+
+
+def rotate_half(x):
+ """Rotates half the hidden dims of the input."""
+ x1 = x[..., : x.shape[-1] // 2]
+ x2 = x[..., x.shape[-1] // 2 :]
+ return torch.cat((-x2, x1), dim=-1)
+
+
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
+ # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
+ cos = cos.squeeze(1).squeeze(0) # [seq_len, dim]
+ sin = sin.squeeze(1).squeeze(0) # [seq_len, dim]
+ cos = cos[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim]
+ sin = sin[position_ids].unsqueeze(1) # [bs, 1, seq_len, dim]
+ q_embed = (q * cos) + (rotate_half(q) * sin)
+ k_embed = (k * cos) + (rotate_half(k) * sin)
+ return q_embed, k_embed
+
+
+class InternLMMLP(nn.Module):
+ def __init__(
+ self,
+ hidden_size: int,
+ intermediate_size: int,
+ hidden_act: str,
+ ):
+ super().__init__()
+ self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
+ self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
+ self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
+ self.act_fn = ACT2FN[hidden_act]
+
+ def forward(self, x):
+ return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
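+
+# Naming note (an inference from the LFS-tracked weight files, not from this
+# code): the deployed turbomind weights appear to fuse gate_proj/up_proj as
+# `feed_forward.w13` and store down_proj as `feed_forward.w2`.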
+
+
+class InternLMAttention(nn.Module):
+ """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+ def __init__(self, config: InternLMConfig):
+ super().__init__()
+ self.config = config
+ self.hidden_size = config.hidden_size
+ self.num_heads = config.num_attention_heads
+ self.head_dim = self.hidden_size // self.num_heads
+ self.max_position_embeddings = config.max_position_embeddings
+
+ if (self.head_dim * self.num_heads) != self.hidden_size:
+ raise ValueError(
+ f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
+ f" and `num_heads`: {self.num_heads})."
+ )
+ self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.bias)
+ self.k_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.bias)
+ self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.bias)
+ self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=config.bias)
+ self.rotary_emb = InternLMRotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings)
+
+ def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+ return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: bool = False,
+ use_cache: bool = False,
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ bsz, q_len, _ = hidden_states.size()
+
+ query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+ cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+ query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+ # [bsz, nh, t, hd]
+
+ if past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+
+ past_key_value = (key_states, value_states) if use_cache else None
+
+ attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
+
+ if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+ )
+ attn_weights = attn_weights + attention_mask
+ attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
+
+ # upcast attention to fp32
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+ attn_output = torch.matmul(attn_weights, value_states)
+
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.transpose(1, 2)
+ attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+ attn_output = self.o_proj(attn_output)
+
+ if not output_attentions:
+ attn_weights = None
+
+ return attn_output, attn_weights, past_key_value
+
+
+class InternLMDecoderLayer(nn.Module):
+ def __init__(self, config: InternLMConfig):
+ super().__init__()
+ self.hidden_size = config.hidden_size
+ self.self_attn = InternLMAttention(config=config)
+ self.mlp = InternLMMLP(
+ hidden_size=self.hidden_size,
+ intermediate_size=config.intermediate_size,
+ hidden_act=config.hidden_act,
+ )
+ self.input_layernorm = InternLMRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+ self.post_attention_layernorm = InternLMRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ def forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: Optional[bool] = False,
+ use_cache: Optional[bool] = False,
+ ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+ """
+ Args:
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+ attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
+ `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ use_cache (`bool`, *optional*):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+ (see `past_key_values`).
+ past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
+ """
+
+ residual = hidden_states
+
+ hidden_states = self.input_layernorm(hidden_states)
+
+ # Self Attention
+ hidden_states, self_attn_weights, present_key_value = self.self_attn(
+ hidden_states=hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ )
+ hidden_states = residual + hidden_states
+
+ # Fully Connected
+ residual = hidden_states
+ hidden_states = self.post_attention_layernorm(hidden_states)
+ hidden_states = self.mlp(hidden_states)
+ hidden_states = residual + hidden_states
+
+ outputs = (hidden_states,)
+
+ if output_attentions:
+ outputs += (self_attn_weights,)
+
+ if use_cache:
+ outputs += (present_key_value,)
+
+ return outputs
+
+
+INTERNLM_START_DOCSTRING = r"""
+ This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+ library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
+ etc.)
+
+ This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+ and behavior.
+
+ Parameters:
+ config ([`InternLMConfig`]):
+ Model configuration class with all the parameters of the model. Initializing with a config file does not
+ load the weights associated with the model, only the configuration. Check out the
+ [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+
+
+@add_start_docstrings(
+ "The bare InternLM Model outputting raw hidden-states without any specific head on top.",
+ INTERNLM_START_DOCSTRING,
+)
+class InternLMPreTrainedModel(PreTrainedModel):
+ config_class = InternLMConfig
+ base_model_prefix = "model"
+ supports_gradient_checkpointing = True
+ _no_split_modules = ["InternLMDecoderLayer"]
+ _keys_to_ignore_on_load_unexpected = [r"decoder\.version"]
+
+ def _init_weights(self, module):
+ std = self.config.initializer_range
+ if isinstance(module, nn.Linear):
+ module.weight.data.normal_(mean=0.0, std=std)
+ if module.bias is not None:
+ module.bias.data.zero_()
+ elif isinstance(module, nn.Embedding):
+ module.weight.data.normal_(mean=0.0, std=std)
+ if module.padding_idx is not None:
+ module.weight.data[module.padding_idx].zero_()
+
+ def _set_gradient_checkpointing(self, module, value=False):
+ if isinstance(module, InternLMModel):
+ module.gradient_checkpointing = value
+
+
+INTERNLM_INPUTS_DOCSTRING = r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+ it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
+ `past_key_values`).
+
+ If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
+ and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
+ information on the default strategy.
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+ position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+ config.n_positions - 1]`.
+
+ [What are position IDs?](../glossary#position-ids)
+ past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+ Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+ `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape
+ `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
+
+ Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
+ blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+ If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
+ don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
+ `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
+ is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
+ model's internal embedding lookup matrix.
+ use_cache (`bool`, *optional*):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
+ `past_key_values`).
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+ tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+ more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+
+@add_start_docstrings(
+ "The bare InternLM Model outputting raw hidden-states without any specific head on top.",
+ INTERNLM_START_DOCSTRING,
+)
+class InternLMModel(InternLMPreTrainedModel):
+ """
+ Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`InternLMDecoderLayer`]
+
+ Args:
+ config: InternLMConfig
+ """
+ _auto_class = "AutoModel"
+
+ def __init__(self, config: InternLMConfig):
+ super().__init__(config)
+ self.padding_idx = config.pad_token_id
+ self.vocab_size = config.vocab_size
+
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+ self.layers = nn.ModuleList([InternLMDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+ self.norm = InternLMRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ self.gradient_checkpointing = False
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.embed_tokens = value
+
+ # Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask
+ def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
+ # create causal mask
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ combined_attention_mask = None
+ if input_shape[-1] > 1:
+ combined_attention_mask = _make_causal_mask(
+ input_shape,
+ inputs_embeds.dtype,
+ device=inputs_embeds.device,
+ past_key_values_length=past_key_values_length,
+ )
+
+ if attention_mask is not None:
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+ expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
+ inputs_embeds.device
+ )
+ combined_attention_mask = (
+ expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
+ )
+
+ return combined_attention_mask
+
+ @add_start_docstrings_to_model_forward(INTERNLM_INPUTS_DOCSTRING)
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPast]:
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+ elif input_ids is not None:
+ batch_size, seq_length = input_ids.shape
+ elif inputs_embeds is not None:
+ batch_size, seq_length, _ = inputs_embeds.shape
+ else:
+ raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+ seq_length_with_past = seq_length
+ past_key_values_length = 0
+
+ if past_key_values is not None:
+ past_key_values_length = past_key_values[0][0].shape[2]
+ seq_length_with_past = seq_length_with_past + past_key_values_length
+
+ if position_ids is None:
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
+ position_ids = torch.arange(
+ past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+ )
+ position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+ else:
+ position_ids = position_ids.view(-1, seq_length).long()
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids)
+ # embed positions
+ if attention_mask is None:
+ attention_mask = torch.ones(
+ (batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
+ )
+ attention_mask = self._prepare_decoder_attention_mask(
+ attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
+ )
+
+ hidden_states = inputs_embeds
+
+ if self.gradient_checkpointing and self.training:
+ if use_cache:
+ logger.warning_once(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+ )
+ use_cache = False
+
+ # decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ next_decoder_cache = () if use_cache else None
+
+ for idx, decoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ past_key_value = past_key_values[idx] if past_key_values is not None else None
+
+ if self.gradient_checkpointing and self.training:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ # None for past_key_value
+ return module(*inputs, output_attentions, None)
+
+ return custom_forward
+
+ layer_outputs = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(decoder_layer),
+ hidden_states,
+ attention_mask,
+ position_ids,
+ None,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+
+ hidden_states = self.norm(hidden_states)
+
+ # add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = next_decoder_cache if use_cache else None
+ if not return_dict:
+ return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+ return BaseModelOutputWithPast(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ )
+
+
+class InternLMForCausalLM(InternLMPreTrainedModel):
+ _auto_class = "AutoModelForCausalLM"
+
+ def __init__(self, config):
+ super().__init__(config)
+ self.model = InternLMModel(config)
+
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.model.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.model.embed_tokens = value
+
+ def get_output_embeddings(self):
+ return self.lm_head
+
+ def set_output_embeddings(self, new_embeddings):
+ self.lm_head = new_embeddings
+
+ def set_decoder(self, decoder):
+ self.model = decoder
+
+ def get_decoder(self):
+ return self.model
+
+ @add_start_docstrings_to_model_forward(INTERNLM_INPUTS_DOCSTRING)
+ @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, CausalLMOutputWithPast]:
+ r"""
+ Args:
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+
+ Returns:
+
+ Example:
+
+ ```python
+ >>> from transformers import AutoTokenizer, InternLMForCausalLM
+
+ >>> model = InternLMForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
+ >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
+
+ >>> prompt = "Hey, are you consciours? Can you talk to me?"
+ >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+ >>> # Generate
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+ "Hey, are you consciours? Can you talk to me?\nI'm not consciours, but I can talk to you."
+ ```"""
+
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # decoder outputs consist of (dec_features, layer_state, dec_hidden, dec_attn)
+ outputs = self.model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+ hidden_states = outputs[0]
+ logits = self.lm_head(hidden_states)
+
+ loss = None
+ if labels is not None:
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
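+ # e.g. with labels [t0, t1, t2], the logits at positions 0 and 1 are
+ # scored against t1 and t2: position i learns to predict token i + 1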
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ shift_logits = shift_logits.view(-1, self.config.vocab_size)
+ shift_labels = shift_labels.view(-1)
+ # Enable model parallelism
+ shift_labels = shift_labels.to(shift_logits.device)
+ loss = loss_fct(shift_logits, shift_labels)
+
+ if not return_dict:
+ output = (logits,) + outputs[1:]
+ return (loss,) + output if loss is not None else output
+
+ return CausalLMOutputWithPast(
+ loss=loss,
+ logits=logits,
+ past_key_values=outputs.past_key_values,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
+
+ def prepare_inputs_for_generation(
+ self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
+ ):
+ if past_key_values:
+ input_ids = input_ids[:, -1:]
+
+ position_ids = kwargs.get("position_ids", None)
+ if attention_mask is not None and position_ids is None:
+ # create position_ids on the fly for batch generation
+ position_ids = attention_mask.long().cumsum(-1) - 1
+ position_ids.masked_fill_(attention_mask == 0, 1)
+ if past_key_values:
+ position_ids = position_ids[:, -1].unsqueeze(-1)
+
+ # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+ if inputs_embeds is not None and past_key_values is None:
+ model_inputs = {"inputs_embeds": inputs_embeds}
+ else:
+ model_inputs = {"input_ids": input_ids}
+
+ model_inputs.update(
+ {
+ "position_ids": position_ids,
+ "past_key_values": past_key_values,
+ "use_cache": kwargs.get("use_cache"),
+ "attention_mask": attention_mask,
+ }
+ )
+ return model_inputs
+
+ @staticmethod
+ def _reorder_cache(past_key_values, beam_idx):
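+ # Reorder each layer's cached key/value states along the batch dimension
+ # so the cache follows the beams selected at this beam-search step.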
+ reordered_past = ()
+ for layer_past in past_key_values:
+ reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
+ return reordered_past
+
+ def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = []):
+ # <eoh>/<eoa> mark end-of-human and end-of-assistant turns in the
+ # InternLM chat template
+ prompt = ""
+ for record in history:
+ prompt += f"""<s><|User|>:{record[0]}<eoh>\n<|Bot|>:{record[1]}<eoa>\n"""
+ if len(prompt) == 0:
+ prompt += "<s>"
+ prompt += f"""<|User|>:{query}<eoh>\n<|Bot|>:"""
+ return tokenizer([prompt], return_tensors="pt")
+
+ @torch.no_grad()
+ def chat(self,
+ tokenizer,
+ query: str,
+ history: List[Tuple[str, str]] = [],
+ streamer: Optional[BaseStreamer] = None,
+ max_new_tokens: int = 1024,
+ do_sample: bool = True,
+ temperature: float = 0.8,
+ top_p: float = 0.8,
+ **kwargs):
+ inputs = self.build_inputs(tokenizer, query, history)
+ inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)}
+ outputs = self.generate(**inputs,
+ streamer=streamer,
+ max_new_tokens=max_new_tokens,
+ do_sample=do_sample,
+ temperature=temperature,
+ top_p=top_p,
+ **kwargs)
+ outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]):]
+ response = tokenizer.decode(outputs, skip_special_tokens=True)
+ response = response.split("<eoa>")[0]
+ history = history + [(query, response)]
+ return response, history
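+ # Minimal usage sketch (illustrative only; assumes `model` and `tokenizer`
+ # were loaded elsewhere, e.g. with `from_pretrained`):
+ #   response, history = model.chat(tokenizer, "hello", history=[])
+ #   response, history = model.chat(tokenizer, "tell me more", history=history)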
+
+ @torch.no_grad()
+ def stream_chat(self,
+ tokenizer,
+ query: str,
+ history: List[Tuple[str, str]] = [],
+ max_new_tokens: int = 1024,
+ do_sample: bool = True,
+ temperature: float = 0.8,
+ top_p: float = 0.8,
+ **kwargs):
+ """
+ Return a generator in format: (response, history)
+ Eg.
+ ('你好,有什么可以帮助您的吗', [('你好', '你好,有什么可以帮助您的吗')])
+ ('你好,有什么可以帮助您的吗?', [('你好', '你好,有什么可以帮助您的吗?')])
+ """
+
+ response_queue = queue.Queue(maxsize=20)
+
+ class ChatStreamer(BaseStreamer):
+ def __init__(self, tokenizer) -> None:
+ super().__init__()
+ self.tokenizer = tokenizer
+ self.queue = response_queue
+ self.query = query
+ self.history = history
+ self.response = ""
+ self.received_inputs = False
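+ # Seed the queue with an empty response so consumers see the new
+ # turn immediately, before any tokens have been generated.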
+ self.queue.put((self.response, history + [(self.query, self.response)]))
+
+ def put(self, value):
+ if len(value.shape) > 1 and value.shape[0] > 1:
+ raise ValueError("ChatStreamer only supports batch size 1")
+ elif len(value.shape) > 1:
+ value = value[0]
+
+ if not self.received_inputs:
+ # The first value received is the prompt's input_ids; ignore it here
+ self.received_inputs = True
+ return
+
+ token = self.tokenizer.decode([value[-1]], skip_special_tokens=True)
+ if token.strip() != "<eoa>":
+ self.response = self.response + token
+ history = self.history + [(self.query, self.response)]
+ self.queue.put((self.response, history))
+
+ def end(self):
+ self.queue.put(None)
+
+ def stream_producer():
+ return self.chat(
+ tokenizer=tokenizer,
+ query=query,
+ streamer=ChatStreamer(tokenizer=tokenizer),
+ history=history,
+ max_new_tokens=max_new_tokens,
+ do_sample=do_sample,
+ temperature=temperature,
+ top_p=top_p,
+ **kwargs
+ )
+
+ def consumer():
+ producer = threading.Thread(target=stream_producer)
+ producer.start()
+ while True:
+ res = response_queue.get()
+ if res is None:
+ return
+ yield res
+
+ return consumer()
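+ # Minimal usage sketch (illustrative only; assumes `model` and `tokenizer`
+ # were loaded elsewhere); each iteration yields the partial response so far:
+ #   for response, history in model.stream_chat(tokenizer, "hello"):
+ #       print(response)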
+
+
+@add_start_docstrings(
+ """
+ The InternLM Model transformer with a sequence classification head on top (linear layer).
+
+ [`InternLMForSequenceClassification`] uses the last token in order to do the classification, as other causal models
+ (e.g. GPT-2) do.
+
+ Since it does classification on the last token, it needs to know the position of the last token. If a
+ `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If
+ no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
+ padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (takes the last value in
+ each row of the batch).
+ """,
+ INTERNLM_START_DOCSTRING,
+)
+class InternLMForSequenceClassification(InternLMPreTrainedModel):
+ _keys_to_ignore_on_load_missing = [r"lm_head.weight"]
+
+ def __init__(self, config):
+ super().__init__(config)
+ self.num_labels = config.num_labels
+ self.model = InternLMModel(config)
+ self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.model.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.model.embed_tokens = value
+
+ @add_start_docstrings_to_model_forward(INTERNLM_INPUTS_DOCSTRING)
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+ r"""
+ labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+ Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+ config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+ `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+ """
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ transformer_outputs = self.model(
+ input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ hidden_states = transformer_outputs[0]
+ logits = self.score(hidden_states)
+
+ if input_ids is not None:
+ batch_size = input_ids.shape[0]
+ else:
+ batch_size = inputs_embeds.shape[0]
+
+ if self.config.pad_token_id is None and batch_size != 1:
+ raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
+ if self.config.pad_token_id is None:
+ sequence_lengths = -1
+ else:
+ if input_ids is not None:
+ sequence_lengths = (torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1).to(logits.device)
+ else:
+ sequence_lengths = -1
+
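+ # Select the logits of the last non-padding token in each row; when
+ # sequence_lengths is -1, this indexes the final position.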
+ pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
+
+ loss = None
+ if labels is not None:
+ labels = labels.to(logits.device)
+ if self.config.problem_type is None:
+ if self.num_labels == 1:
+ self.config.problem_type = "regression"
+ elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+ self.config.problem_type = "single_label_classification"
+ else:
+ self.config.problem_type = "multi_label_classification"
+
+ if self.config.problem_type == "regression":
+ loss_fct = MSELoss()
+ if self.num_labels == 1:
+ loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
+ else:
+ loss = loss_fct(pooled_logits, labels)
+ elif self.config.problem_type == "single_label_classification":
+ loss_fct = CrossEntropyLoss()
+ loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
+ elif self.config.problem_type == "multi_label_classification":
+ loss_fct = BCEWithLogitsLoss()
+ loss = loss_fct(pooled_logits, labels)
+ if not return_dict:
+ output = (pooled_logits,) + transformer_outputs[1:]
+ return ((loss,) + output) if loss is not None else output
+
+ return SequenceClassifierOutputWithPast(
+ loss=loss,
+ logits=pooled_logits,
+ past_key_values=transformer_outputs.past_key_values,
+ hidden_states=transformer_outputs.hidden_states,
+ attentions=transformer_outputs.attentions,
+ )
\ No newline at end of file
diff --git a/triton_models/tokenizer/placeholder b/triton_models/tokenizer/placeholder
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/triton_models/tokenizer/pytorch_model.bin.index.json b/triton_models/tokenizer/pytorch_model.bin.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..205a5c72d2aa730500f845a7424893a922ddaa9a
--- /dev/null
+++ b/triton_models/tokenizer/pytorch_model.bin.index.json
@@ -0,0 +1,1390 @@
+{
+ "metadata": {
+ "total_size": 12001658880
+ },
+ "weight_map": {
+ "lm_head.weight": "pytorch_model-00002-of-00002.bin",
+ "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.0.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.1.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.10.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.11.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.12.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.13.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.14.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.15.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.16.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.17.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.18.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.19.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.2.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.20.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.21.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.22.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.23.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.24.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.25.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.26.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.27.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.28.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.29.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.3.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.30.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.31.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.32.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.33.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.34.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.35.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.36.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.37.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.38.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.39.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.4.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.40.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.41.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.42.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.43.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.44.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.45.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.46.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.47.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.48.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.49.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.5.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.50.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.51.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.52.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.53.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.54.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.54.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.55.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.k_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.k_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.k_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.q_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.q_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.q_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.v_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.v_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.55.self_attn.v_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.k_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.k_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.k_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.q_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.q_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.q_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.v_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.v_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.56.self_attn.v_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.k_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.k_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.k_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.q_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.q_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.q_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.v_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.v_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.57.self_attn.v_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.k_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.k_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.k_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.q_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.q_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.q_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.v_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.v_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.58.self_attn.v_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.down_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.down_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.down_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.gate_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.gate_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.gate_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.up_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.up_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.mlp.up_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.k_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.k_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.k_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.o_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.o_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.o_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.q_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.q_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.q_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.v_proj.qweight": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.v_proj.qzeros": "pytorch_model-00002-of-00002.bin",
+ "model.layers.59.self_attn.v_proj.scales": "pytorch_model-00002-of-00002.bin",
+ "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.6.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.7.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.8.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.down_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.down_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.down_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.gate_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.gate_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.gate_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.up_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.up_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.mlp.up_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.k_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.k_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.k_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.o_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.o_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.o_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.q_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.q_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.q_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.v_proj.qweight": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.v_proj.qzeros": "pytorch_model-00001-of-00002.bin",
+ "model.layers.9.self_attn.v_proj.scales": "pytorch_model-00001-of-00002.bin",
+ "model.norm.weight": "pytorch_model-00002-of-00002.bin"
+ }
+}
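Editor's note: the map that ends above is the tail of a sharded-checkpoint index in the `pytorch_model.bin.index.json` style, keying every quantized tensor (`qweight`, `qzeros`, `scales`) to the shard file that stores it. A minimal sketch of how such an index is consumed, assuming the standard two-shard layout shown above; the helper name and paths are illustrative, not part of this repo:

```python
# Sketch only: resolve a tensor name through the shard index, then load
# just the shard that holds it. Assumes an index file named
# pytorch_model.bin.index.json with a "weight_map" key, as above.
import json
import os

import torch


def load_tensor(model_dir: str, name: str) -> torch.Tensor:
    """Look up which shard holds `name`, then load only that shard."""
    with open(os.path.join(model_dir, 'pytorch_model.bin.index.json')) as f:
        index = json.load(f)
    shard_file = index['weight_map'][name]  # e.g. 'pytorch_model-00001-of-00002.bin'
    shard = torch.load(os.path.join(model_dir, shard_file), map_location='cpu')
    return shard[name]


# e.g. the packed weight of layer 8's q_proj lives in shard 1 per the map above:
# w = load_tensor('.', 'model.layers.8.self_attn.q_proj.qweight')
```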
diff --git a/triton_models/tokenizer/special_tokens_map.json b/triton_models/tokenizer/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..9bfed7513d3b1b65445af10c4571256f4a19b290
--- /dev/null
+++ b/triton_models/tokenizer/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+ "bos_token": "",
+ "eos_token": "",
+ "pad_token": "",
+ "unk_token": ""
+}
diff --git a/triton_models/tokenizer/tokenization_internlm.py b/triton_models/tokenizer/tokenization_internlm.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6a348959c94afcc41f02caacf47a8bf23078dca
--- /dev/null
+++ b/triton_models/tokenizer/tokenization_internlm.py
@@ -0,0 +1,242 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tokenization classes for IntermLM."""
+import os
+from shutil import copyfile
+from typing import Any, Dict, List, Optional, Tuple
+
+import sentencepiece as spm
+
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}
+
+PRETRAINED_VOCAB_FILES_MAP = {}
+
+
+class InternLMTokenizer(PreTrainedTokenizer):
+ """
+ Construct an InternLM tokenizer. Based on byte-level Byte-Pair-Encoding.
+
+ Args:
+ vocab_file (`str`):
+ Path to the vocabulary file.
+ """
+
+ vocab_files_names = VOCAB_FILES_NAMES
+ pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+ model_input_names = ["input_ids", "attention_mask"]
+ _auto_class = "AutoTokenizer"
+
+ def __init__(
+ self,
+ vocab_file,
+ unk_token="<unk>",
+ bos_token="<s>",
+ eos_token="</s>",
+ pad_token="</s>",
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
+ add_bos_token=True,
+ add_eos_token=False,
+ decode_with_prefix_space=False,
+ clean_up_tokenization_spaces=False,
+ **kwargs,
+ ):
+ self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
+ super().__init__(
+ bos_token=bos_token,
+ eos_token=eos_token,
+ unk_token=unk_token,
+ pad_token=pad_token,
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+ **kwargs,
+ )
+ self.vocab_file = vocab_file
+ self.add_bos_token = add_bos_token
+ self.add_eos_token = add_eos_token
+ self.decode_with_prefix_space = decode_with_prefix_space
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+ self.sp_model.Load(vocab_file)
+ self._no_prefix_space_tokens = None
+
+ """ Initialisation"""
+
+ @property
+ def no_prefix_space_tokens(self):
+ if self._no_prefix_space_tokens is None:
+ vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
+ self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")}
+ return self._no_prefix_space_tokens
+
+ @property
+ def vocab_size(self):
+ """Returns vocab size"""
+ return self.sp_model.get_piece_size()
+
+ @property
+ def bos_token_id(self) -> Optional[int]:
+ return self.sp_model.bos_id()
+
+ @property
+ def eos_token_id(self) -> Optional[int]:
+ return self.sp_model.eos_id()
+
+ def get_vocab(self):
+ """Returns vocab as a dict"""
+ vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+ vocab.update(self.added_tokens_encoder)
+ return vocab
+
+ def _tokenize(self, text):
+ """Returns a tokenized string."""
+ return self.sp_model.encode(text, out_type=str)
+
+ def _convert_token_to_id(self, token):
+ """Converts a token (str) in an id using the vocab."""
+ return self.sp_model.piece_to_id(token)
+
+ def _convert_id_to_token(self, index):
+ """Converts an index (integer) in a token (str) using the vocab."""
+ token = self.sp_model.IdToPiece(index)
+ return token
+
+ def _maybe_add_prefix_space(self, tokens, decoded):
+ if tokens and tokens[0] not in self.no_prefix_space_tokens:
+ return " " + decoded
+ else:
+ return decoded
+
+ def convert_tokens_to_string(self, tokens):
+ """Converts a sequence of tokens (string) in a single string."""
+ current_sub_tokens = []
+ out_string = ""
+ prev_is_special = False
+ for token in tokens:
+ # make sure that special tokens are not decoded using sentencepiece model
+ if token in self.all_special_tokens:
+ if not prev_is_special:
+ out_string += " "
+ out_string += self.sp_model.decode(current_sub_tokens) + token
+ prev_is_special = True
+ current_sub_tokens = []
+ else:
+ current_sub_tokens.append(token)
+ prev_is_special = False
+ out_string += self.sp_model.decode(current_sub_tokens)
+ out_string = self.clean_up_tokenization(out_string)
+ out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
+ return out_string[1:]
+
+ def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
+ """
+ Save the vocabulary and special tokens file to a directory.
+
+ Args:
+ save_directory (`str`):
+ The directory in which to save the vocabulary.
+
+ Returns:
+ `Tuple(str)`: Paths to the files saved.
+ """
+ if not os.path.isdir(save_directory):
+ logger.error(f"Vocabulary path ({save_directory}) should be a directory")
+ return
+ out_vocab_file = os.path.join(
+ save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
+ )
+
+ if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
+ copyfile(self.vocab_file, out_vocab_file)
+ elif not os.path.isfile(self.vocab_file):
+ with open(out_vocab_file, "wb") as fi:
+ content_spiece_model = self.sp_model.serialized_model_proto()
+ fi.write(content_spiece_model)
+
+ return (out_vocab_file,)
+
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
+ if self.add_bos_token:
+ bos_token_ids = [self.bos_token_id]
+ else:
+ bos_token_ids = []
+
+ output = bos_token_ids + token_ids_0
+
+ if token_ids_1 is not None:
+ output = output + token_ids_1
+
+ if self.add_eos_token:
+ output = output + [self.eos_token_id]
+
+ return output
+
+ def get_special_tokens_mask(
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
+ ) -> List[int]:
+ """
+ Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
+ special tokens using the tokenizer `prepare_for_model` method.
+
+ Args:
+ token_ids_0 (`List[int]`):
+ List of IDs.
+ token_ids_1 (`List[int]`, *optional*):
+ Optional second list of IDs for sequence pairs.
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+ Whether or not the token list is already formatted with special tokens for the model.
+
+ Returns:
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+ """
+ if already_has_special_tokens:
+ return super().get_special_tokens_mask(
+ token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
+ )
+
+ if token_ids_1 is None:
+ return [1] + ([0] * len(token_ids_0)) + [1]
+ return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
+
+ def create_token_type_ids_from_sequences(
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+ ) -> List[int]:
+ """
+ Create a mask from the two sequences passed to be used in a sequence-pair classification task. InternLM does not
+ make use of token type ids, therefore a list of zeros is returned.
+
+ Args:
+ token_ids_0 (`List[int]`):
+ List of IDs.
+ token_ids_1 (`List[int]`, *optional*):
+ Optional second list of IDs for sequence pairs.
+
+ Returns:
+ `List[int]`: List of zeros.
+ """
+ eos = [self.eos_token_id]
+
+ if token_ids_1 is None:
+ return len(token_ids_0 + eos) * [0]
+ return len(token_ids_0 + eos + token_ids_1 + eos) * [0]
\ No newline at end of file
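Editor's note: a hedged usage sketch for the `InternLMTokenizer` defined above; the relative path to `tokenizer.model` is an assumption for illustration, and `transformers` plus `sentencepiece` are assumed installed:

```python
# Sketch only: round-trip a prompt through the custom tokenizer class.
from tokenization_internlm import InternLMTokenizer

tok = InternLMTokenizer(vocab_file='./tokenizer.model')
ids = tok.encode('Hello, world!')  # add_bos_token=True, so BOS is prepended
print(ids)
print(tok.decode(ids, skip_special_tokens=True))
```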
diff --git a/triton_models/tokenizer/tokenizer.model b/triton_models/tokenizer/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..24f4d0607b1f6a966a5d653bb255813638de0bec
--- /dev/null
+++ b/triton_models/tokenizer/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aab622d98c98677a1a51f969e25765154487bf3e85c7819db105db2fcacba83f
+size 1658691
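Editor's note: `tokenizer.model` and the weight files that follow are stored as Git LFS pointers, three `key value` lines giving the spec version, a `sha256` object id, and the blob size in bytes. A small parser for that pointer format (a sketch, not part of this repo):

```python
# Sketch only: parse a Git LFS pointer file into its three fields.
def parse_lfs_pointer(path: str) -> dict:
    """Return {'version': ..., 'oid': 'sha256:...', 'size': int}."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    fields['size'] = int(fields['size'])  # blob size in bytes
    return fields


# e.g. parse_lfs_pointer('triton_models/tokenizer/tokenizer.model')
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': 'sha256:aab6...', 'size': 1658691}
```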
diff --git a/triton_models/tokenizer/tokenizer.py b/triton_models/tokenizer/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..966f24148ec4ad62231b579a5e87519de59a3611
--- /dev/null
+++ b/triton_models/tokenizer/tokenizer.py
@@ -0,0 +1,291 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os.path as osp
+from typing import Optional, Sequence, Union
+
+import torch
+
+
+class SentencePieceTokenizer:
+ """Tokenizer of sentencepiece.
+
+ Args:
+ model_file (str): the path of the tokenizer model
+ """
+
+ def __init__(self, model_file: str):
+ from sentencepiece import SentencePieceProcessor
+ self.model = SentencePieceProcessor(model_file=model_file)
+ self._no_prefix_space_tokens = None
+
+ @property
+ def vocab_size(self):
+ """vocabulary size."""
+ return self.model.vocab_size()
+
+ @property
+ def bos_token_id(self):
+ """begine of the sentence token id."""
+ return self.model.bos_id()
+
+ @property
+ def eos_token_id(self):
+ """end of the sentence token id."""
+ return self.model.eos_id()
+
+ @property
+ def no_prefix_space_tokens(self):
+ """tokens without prefix space."""
+ if self._no_prefix_space_tokens is None:
+ vocab = self.model.IdToPiece(list(range(self.vocab_size)))
+ self._no_prefix_space_tokens = {
+ i
+ for i, tok in enumerate(vocab) if not tok.startswith('▁')
+ }
+ return self._no_prefix_space_tokens
+
+ def _maybe_add_prefix_space(self, tokens, decoded):
+ """maybe add prefix space for incremental decoding."""
+ if len(tokens) and tokens[0] not in self.no_prefix_space_tokens:
+ return ' ' + decoded
+ else:
+ return decoded
+
+ def encode(self, s: str):
+ """Tokenize a prompt.
+
+ Args:
+ s (str): a prompt
+ Returns:
+ list[int]: token ids
+ """
+ add_bos = False
+ add_eos = False
+ if s.find('<BOS>') != -1:
+ s = s.replace('<BOS>', '')
+ add_bos = True
+ if s == '<EOS>':
+ s = ''
+ add_eos = True
+ return self.model.Encode(s, add_bos=add_bos, add_eos=add_eos)
+
+ def decode(self, t: Sequence[int], offset: Optional[int] = None):
+ """De-tokenize.
+
+ Args:
+ t (List[int]): a list of token ids
+ offset (int): for incremental decoding. Defaults to None, which
+ means it is not applied.
+ Returns:
+ str: text of decoding tokens
+ """
+ if isinstance(t, torch.Tensor):
+ t = t.tolist()
+ t = t[offset:]
+ out_string = self.model.Decode(t)
+ if offset:
+ out_string = self._maybe_add_prefix_space(t, out_string)
+ return out_string
+
+ def __call__(self, s: Union[str, Sequence[str]]):
+ """Tokenize prompts.
+
+ Args:
+ s (str): prompts
+ Returns:
+ list[int]: token ids
+ """
+ import addict
+ add_bos = False
+ add_eos = False
+
+ input_ids = self.model.Encode(s, add_bos=add_bos, add_eos=add_eos)
+ return addict.Addict(input_ids=input_ids)
+
+
+class HuggingFaceTokenizer:
+ """Tokenizer of sentencepiece.
+
+ Args:
+ model_dir (str): the directory of the tokenizer model
+ """
+
+ def __init__(self, model_dir: str):
+ from transformers import (AutoTokenizer, CodeLlamaTokenizerFast,
+ LlamaTokenizer, LlamaTokenizerFast)
+ model_file = osp.join(model_dir, 'tokenizer.model')
+ backend_tokenizer_file = osp.join(model_dir, 'tokenizer.json')
+ model_file_exists = osp.exists(model_file)
+ if not osp.exists(backend_tokenizer_file) and model_file_exists:
+ print('WARNING: Cannot find tokenizer.json. '
+ 'It may take a long time to initialize the tokenizer.')
+ self.model = AutoTokenizer.from_pretrained(model_dir,
+ trust_remote_code=True)
+ self.need_padding = type(self.model) in [
+ LlamaTokenizer, LlamaTokenizerFast, CodeLlamaTokenizerFast
+ ]
+ self._no_prefix_space_tokens = None
+ # save tokenizer.json to reuse
+ if not osp.exists(backend_tokenizer_file) and model_file_exists:
+ if hasattr(self.model, 'backend_tokenizer'):
+ self.model.backend_tokenizer.save(backend_tokenizer_file)
+
+ if self.model.eos_token_id is None:
+ generation_config_file = osp.join(model_dir,
+ 'generation_config.json')
+ with open(generation_config_file, 'r') as f:
+ cfg = json.load(f)
+ self.model.eos_token_id = cfg['eos_token_id']
+
+ @property
+ def vocab_size(self):
+ """vocabulary size."""
+ return self.model.vocab_size
+
+ @property
+ def bos_token_id(self):
+ """begine of the sentence token id."""
+ return self.model.bos_token_id
+
+ @property
+ def eos_token_id(self):
+ """end of the sentence token id."""
+ return self.model.eos_token_id
+
+ @property
+ def no_prefix_space_tokens(self):
+ """tokens without prefix space."""
+ if self._no_prefix_space_tokens is None:
+ vocab = self.model.convert_ids_to_tokens(
+ list(range(self.vocab_size)))
+ self._no_prefix_space_tokens = {
+ i
+ for i, tok in enumerate(vocab) if not tok.startswith('▁')
+ }
+ return self._no_prefix_space_tokens
+
+ def _maybe_add_prefix_space(self, tokens, decoded):
+ """maybe add prefix space for incremental decoding."""
+ if self.need_padding and len(
+ tokens) and tokens[0] not in self.no_prefix_space_tokens:
+ return ' ' + decoded
+ else:
+ return decoded
+
+ def encode(self, s: str):
+ """Tokenize a prompt.
+
+ Args:
+ s (str): a prompt
+ Returns:
+ list[int]: token ids
+ """
+ add_special_tokens = False
+ if s.find('<BOS>') != -1:
+ s = s.replace('<BOS>', '')
+ if s == '<EOS>':
+ s = ''
+ if len(s) == 0:
+ add_special_tokens = True
+ return self.model.encode(s, add_special_tokens=add_special_tokens)
+
+ def decode(self, t: Sequence[int], offset: Optional[int] = None):
+ """De-tokenize.
+
+ Args:
+ t (List[int]): a list of token ids
+ offset (int): for incremental decoding. Defaults to None, which
+ means it is not applied.
+ Returns:
+ str: text of decoding tokens
+ """
+ skip_special_tokens = True
+ t = t[offset:]
+ out_string = self.model.decode(t,
+ skip_special_tokens=skip_special_tokens)
+ if offset:
+ out_string = self._maybe_add_prefix_space(t, out_string)
+ return out_string
+
+ def __call__(self, s: Union[str, Sequence[str]]):
+ """Tokenize prompts.
+
+ Args:
+ s (str): prompts
+ Returns:
+ list[int]: token ids
+ """
+ add_special_tokens = False
+ return self.model(s, add_special_tokens=add_special_tokens)
+
+
+class Tokenizer:
+ """Tokenize prompts or de-tokenize tokens into texts.
+
+ Args:
+ model_file (str): the path of the tokenizer model, or the folder containing it
+ """
+
+ def __init__(self, model_file: str):
+ if model_file.endswith('.model'):
+ model_folder = osp.split(model_file)[0]
+ else:
+ model_folder = model_file
+ model_file = osp.join(model_folder, 'tokenizer.model')
+ tokenizer_config_file = osp.join(model_folder, 'tokenizer_config.json')
+
+ model_file_exists = osp.exists(model_file)
+ config_exists = osp.exists(tokenizer_config_file)
+ use_hf_model = config_exists or not model_file_exists
+
+ if not use_hf_model:
+ self.model = SentencePieceTokenizer(model_file)
+ else:
+ self.model = HuggingFaceTokenizer(model_folder)
+
+ @property
+ def vocab_size(self):
+ """vocabulary size."""
+ return self.model.vocab_size
+
+ @property
+ def bos_token_id(self):
+ """begine of the sentence token id."""
+ return self.model.bos_token_id
+
+ @property
+ def eos_token_id(self):
+ """end of the sentence token id."""
+ return self.model.eos_token_id
+
+ def encode(self, s: str):
+ """Tokenize a prompt.
+
+ Args:
+ s (str): a prompt
+ Returns:
+ list[int]: token ids
+ """
+ return self.model.encode(s)
+
+ def decode(self, t: Sequence[int], offset: Optional[int] = None):
+ """De-tokenize.
+
+ Args:
+ t (List[int]): a list of token ids
+ offset (int): for incremental decoding. Defaults to None, which
+ means it is not applied.
+ Returns:
+ str: text of decoding tokens
+ """
+ return self.model.decode(t, offset)
+
+ def __call__(self, s: Union[str, Sequence[str]]):
+ """Tokenize prompts.
+
+ Args:
+ s (str): prompts
+ Returns:
+ list[int]: token ids
+ """
+ return self.model(s)
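Editor's note: a sketch of the `Tokenizer` facade above in an incremental-decoding loop. Passing the previously decoded length as `offset` makes the backend decode only the new tokens and re-add a leading space when the next piece starts a new word. The model directory path is an assumption for illustration:

```python
# Sketch only: the facade picks the HF backend here because
# tokenizer_config.json exists next to tokenizer.model.
from tokenizer import Tokenizer

tok = Tokenizer('triton_models/tokenizer')
ids = tok.encode('The quick brown fox')

# Decode one token at a time, as a streaming server would.
text = ''
for i in range(len(ids)):
    text += tok.decode(ids[:i + 1], offset=i)
print(text)
```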
diff --git a/triton_models/tokenizer/tokenizer_config.json b/triton_models/tokenizer/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..857ab9eccafd9682a491c525f5ebdc206c607de7
--- /dev/null
+++ b/triton_models/tokenizer/tokenizer_config.json
@@ -0,0 +1,15 @@
+{
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_internlm.InternLMTokenizer",
+ null
+ ]
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "tokenizer_class": "InternLMTokenizer",
+ "unk_token": ""
+}
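Editor's note: the `auto_map` entry above is what lets `AutoTokenizer` resolve the custom class shipped in this directory. A minimal sketch, assuming the tokenizer folder is used as the model path and remote code is trusted:

```python
# Sketch only: trust_remote_code=True allows transformers to import
# tokenization_internlm.InternLMTokenizer from the model directory,
# as declared by "auto_map" in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('triton_models/tokenizer',
                                    trust_remote_code=True)
print(type(tok).__name__)  # -> InternLMTokenizer
```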
diff --git a/triton_models/weights/config.ini b/triton_models/weights/config.ini
new file mode 100644
index 0000000000000000000000000000000000000000..022f810b8bec1d12989ef54a6865e4fef8d76b47
--- /dev/null
+++ b/triton_models/weights/config.ini
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:799897e139626e09f107e0c0b1f8894949246c1f2cd943b997d860d2727c6e19
+size 533
diff --git a/triton_models/weights/layers.0.attention.w_qkv.0.qweight b/triton_models/weights/layers.0.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c407e074b81f23ed03edd35bf6b55a972816b1ad
--- /dev/null
+++ b/triton_models/weights/layers.0.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5bebaea97ddc342b78e8f236b9996d30d473c033a7c3d6021b874ba0cd8b210
+size 39321600
diff --git a/triton_models/weights/layers.0.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.0.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5e7797afb6c372865ce552e5541de8fd614d3e7a
--- /dev/null
+++ b/triton_models/weights/layers.0.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b32a37add95e31b362daaef2a5656a920384986da025a0e6b5f073437c5b7693
+size 2457600
diff --git a/triton_models/weights/layers.0.attention.wo.0.qweight b/triton_models/weights/layers.0.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..88778859b9d6dded8838c26a472ad58c99b1f0df
--- /dev/null
+++ b/triton_models/weights/layers.0.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13858a78f6d336c6014c5fc2f9102ce0f48ca7b5acba5897ea7eb7c0d13984ec
+size 13107200
diff --git a/triton_models/weights/layers.0.attention.wo.0.scales_zeros b/triton_models/weights/layers.0.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..bf5e99516fede30b59afbab3bd00c56c037155e6
--- /dev/null
+++ b/triton_models/weights/layers.0.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78089c5d8a92cc75872f2be9b2971f0288e36be233ad2c00a5f12c5d08e2420
+size 819200
diff --git a/triton_models/weights/layers.0.attention_norm.weight b/triton_models/weights/layers.0.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6c02421be31ab3680ac2e23fe620cce2875fcc1a
--- /dev/null
+++ b/triton_models/weights/layers.0.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:629d4397992d92fe8f54b91b4728c7d243b6c799ea4b045c5e6809dd0d9fa942
+size 10240
diff --git a/triton_models/weights/layers.0.feed_forward.w13.0.qweight b/triton_models/weights/layers.0.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..6a889fe9148ff18089d839cde2d069a02925231e
--- /dev/null
+++ b/triton_models/weights/layers.0.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd7d6275d0ab59f3b2dd3b93b053fd487882f2c9cd3ffe809eca56412ec36ca1
+size 70778880
diff --git a/triton_models/weights/layers.0.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.0.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1a5bd6a80b05be2b9bd2be80e3e723be67379291
--- /dev/null
+++ b/triton_models/weights/layers.0.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d51874fb7b4c32bc95ef39bd6bbb2f001090bf558c5232de51c0c1875911f0a
+size 4423680
diff --git a/triton_models/weights/layers.0.feed_forward.w2.0.qweight b/triton_models/weights/layers.0.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..355dd8b4e93b60e10391ba27fcbdaa3b79bf18db
--- /dev/null
+++ b/triton_models/weights/layers.0.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0590b8fa80842f609df6469552bd78e35b65a023db36b4829894c1d1e5d50f41
+size 35389440
diff --git a/triton_models/weights/layers.0.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.0.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e19d29cb05aa4ae1879437325a65a72f44acbeb5
--- /dev/null
+++ b/triton_models/weights/layers.0.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c02e35a035e2ce3df607ec97f11b6ae0650a43b7a8fdd1fa802b3c1e23e86bd
+size 2211840
diff --git a/triton_models/weights/layers.0.ffn_norm.weight b/triton_models/weights/layers.0.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..dc5eea605129c1f451fffef0ba77b0dd1eb40e2c
--- /dev/null
+++ b/triton_models/weights/layers.0.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f577bde93009bbfb7f1c14c3bc81bb9c5353f390c445e8eda437ecbe4fa101
+size 10240
diff --git a/triton_models/weights/layers.1.attention.w_qkv.0.qweight b/triton_models/weights/layers.1.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a6e3461aecc04d4ec27726c7756320844df73ab1
--- /dev/null
+++ b/triton_models/weights/layers.1.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05770c30f6a20b31ca6bd5f77e0895ee523872623792c4fe151e990e4085e240
+size 39321600
diff --git a/triton_models/weights/layers.1.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.1.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..8d75c7a51be722ee49526d3d2bb804361fcc4ce2
--- /dev/null
+++ b/triton_models/weights/layers.1.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7e342cd5208649ca929dd799fe89e114e7a5429abbc66ca86b952ba80bf8b4d
+size 2457600
diff --git a/triton_models/weights/layers.1.attention.wo.0.qweight b/triton_models/weights/layers.1.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e61754545b261ca824ef22c3fcafabfd5cb88f63
--- /dev/null
+++ b/triton_models/weights/layers.1.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1842b2a0a76be6ab1e8ad290ae9031eb547ac78e35e1bfc453176e13e70c255
+size 13107200
diff --git a/triton_models/weights/layers.1.attention.wo.0.scales_zeros b/triton_models/weights/layers.1.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..82620d6c0d29962d64a3f0ec3df8b39eb70088e5
--- /dev/null
+++ b/triton_models/weights/layers.1.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdbc7d7720814717801c80f08c54f1059effb10cd4683f94a351de23f6ea890b
+size 819200
diff --git a/triton_models/weights/layers.1.attention_norm.weight b/triton_models/weights/layers.1.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..70e3c92f7adc8b18d5705fe0ffb33c6771b1f8d5
--- /dev/null
+++ b/triton_models/weights/layers.1.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bb60ee32fe1687f47b2280c4af54a57990528a4a59fb2bb7782f28fd5f424da
+size 10240
diff --git a/triton_models/weights/layers.1.feed_forward.w13.0.qweight b/triton_models/weights/layers.1.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5c4279d400aae939fc4e8c9737e642bcd917d338
--- /dev/null
+++ b/triton_models/weights/layers.1.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aae50ea32653bb6461c0e338082e98db2107b4a3d471934e4f3354410a6bcbf0
+size 70778880
diff --git a/triton_models/weights/layers.1.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.1.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..78d81f68e3f80e382374bc5002ba561eea394b55
--- /dev/null
+++ b/triton_models/weights/layers.1.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0a782e19f6392ff88942dfe687137c14da1726dce104d5b4f6abe646397885f
+size 4423680
diff --git a/triton_models/weights/layers.1.feed_forward.w2.0.qweight b/triton_models/weights/layers.1.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..61475635b724a676f4136156dbfdfa680e9a2f5c
--- /dev/null
+++ b/triton_models/weights/layers.1.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5d2e652e54d966f5b8b7b405dbdfc66f1f1460489c3ba5fc76c1ee79844abc3
+size 35389440
diff --git a/triton_models/weights/layers.1.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.1.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0db325d93b604a72c82a2d6695153f16af2931cd
--- /dev/null
+++ b/triton_models/weights/layers.1.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33179e4b3a6d851ab0429f9da2244c69fdc9c18a395c19d88b561cba54cb9cd2
+size 2211840
diff --git a/triton_models/weights/layers.1.ffn_norm.weight b/triton_models/weights/layers.1.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..12afcd7b3c81404badb70c2f20d7e313ed0fbe75
--- /dev/null
+++ b/triton_models/weights/layers.1.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c2406535fe735c7c1360214e79ab96071cbee60093f43571fed3247812aa788
+size 10240
diff --git a/triton_models/weights/layers.10.attention.w_qkv.0.qweight b/triton_models/weights/layers.10.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2077f61ed818c323fb5a05912103784adcee2263
--- /dev/null
+++ b/triton_models/weights/layers.10.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:923f2874f171dc9d43c42eb966a312ac14153dd2b35ebdb63f583b8b57fa784c
+size 39321600
diff --git a/triton_models/weights/layers.10.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.10.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3f9afc44410ba9483ddb72d98904f4419dc951a3
--- /dev/null
+++ b/triton_models/weights/layers.10.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8a1a38ec040e7c9424a762d10cb76e06f64942ba66655dd5dabfc383d0bd52e
+size 2457600
diff --git a/triton_models/weights/layers.10.attention.wo.0.qweight b/triton_models/weights/layers.10.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f728494a57a2cdd19602798568cdfdc15de5a9ed
--- /dev/null
+++ b/triton_models/weights/layers.10.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc2e6cd2b5022634b2d298a73b0286994a6df46567950917be5b075f29968d9d
+size 13107200
diff --git a/triton_models/weights/layers.10.attention.wo.0.scales_zeros b/triton_models/weights/layers.10.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..347232491e07473e4fc6da382551e194081ea209
--- /dev/null
+++ b/triton_models/weights/layers.10.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe0accad6e1e3df44cff0bd6e98604ea5f8036d42faa2ff6d33de628d4a748e
+size 819200
diff --git a/triton_models/weights/layers.10.attention_norm.weight b/triton_models/weights/layers.10.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..210066419975b644c55837b5d74442cea8cf4ac5
--- /dev/null
+++ b/triton_models/weights/layers.10.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae31b68e2104223f00cbf2d8dd4eddd4350ecdbf9328e03723c11f42193f6c9f
+size 10240
diff --git a/triton_models/weights/layers.10.feed_forward.w13.0.qweight b/triton_models/weights/layers.10.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..22e386aab56e8c0a6a7c42e26db588ac092b2e64
--- /dev/null
+++ b/triton_models/weights/layers.10.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bec6f7110658aeb4e5232055accc5dc6e643b1e3a646eb3cb37098210ecc73cc
+size 70778880
diff --git a/triton_models/weights/layers.10.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.10.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0cb3525492e9e5a27d23ce1c4a739d234d26bf5f
--- /dev/null
+++ b/triton_models/weights/layers.10.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:177f6a7829d7e1f595a46873b3856b93d6e3e2cd42dc90c72c5c144aaed3cbe3
+size 4423680
diff --git a/triton_models/weights/layers.10.feed_forward.w2.0.qweight b/triton_models/weights/layers.10.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3b27aac9ff4140c9bf35736cfb08d9766e778b09
--- /dev/null
+++ b/triton_models/weights/layers.10.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35ef7172b42eb5e0f35f5de5af321d315a82209152e65242a5c61759fcb84fbb
+size 35389440
diff --git a/triton_models/weights/layers.10.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.10.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f13808d8d8ff7f50af33195c7c3709813a477925
--- /dev/null
+++ b/triton_models/weights/layers.10.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6453d9f08f6a09cf1e6c217e8b3ae70f527c11df1a37d93452f334da10f5afe0
+size 2211840
diff --git a/triton_models/weights/layers.10.ffn_norm.weight b/triton_models/weights/layers.10.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..f973d9a3751f571eb51028ce522571fd5b6896e5
--- /dev/null
+++ b/triton_models/weights/layers.10.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad1ff3009d2b1f547c90e33cb7ff634b6fec18e2a0a740ee1cc65342606dcabc
+size 10240
diff --git a/triton_models/weights/layers.11.attention.w_qkv.0.qweight b/triton_models/weights/layers.11.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..10822cbc2b5bed3917608ec8865eff435cbe6892
--- /dev/null
+++ b/triton_models/weights/layers.11.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcdbacf395ba4b3b114ff215320c69726dce204c4b035a26e21afea020e58b6d
+size 39321600
diff --git a/triton_models/weights/layers.11.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.11.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..34ece2c4c43bbc0dc20bb27fb23dbde0b875d73f
--- /dev/null
+++ b/triton_models/weights/layers.11.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:297d320a54d2fe4a4a5c10597f4c719e2742a8cc1061bdd4d1f00dca94b37e65
+size 2457600
diff --git a/triton_models/weights/layers.11.attention.wo.0.qweight b/triton_models/weights/layers.11.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5ca4331a0956d15596997cc4a8eb24205043b158
--- /dev/null
+++ b/triton_models/weights/layers.11.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3467c419b69910c177e0f9c9875cdf0934394816d2acfea561be60b431323531
+size 13107200
diff --git a/triton_models/weights/layers.11.attention.wo.0.scales_zeros b/triton_models/weights/layers.11.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c03abc6455c3f891573ffdd617b16745a5387f78
--- /dev/null
+++ b/triton_models/weights/layers.11.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e570d82587a10ba243a5391b803567428ef5892bd90a4ee6863125f2b28337
+size 819200
diff --git a/triton_models/weights/layers.11.attention_norm.weight b/triton_models/weights/layers.11.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..976895169f72353187ba768f26fd3609ad4cb705
--- /dev/null
+++ b/triton_models/weights/layers.11.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcfeb433655def177b57217a5207d404a0217fc2473fa35d8529c2035be40739
+size 10240
diff --git a/triton_models/weights/layers.11.feed_forward.w13.0.qweight b/triton_models/weights/layers.11.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e69134fe971e4b32cb947266da41573e96ccce18
--- /dev/null
+++ b/triton_models/weights/layers.11.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce374b360fec6334722bbcb06dcd26454235c1d7c937c0edf71c23b583c6022f
+size 70778880
diff --git a/triton_models/weights/layers.11.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.11.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..4424b7c67ded432dc61290312f8c2e93125b6be5
--- /dev/null
+++ b/triton_models/weights/layers.11.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f3a9f0fcc54a4c993aa6a4921938a4c21661b96ff58649df0e8768245d03adb
+size 4423680
diff --git a/triton_models/weights/layers.11.feed_forward.w2.0.qweight b/triton_models/weights/layers.11.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..749ad1d752556b33ba435dc85fc7c5bd42e61a27
--- /dev/null
+++ b/triton_models/weights/layers.11.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff38e3d1217a9b46f24dca12fa15eabe1cd2c7376b275fe49b3fef33d3e8de55
+size 35389440
diff --git a/triton_models/weights/layers.11.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.11.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0e1cafadb4c15b550406ada2dc2b041866d64ef0
--- /dev/null
+++ b/triton_models/weights/layers.11.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e83049eacb191dd6e0f06fd6ff9b97661bc5871ef177e497a2a877cf82da399e
+size 2211840
diff --git a/triton_models/weights/layers.11.ffn_norm.weight b/triton_models/weights/layers.11.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..d8a6c2d9975919d0d585d78749b807ff3d36e2c0
--- /dev/null
+++ b/triton_models/weights/layers.11.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad08bb31e5569e8bb020fd7f08e4de2155f6678dd3b515e5f628d4121839662
+size 10240
diff --git a/triton_models/weights/layers.12.attention.w_qkv.0.qweight b/triton_models/weights/layers.12.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a50fa09a9917c2c5c87189e2584ca904d1fa7548
--- /dev/null
+++ b/triton_models/weights/layers.12.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d99e526632f8ca547dd14968bde9a6eb12a26e17449f7623b6f2f90a8f1c79e
+size 39321600
diff --git a/triton_models/weights/layers.12.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.12.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e26f4300ca30f97f96379d461d651e9059de7ccc
--- /dev/null
+++ b/triton_models/weights/layers.12.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4baef4d616cb6b9f9cd8e03e79560a206123de1674c3c7a4c2e742eecf83e912
+size 2457600
diff --git a/triton_models/weights/layers.12.attention.wo.0.qweight b/triton_models/weights/layers.12.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..15ffcba5fd1ea539bcc2d1aa97708f21b8743503
--- /dev/null
+++ b/triton_models/weights/layers.12.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f41cd1e3cd3971e085090fe4ac21061d707a7a006903af8c3eb7184e6c4d9e02
+size 13107200
diff --git a/triton_models/weights/layers.12.attention.wo.0.scales_zeros b/triton_models/weights/layers.12.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b336c00cc213299a3588d265f684d64bbcac5d7c
--- /dev/null
+++ b/triton_models/weights/layers.12.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff64169381b5f2d1baebfbafef969ce337ad62104d2bf16d351e71b4c4db195f
+size 819200
diff --git a/triton_models/weights/layers.12.attention_norm.weight b/triton_models/weights/layers.12.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..f4d4967b418543bacde66b25cda61d8ac90ca2df
--- /dev/null
+++ b/triton_models/weights/layers.12.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25fc08c0b79d0710b997375569aa98a6d07c906c455c7f3d04163c42d5530b7d
+size 10240
diff --git a/triton_models/weights/layers.12.feed_forward.w13.0.qweight b/triton_models/weights/layers.12.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e3f4511b45c8de7c947ab8cf361149f966935c64
--- /dev/null
+++ b/triton_models/weights/layers.12.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91cbe92c964e2e8b42dfa2b74b3e6e63095294dd24bc7dd03bb120f24dfe9e05
+size 70778880
diff --git a/triton_models/weights/layers.12.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.12.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..92c6a39147500b310d519abd974aa87d63f3c176
--- /dev/null
+++ b/triton_models/weights/layers.12.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:570ca7b1f776942d1bc45cfc22eea9d0fd0f9bea227ca5bbaa031e62b87a25a4
+size 4423680
diff --git a/triton_models/weights/layers.12.feed_forward.w2.0.qweight b/triton_models/weights/layers.12.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..102c2e69c82a3a63782e12ad8bf8b9aa504cf7f2
--- /dev/null
+++ b/triton_models/weights/layers.12.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e14fe9d19edef21ca5614bacca191ffd99b7370e646528f7308f78f1c451abd4
+size 35389440
diff --git a/triton_models/weights/layers.12.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.12.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..46ff64782e33a0d29f5fe3ff5572c43dce4ba95d
--- /dev/null
+++ b/triton_models/weights/layers.12.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20536e9792350d1f49407574d078364a05c251e02cc5fb0af1a9b28af87fae5b
+size 2211840
diff --git a/triton_models/weights/layers.12.ffn_norm.weight b/triton_models/weights/layers.12.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..987fd926755afec5a5fea4ad50104c0b311de17e
--- /dev/null
+++ b/triton_models/weights/layers.12.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d960dc35ec7927714bc03787ccc2c5fc43638f8cd0c5c02b477b7c84c510337
+size 10240
diff --git a/triton_models/weights/layers.13.attention.w_qkv.0.qweight b/triton_models/weights/layers.13.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3f3deada8b1dcf5ac5bfdc0c7d445f7a4208d7ee
--- /dev/null
+++ b/triton_models/weights/layers.13.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3041f1c53bdacacc76a0999466f4529ee596cfb6753e2b19616d16381fbb1dc8
+size 39321600
diff --git a/triton_models/weights/layers.13.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.13.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..af55963d708419f9ee612d4fbfd63be3bbf5e020
--- /dev/null
+++ b/triton_models/weights/layers.13.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3895a5fec8fc688c350470b6a12e60804c6e333e78a26706c6397f1c772126bc
+size 2457600
diff --git a/triton_models/weights/layers.13.attention.wo.0.qweight b/triton_models/weights/layers.13.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..48cec662eb4cf361cbc0b033f8f89a9c3bacee26
--- /dev/null
+++ b/triton_models/weights/layers.13.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cee8d4f7145a1978163e4ee906cc66ebcd1363eb61b028af369f38f290eee7b
+size 13107200
diff --git a/triton_models/weights/layers.13.attention.wo.0.scales_zeros b/triton_models/weights/layers.13.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1866d9a0846391beaf9b7daef06218fa0f6e0999
--- /dev/null
+++ b/triton_models/weights/layers.13.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad84e0758736a5c6a3ded59886d0c7d446085c086525dbc756ff9cd30c7987d2
+size 819200
diff --git a/triton_models/weights/layers.13.attention_norm.weight b/triton_models/weights/layers.13.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..8b4c5bff241b72b615d861ce37b171c9656fec32
--- /dev/null
+++ b/triton_models/weights/layers.13.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fad2bc1e132ed5647900357684f950f17634681951d0529fb4c766b8f7f67fc
+size 10240
diff --git a/triton_models/weights/layers.13.feed_forward.w13.0.qweight b/triton_models/weights/layers.13.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..316fa1f269a8a3465f882782c84b7b68a8ae9bb5
--- /dev/null
+++ b/triton_models/weights/layers.13.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57eeee94e4aafd4e471456581ed1da9230d115978447d30c08a53461f98c5dfa
+size 70778880
diff --git a/triton_models/weights/layers.13.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.13.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..984e33f10194cac9994068cb03fe3078d74ada9d
--- /dev/null
+++ b/triton_models/weights/layers.13.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc402db150c919b677898b956160ff9ba41342c724739eb898ac49c09ea24abb
+size 4423680
diff --git a/triton_models/weights/layers.13.feed_forward.w2.0.qweight b/triton_models/weights/layers.13.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..84ecfae78d8a426df4ef3e72380cbb3ffba77adc
--- /dev/null
+++ b/triton_models/weights/layers.13.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c738679bc502eeb5b884a88f8ef260e61905f8c3e468476084ef71375030af58
+size 35389440
diff --git a/triton_models/weights/layers.13.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.13.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..421c63fa2202d825fb6202c2e16d3f7a2fb78f28
--- /dev/null
+++ b/triton_models/weights/layers.13.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a48fc696cf022ef72392c6154473654bc880ab7511f9bbd257ea9e5a9824899b
+size 2211840
diff --git a/triton_models/weights/layers.13.ffn_norm.weight b/triton_models/weights/layers.13.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..08d5dd3bde59c5329cc7e17ba583287907a6b056
--- /dev/null
+++ b/triton_models/weights/layers.13.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c702c73584fc79f10457cfa58e6b525e0be21036bcb4b8ffd610d546ffa1797d
+size 10240
diff --git a/triton_models/weights/layers.14.attention.w_qkv.0.qweight b/triton_models/weights/layers.14.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..789c8c825a489f376465806cd875190f2ca7cb43
--- /dev/null
+++ b/triton_models/weights/layers.14.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f31344add1fce8379163b366be52f8864ee9e0f4db827ac130522b45fce488d
+size 39321600
diff --git a/triton_models/weights/layers.14.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.14.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..938738b128c5ad255bd99f995bfab05ed414bbef
--- /dev/null
+++ b/triton_models/weights/layers.14.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78200b2ee0090ca544f4fae0e6b64e2470b4a6ababd78ff9da252c467735e792
+size 2457600
diff --git a/triton_models/weights/layers.14.attention.wo.0.qweight b/triton_models/weights/layers.14.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..4122a93dbef0431a64f9c48732a08840ecf0fc44
--- /dev/null
+++ b/triton_models/weights/layers.14.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4e690093fc917acf565a0dbe45de12e5ad9022318ffa1df040db6df1357a3d
+size 13107200
diff --git a/triton_models/weights/layers.14.attention.wo.0.scales_zeros b/triton_models/weights/layers.14.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a189ac56d3a702a72ab9ca6e0b33d875ab1006fb
--- /dev/null
+++ b/triton_models/weights/layers.14.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:590a2348c8e8f533a2147da6c95c29fe53deeaf88fa9bc28ebd8de67144684d8
+size 819200
diff --git a/triton_models/weights/layers.14.attention_norm.weight b/triton_models/weights/layers.14.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..23d98fd73157db3608e13ee996d27d2aea610f75
--- /dev/null
+++ b/triton_models/weights/layers.14.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0510821df811e0e06e147c4e1a7943670051bc1e0e1e4ef403faf2ce6c14099
+size 10240
diff --git a/triton_models/weights/layers.14.feed_forward.w13.0.qweight b/triton_models/weights/layers.14.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..70f0be1606e057cbb41943d56237774478b209a2
--- /dev/null
+++ b/triton_models/weights/layers.14.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81229afc2ff36fe64677eabf737e4968f00ba1b278cb80486d1613c0e4f8231a
+size 70778880
diff --git a/triton_models/weights/layers.14.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.14.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ccbb6c5f23f0d768c77e98eb88b6505b2fd11857
--- /dev/null
+++ b/triton_models/weights/layers.14.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48fae147b15e1d51a8a8c192c8ccb4dcb599c91218a05c016f150d2af7f0a7cd
+size 4423680
diff --git a/triton_models/weights/layers.14.feed_forward.w2.0.qweight b/triton_models/weights/layers.14.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..08de3ee31bb6124090ddb4b9972d541afd2ed521
--- /dev/null
+++ b/triton_models/weights/layers.14.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4facde3a0fa054667e9a966d1685d54b6ae3e639faffd95978c514dbcf2e538
+size 35389440
diff --git a/triton_models/weights/layers.14.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.14.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2a40f708a73fa41b453cbbe64950d93570545edd
--- /dev/null
+++ b/triton_models/weights/layers.14.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4256062a13bff5ccca05fda7ac591b66dd234e60d7a2c91c81d562ea81b4a68
+size 2211840
diff --git a/triton_models/weights/layers.14.ffn_norm.weight b/triton_models/weights/layers.14.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..08b8ef7f86da02adb4462280cb2c47345e64b409
--- /dev/null
+++ b/triton_models/weights/layers.14.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7a4112d5dafa698da08f424b01f65018c47ec884503420ad17b186054c279b3
+size 10240
diff --git a/triton_models/weights/layers.15.attention.w_qkv.0.qweight b/triton_models/weights/layers.15.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..90321ba964bfd42419d52ae841072e74b03618c2
--- /dev/null
+++ b/triton_models/weights/layers.15.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8087d80732efa9ece12980d805a4c34597bc0878a8e1e30d2b689561f1416b0
+size 39321600
diff --git a/triton_models/weights/layers.15.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.15.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9502b96471ac94759d022c41d2ab82aac4b83c04
--- /dev/null
+++ b/triton_models/weights/layers.15.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92988fc162d4c396e6b3ac1087980cd14f8ff1f5aeb046bc374ec2463e34a5f2
+size 2457600
diff --git a/triton_models/weights/layers.15.attention.wo.0.qweight b/triton_models/weights/layers.15.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c4d684765a866bf2b1dde9b657d64772fc5492cb
--- /dev/null
+++ b/triton_models/weights/layers.15.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9331eb642115aa1630fe5b4efcb42a550e88bc903447c50ea4f0e5286d1e6cc3
+size 13107200
diff --git a/triton_models/weights/layers.15.attention.wo.0.scales_zeros b/triton_models/weights/layers.15.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..65c4ea17fedca5de57aa7847a310d276cca4241b
--- /dev/null
+++ b/triton_models/weights/layers.15.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61b1458be7d36e70de70642e861d4df5fc9f37434f973c5a744cc9dcac7bf950
+size 819200
diff --git a/triton_models/weights/layers.15.attention_norm.weight b/triton_models/weights/layers.15.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6ede9e37599970a5c7ecfcc9d287b99831bbf60e
--- /dev/null
+++ b/triton_models/weights/layers.15.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d29ce9c397ef8f8c49fe8a05c03a0967005dcff9ed4f78935861894aa1335cb9
+size 10240
diff --git a/triton_models/weights/layers.15.feed_forward.w13.0.qweight b/triton_models/weights/layers.15.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..eed127531de85e18d34c8d1fcf25aecc3ddf97b9
--- /dev/null
+++ b/triton_models/weights/layers.15.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c773c950ea7c114b717afaa0fa06761c75b5bcbcdeb2f1a4b58de4cc9a66f9f
+size 70778880
diff --git a/triton_models/weights/layers.15.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.15.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fcb7900dfce9ad18fd71c23fd38aa3def58687a8
--- /dev/null
+++ b/triton_models/weights/layers.15.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af44ddcbcff1dcea8d2ba8baaae58164c121cfd4e6b4d62e86d509d3ec15a805
+size 4423680
diff --git a/triton_models/weights/layers.15.feed_forward.w2.0.qweight b/triton_models/weights/layers.15.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b351bc39fe079752e4dddf9c9444e018cb62f489
--- /dev/null
+++ b/triton_models/weights/layers.15.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ead0417435d48a3d60f946c5756d8a8a227d2292a387db6e34846a8ef4fcdc25
+size 35389440
diff --git a/triton_models/weights/layers.15.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.15.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f69d7f9f80aaa446a204433e1c962863b12bac8f
--- /dev/null
+++ b/triton_models/weights/layers.15.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbeb2f6ac65788077ab0ba991ca541a0870f0648b9efe5ed16082c9e4c07788f
+size 2211840
diff --git a/triton_models/weights/layers.15.ffn_norm.weight b/triton_models/weights/layers.15.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..a32f3cfd3903509f7db035220f6a5190f4799a5c
--- /dev/null
+++ b/triton_models/weights/layers.15.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a492032132d360fd7cd2cff44fe3c1a1249866ddddfec16ccb07fc9ea9094199
+size 10240
diff --git a/triton_models/weights/layers.16.attention.w_qkv.0.qweight b/triton_models/weights/layers.16.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..92e827a2253c51bd3da7a0c56ba9150279edf845
--- /dev/null
+++ b/triton_models/weights/layers.16.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6cd1e2846811f3b791aec43a4b5601473732a50550299e892c090ee952b8e0
+size 39321600
diff --git a/triton_models/weights/layers.16.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.16.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c11152bf209d266bf8cd43d7b55da74f3eaa2d81
--- /dev/null
+++ b/triton_models/weights/layers.16.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcfc690b5f4955f3d6c717530c45b4c9d87dc4fff88a8d39e31f90dd41c895aa
+size 2457600
diff --git a/triton_models/weights/layers.16.attention.wo.0.qweight b/triton_models/weights/layers.16.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d6115cd5703ec1ccaa3636a0566c111bb8a6e703
--- /dev/null
+++ b/triton_models/weights/layers.16.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe80870856ded37ab6a8e346ee26065ad60b099b2bca65628de6f0d28ca1c205
+size 13107200
diff --git a/triton_models/weights/layers.16.attention.wo.0.scales_zeros b/triton_models/weights/layers.16.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f3c9eb3e44c80267d58f05986f3b08150b7a3e4f
--- /dev/null
+++ b/triton_models/weights/layers.16.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84e54a46aff0231bee5a1d9eda3e0312b3cd45b513c38cb7b26f2eb94f9380ce
+size 819200
diff --git a/triton_models/weights/layers.16.attention_norm.weight b/triton_models/weights/layers.16.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6b9e994185a41a02ea183b5307c7e66cd9761a66
--- /dev/null
+++ b/triton_models/weights/layers.16.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fee442e9ce624200c2ab49d4229d249ec5b275929908b30e700aa2fad116742
+size 10240
diff --git a/triton_models/weights/layers.16.feed_forward.w13.0.qweight b/triton_models/weights/layers.16.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d9653626266cf31a12d323328ef4ae19bbd261ab
--- /dev/null
+++ b/triton_models/weights/layers.16.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f3f3f1aa0e6d2ac181f249f7a13e1ce60aa6839198510682f5e8922a3db804b
+size 70778880
diff --git a/triton_models/weights/layers.16.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.16.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..26788c17fcb1c4332a455c50dcc20edd756da301
--- /dev/null
+++ b/triton_models/weights/layers.16.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d60132fc97c6a85eabb2dfb5186b537a1d4f5922aa9448c1bd9e979c00b8ff7
+size 4423680
diff --git a/triton_models/weights/layers.16.feed_forward.w2.0.qweight b/triton_models/weights/layers.16.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ce83f708014809aa8de424a4101bc620bd7a9786
--- /dev/null
+++ b/triton_models/weights/layers.16.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a26b960d3c0c9732c2a5ae2ce9762da7007c0fbf5cd94c0dd36389e7cb0608c2
+size 35389440
diff --git a/triton_models/weights/layers.16.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.16.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d4c88a9bca0b3c248b10640e4c84f316999eb85f
--- /dev/null
+++ b/triton_models/weights/layers.16.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6b35c21aeed8f57d8a383db550fd785041cf14a6989ada823b8b004362f7da5
+size 2211840
diff --git a/triton_models/weights/layers.16.ffn_norm.weight b/triton_models/weights/layers.16.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b7ecee192861b8721152973e44bd9c78a65fcc1b
--- /dev/null
+++ b/triton_models/weights/layers.16.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e9e07af0abaabdb86d31dd473667ca029b7f45fdf2e2589ec5dc7df9116794
+size 10240
diff --git a/triton_models/weights/layers.17.attention.w_qkv.0.qweight b/triton_models/weights/layers.17.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1114a8ded79c17a7baf613162e87fec05887f714
--- /dev/null
+++ b/triton_models/weights/layers.17.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5740a7aa3b2d56879e6e19aaa21d451b4eb1a007f8a2efdfd73221c6081440bc
+size 39321600
diff --git a/triton_models/weights/layers.17.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.17.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fa312788a282376271ce2f4709190c91f64e2d3c
--- /dev/null
+++ b/triton_models/weights/layers.17.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e125559a7fe48c6371de90018ad47553b8e5c8fb69dc7980dfc5ea80d20aa87
+size 2457600
diff --git a/triton_models/weights/layers.17.attention.wo.0.qweight b/triton_models/weights/layers.17.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..36d3d9ededa93c7889b0558399fca0b79dd081cd
--- /dev/null
+++ b/triton_models/weights/layers.17.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2595eadb85000c0da838d97da10e02e981f142657220c903e604649c2605be5d
+size 13107200
diff --git a/triton_models/weights/layers.17.attention.wo.0.scales_zeros b/triton_models/weights/layers.17.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..86a4c43d73d031f8a69669f85555ec00f49bd057
--- /dev/null
+++ b/triton_models/weights/layers.17.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82e12d60676d69bcbd19b80c69043aac55d0cbed42a06927480c6d72bced4c3d
+size 819200
diff --git a/triton_models/weights/layers.17.attention_norm.weight b/triton_models/weights/layers.17.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..4597494dc15916e28bf967cbf25ff2b0b50fc8e8
--- /dev/null
+++ b/triton_models/weights/layers.17.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0d440b2b0e11341370d25b232210cd33cf0572ca4d7544ebf816acefcd8b36
+size 10240
diff --git a/triton_models/weights/layers.17.feed_forward.w13.0.qweight b/triton_models/weights/layers.17.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ac7de699caa057e8adb12da141192c663567c530
--- /dev/null
+++ b/triton_models/weights/layers.17.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31f7a3114a9b7fae4a6e926630b2b459f66068570d2fde2b802fd96119344a7b
+size 70778880
diff --git a/triton_models/weights/layers.17.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.17.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ac5e0ab2e2877b195cd8d7ce1ca598ca6d444c7a
--- /dev/null
+++ b/triton_models/weights/layers.17.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2613d27bb69374ed53b5beb5f387a0a6a4ff87538d44ee751515c70c355622
+size 4423680
diff --git a/triton_models/weights/layers.17.feed_forward.w2.0.qweight b/triton_models/weights/layers.17.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a6880d34e98a68dbc31fd2d7708499cb5404e6c1
--- /dev/null
+++ b/triton_models/weights/layers.17.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6838f02c7fcb93b4b0dfb0bddf993bbae23d6e835896657d493206eaf879d3fe
+size 35389440
diff --git a/triton_models/weights/layers.17.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.17.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d98df5a915cd3202ec417ffcef4d89e8b908f097
--- /dev/null
+++ b/triton_models/weights/layers.17.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0f457c445fdcca69d5e1d98e8d4a57accf0929b3638a05d75758cb0be9015f6
+size 2211840
diff --git a/triton_models/weights/layers.17.ffn_norm.weight b/triton_models/weights/layers.17.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6588f7a1d90bd033b90c471cb7cb13cb1b175882
--- /dev/null
+++ b/triton_models/weights/layers.17.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b294d680e741eacca1a89566c5b1cc948602d9a69461ab0f07143777c4c96d5b
+size 10240
diff --git a/triton_models/weights/layers.18.attention.w_qkv.0.qweight b/triton_models/weights/layers.18.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..32b2fd66b0f5b27058272cfbc4b3eaca8f1f4cf0
--- /dev/null
+++ b/triton_models/weights/layers.18.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f489ebe3cd446e0886e9d8ce85d81cf6992db0766cb1e74d684a631db71e723
+size 39321600
diff --git a/triton_models/weights/layers.18.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.18.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ead00b72a1234c4278af1c81c335ff6c7c1de1d5
--- /dev/null
+++ b/triton_models/weights/layers.18.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cba1af036e83b01b2fd0f2ee0d7c03e6b2a61d9462e087373a2d45b2dc9cb0e
+size 2457600
diff --git a/triton_models/weights/layers.18.attention.wo.0.qweight b/triton_models/weights/layers.18.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..87fba9ee5f2dabc7e7b341d1293b552411658441
--- /dev/null
+++ b/triton_models/weights/layers.18.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf6c647ec98f45eb3fa85481e5c2d805c959ead9044df51013d0090a27084428
+size 13107200
diff --git a/triton_models/weights/layers.18.attention.wo.0.scales_zeros b/triton_models/weights/layers.18.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..73ddf03b6b090fa9aae7c3b667ac52495b22617e
--- /dev/null
+++ b/triton_models/weights/layers.18.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fba1c840129c4cdb3b86ce180e103eda0f84e0ca195cdaf37a6859dad83a0957
+size 819200
diff --git a/triton_models/weights/layers.18.attention_norm.weight b/triton_models/weights/layers.18.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1b5802ce7b3ec3c51bd18a520438c76c3a714771
--- /dev/null
+++ b/triton_models/weights/layers.18.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f8334300804dd78b0081ca33bd1f949dec16ead02341b1bc329b6a871e05e0c
+size 10240
diff --git a/triton_models/weights/layers.18.feed_forward.w13.0.qweight b/triton_models/weights/layers.18.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5b3e55601e1248163d08124e375b3d1927bb059e
--- /dev/null
+++ b/triton_models/weights/layers.18.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd04405fb5918be5930491c51af480819be74b76d9899dcaecabf0f5697c0391
+size 70778880
diff --git a/triton_models/weights/layers.18.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.18.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b9c4d4d50fe1cffc0801f4697952b44bac2634d2
--- /dev/null
+++ b/triton_models/weights/layers.18.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5ce66e51bf4cb598028b608116a6881f2f720a6cfd542c0df8aafc123bcde4
+size 4423680
diff --git a/triton_models/weights/layers.18.feed_forward.w2.0.qweight b/triton_models/weights/layers.18.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..86eda579c12c39810347d16d61e9f9e16cb97b2f
--- /dev/null
+++ b/triton_models/weights/layers.18.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f84b974b31b948bd0fc1994afafdd01b52ac4211e99964817e9efbb046cf300
+size 35389440
diff --git a/triton_models/weights/layers.18.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.18.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..63ab3ab6d87666c0b4156e2620d62ff179f85c72
--- /dev/null
+++ b/triton_models/weights/layers.18.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afa1a98a7724ce71f4153d51439df8571cb5d3dcc09225f4d813a38b054b5e52
+size 2211840
diff --git a/triton_models/weights/layers.18.ffn_norm.weight b/triton_models/weights/layers.18.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..995f8d7b6fcf248ffe44a0756ee4ac8f636e8272
--- /dev/null
+++ b/triton_models/weights/layers.18.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d71985c5cb06a0a9fa00504fc094ea19e7b40b4d65a9ac20a9d18ca68b6a52a1
+size 10240
diff --git a/triton_models/weights/layers.19.attention.w_qkv.0.qweight b/triton_models/weights/layers.19.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bbc3cbb03d51c01049f8b4740c3989e8b25e7d2c
--- /dev/null
+++ b/triton_models/weights/layers.19.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96780150144555cbcad954fff9c920fbcbe8326213a13519b0d4cd1e5fc2ffa3
+size 39321600
diff --git a/triton_models/weights/layers.19.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.19.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..433bac5ab3eca8165644e5c577db7c075f3ee252
--- /dev/null
+++ b/triton_models/weights/layers.19.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e24d7f42b4c46c24436d9c9ff18b024cddf05d8d3264793e1fe19623101154a5
+size 2457600
diff --git a/triton_models/weights/layers.19.attention.wo.0.qweight b/triton_models/weights/layers.19.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b7850d55cd348ce39ad560d2f99ed65192b47676
--- /dev/null
+++ b/triton_models/weights/layers.19.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbc43604de385e1b4656069ea0dee114e4546ed8bed475a13dfe654cb40874a5
+size 13107200
diff --git a/triton_models/weights/layers.19.attention.wo.0.scales_zeros b/triton_models/weights/layers.19.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f42c839aff671414fc6a85f6f360e16d14e514e7
--- /dev/null
+++ b/triton_models/weights/layers.19.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c75098b44b0b34fccaba9a2e85dbcfb1d60490fafe0f5cdbcd7c4d4bbc497d
+size 819200
diff --git a/triton_models/weights/layers.19.attention_norm.weight b/triton_models/weights/layers.19.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..2f0e4d70366178690d250c9a62ef71a498cf1c4e
--- /dev/null
+++ b/triton_models/weights/layers.19.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58a58209d1b889db451954bc959f4a5697da391e495f77d4bfd05939998855c
+size 10240
diff --git a/triton_models/weights/layers.19.feed_forward.w13.0.qweight b/triton_models/weights/layers.19.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..665d75145eb5a4ac76f7b947949cdea4458a323c
--- /dev/null
+++ b/triton_models/weights/layers.19.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a5bb3fdab3e2d3babad9a80ed863e9c7921e3bd47ba055d21364b304af1b19c
+size 70778880
diff --git a/triton_models/weights/layers.19.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.19.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0f6e9bc7910245fbb1a94aac4b846029a05a2f06
--- /dev/null
+++ b/triton_models/weights/layers.19.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aca4447910cdecd3c32d2671c5cbc9041bb2ce959ffbe60435e4486bb1f33c1b
+size 4423680
diff --git a/triton_models/weights/layers.19.feed_forward.w2.0.qweight b/triton_models/weights/layers.19.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0cb2e30722dd67791078a582595e183d882aa2bb
--- /dev/null
+++ b/triton_models/weights/layers.19.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de20f15a741e7bc21afa8a0886c9f4e441063d906dab148d45ac71edde19c56e
+size 35389440
diff --git a/triton_models/weights/layers.19.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.19.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c6831fb524d821585be711707bfbd07d25e23dda
--- /dev/null
+++ b/triton_models/weights/layers.19.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:443c064567314ec501e90ec2f4ab964e09ccecaec16fb9e6fe9cb8853824968a
+size 2211840
diff --git a/triton_models/weights/layers.19.ffn_norm.weight b/triton_models/weights/layers.19.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..11502f15e68715d87b649dc9b04b2fe57e0f6a37
--- /dev/null
+++ b/triton_models/weights/layers.19.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8267e61384093bfa2b00bc8f73bc6f5a505a63436d1bcc468f9c8288af11d522
+size 10240
diff --git a/triton_models/weights/layers.2.attention.w_qkv.0.qweight b/triton_models/weights/layers.2.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bd3906633e2f7a81df5c9e863f531bac096d2ee3
--- /dev/null
+++ b/triton_models/weights/layers.2.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:876e0c774b786ad2a67ac3ebe57be6fce14c1652e3891a94f25f3ce2f3ed086e
+size 39321600
diff --git a/triton_models/weights/layers.2.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.2.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..bc84c12ba00b738defd57fcdd6fdb033bccd470b
--- /dev/null
+++ b/triton_models/weights/layers.2.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4a164ddd6238150dd289cf2cafbd1cf895ffe07a43a022814712875f33fb2e
+size 2457600
diff --git a/triton_models/weights/layers.2.attention.wo.0.qweight b/triton_models/weights/layers.2.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a62156ab0c31740db71f976f56ef5fc90defd23f
--- /dev/null
+++ b/triton_models/weights/layers.2.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13157d4a0fc52045d5610952b0bd648714149b00fe73f715665a54eae42ad7ff
+size 13107200
diff --git a/triton_models/weights/layers.2.attention.wo.0.scales_zeros b/triton_models/weights/layers.2.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b6dc4a1a3435fd778776924d0cfc7c2cc8040100
--- /dev/null
+++ b/triton_models/weights/layers.2.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71df75b0fe6e730d0a4b7d7c48b30e99a77531c33f428db9fd3ab666aace327a
+size 819200
diff --git a/triton_models/weights/layers.2.attention_norm.weight b/triton_models/weights/layers.2.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..8b2a0ff1bec121311da3df4471a97e69fb903967
--- /dev/null
+++ b/triton_models/weights/layers.2.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:052cca5ef15494158256e948ab4515ab2a8ba4044736d3da8c9723ccaca14e60
+size 10240
diff --git a/triton_models/weights/layers.2.feed_forward.w13.0.qweight b/triton_models/weights/layers.2.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2ed7e93f29d8d8401021e7a3cacd88ea6b1ffac4
--- /dev/null
+++ b/triton_models/weights/layers.2.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc012e9fd0154cc014b7b86f09e6e2e99e4bcc581a0a4fc0dd14536959086e72
+size 70778880
diff --git a/triton_models/weights/layers.2.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.2.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..30b154d5f300736eae87b680827546ce929857b4
--- /dev/null
+++ b/triton_models/weights/layers.2.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34750b002886c51d129098ee4cfd8876ca7b483661034bb5f647186890306ff6
+size 4423680
diff --git a/triton_models/weights/layers.2.feed_forward.w2.0.qweight b/triton_models/weights/layers.2.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b91257cb0bdcd0d1382f18248e66d9e4368805e3
--- /dev/null
+++ b/triton_models/weights/layers.2.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df92de338f83219921612803afffcd95612ebdc2c720035d13f52e3f3bf3f33
+size 35389440
diff --git a/triton_models/weights/layers.2.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.2.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..cd6975f47d34c512f9f1c44cf36d90ccf54e4ec6
--- /dev/null
+++ b/triton_models/weights/layers.2.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3a6f97f49ce31f3ba41cef65075eec36a84dfc2fb431076512f085eb3c9a752
+size 2211840
diff --git a/triton_models/weights/layers.2.ffn_norm.weight b/triton_models/weights/layers.2.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1a7386bf05d36e28af3c845b6a9c925ad8277022
--- /dev/null
+++ b/triton_models/weights/layers.2.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09a3992e54d5fc8ec1d8dd7d1551201d07d1abd86ee8ac6becca5c5d187a592d
+size 10240
diff --git a/triton_models/weights/layers.20.attention.w_qkv.0.qweight b/triton_models/weights/layers.20.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..17919ee6ffe30cb5a074c561831632a65eba2e3e
--- /dev/null
+++ b/triton_models/weights/layers.20.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24ba473b67f09c4315d0b98e8d39be6b68ba598ef10b8091c9b8b106afff85be
+size 39321600
diff --git a/triton_models/weights/layers.20.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.20.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..02f0f501a96b105bf7a5c94bf5641a350d6b7753
--- /dev/null
+++ b/triton_models/weights/layers.20.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15f8b0fb61b881bd7c955f83f567cd311e3ed6d4b67defe9d1a052f63cd439ac
+size 2457600
diff --git a/triton_models/weights/layers.20.attention.wo.0.qweight b/triton_models/weights/layers.20.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c69e74ba623363865428d5a336d2f2c2e11ac1a5
--- /dev/null
+++ b/triton_models/weights/layers.20.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e4c98cd90b4e19330b69281d54d59811366fffbfb7aa7420cc1ad724b64962
+size 13107200
diff --git a/triton_models/weights/layers.20.attention.wo.0.scales_zeros b/triton_models/weights/layers.20.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9088bcc9df0d6cfefb68d0420abd30f5af12f192
--- /dev/null
+++ b/triton_models/weights/layers.20.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36483e9ece138df351bbb33f663294da36d69cf63ae4286f04697f80dfb56e06
+size 819200
diff --git a/triton_models/weights/layers.20.attention_norm.weight b/triton_models/weights/layers.20.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c50d319d32c51f97024a845017685ae3e1044cdb
--- /dev/null
+++ b/triton_models/weights/layers.20.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a463b782e29f19e5e96bf64e2ef4359a8de096b3d0612547db0d8b1663f0178a
+size 10240
diff --git a/triton_models/weights/layers.20.feed_forward.w13.0.qweight b/triton_models/weights/layers.20.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e84bd3df5b0ab209c23febfa42459ecb94f89f55
--- /dev/null
+++ b/triton_models/weights/layers.20.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e49355a1ecd38e01bfc792b7ede60d957f877cdb2c62cc8c0ab6ae0025a0ad3
+size 70778880
diff --git a/triton_models/weights/layers.20.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.20.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..76fa8db0867f99986579c392e149845265b741f1
--- /dev/null
+++ b/triton_models/weights/layers.20.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9111a889f0e442c0d876edac95ebe1f8ebc6a73fb12a6d7f1626689511fa9fde
+size 4423680
diff --git a/triton_models/weights/layers.20.feed_forward.w2.0.qweight b/triton_models/weights/layers.20.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..7425b86af0c70372513a9628c5e1c474edf5b2dd
--- /dev/null
+++ b/triton_models/weights/layers.20.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3987730998ce1d3dcc6640f7da95d401a6b2260357fa7eb95413fcaca6abac2d
+size 35389440
diff --git a/triton_models/weights/layers.20.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.20.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..05d1102b351c1ca3b6e4f93b6e348a0fe878580a
--- /dev/null
+++ b/triton_models/weights/layers.20.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e92746da18999b7f9c8f35607d137a501f8aa6291ad08e34c611b6a9eaa1b39
+size 2211840
diff --git a/triton_models/weights/layers.20.ffn_norm.weight b/triton_models/weights/layers.20.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..7e24601b0476958bb0ff93518e02ed0ba97ed641
--- /dev/null
+++ b/triton_models/weights/layers.20.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:635f5f75f74720a37e75d6457c7a20fba4ef80c1e638cbee18135261bcc2d5bb
+size 10240
diff --git a/triton_models/weights/layers.21.attention.w_qkv.0.qweight b/triton_models/weights/layers.21.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0afbb19d05949f2af8929bc40817e2202d4f46ed
--- /dev/null
+++ b/triton_models/weights/layers.21.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3d1ddbad19543182a12b1e7a5b2023c6eb4a7f5a8f3cdc58f73a6770b9a414d
+size 39321600
diff --git a/triton_models/weights/layers.21.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.21.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ab7c5b33f9cc45993ccc0affde7bc3684fe1edd6
--- /dev/null
+++ b/triton_models/weights/layers.21.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39caa0a31f82542c75dfb63c134faac24e47bb64e7d797143f93adc93770bbbd
+size 2457600
diff --git a/triton_models/weights/layers.21.attention.wo.0.qweight b/triton_models/weights/layers.21.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ac1511860815c399a882d9b8dcf0615a9cb3194c
--- /dev/null
+++ b/triton_models/weights/layers.21.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0054fa8fac412d16fb96f62bee67210dd519c0064e3d4408f6bab4cd7c074b75
+size 13107200
diff --git a/triton_models/weights/layers.21.attention.wo.0.scales_zeros b/triton_models/weights/layers.21.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..14a404bdd3bacc559edd9b27a4d061f030a38299
--- /dev/null
+++ b/triton_models/weights/layers.21.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1387534c3c80df7aae92469de52cef0805ae97855762cf98e4d13428a624f12
+size 819200
diff --git a/triton_models/weights/layers.21.attention_norm.weight b/triton_models/weights/layers.21.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..4f58eb73a77db96d86ad23e7633125a3363ce6fb
--- /dev/null
+++ b/triton_models/weights/layers.21.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b3ed00022c0d6d1369e68a2905bf507d0471eb73491211dfa1f501e77db18e7
+size 10240
diff --git a/triton_models/weights/layers.21.feed_forward.w13.0.qweight b/triton_models/weights/layers.21.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e36e1a9dd4b5dbc90f6666cd8f3c34e46a7d6540
--- /dev/null
+++ b/triton_models/weights/layers.21.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34865a010042596a39bb91adcf4b98082532a85beaca848d35549638d7e6bb11
+size 70778880
diff --git a/triton_models/weights/layers.21.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.21.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..93090164a37ed554c1127390d5d80ed99be7ab87
--- /dev/null
+++ b/triton_models/weights/layers.21.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6064806de81739e00a291a14e948a2f71144c275faf4da28f48fd69441b77a4
+size 4423680
diff --git a/triton_models/weights/layers.21.feed_forward.w2.0.qweight b/triton_models/weights/layers.21.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f307b28860f60be4223617b9c8d5b205b8e8d66e
--- /dev/null
+++ b/triton_models/weights/layers.21.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65c8af0c6b7e9df976f8de586afedc02a25a093892c79f3dc1f9026c2dd18ee2
+size 35389440
diff --git a/triton_models/weights/layers.21.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.21.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a37c51cfc8a0ee3db7d3fa13e5af02e29c20289f
--- /dev/null
+++ b/triton_models/weights/layers.21.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f1ba346ee0b193d318997f90548ab3e84e4a59907c1a613c48bd29f688e399b
+size 2211840
diff --git a/triton_models/weights/layers.21.ffn_norm.weight b/triton_models/weights/layers.21.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b23ee6e095adebe805bc155574f3479b471ed4ca
--- /dev/null
+++ b/triton_models/weights/layers.21.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee2d43f8ab23a0b9a53489f78cb489bf2ac2ac5c2e6a86b3bde9277fb8e34e4
+size 10240
diff --git a/triton_models/weights/layers.22.attention.w_qkv.0.qweight b/triton_models/weights/layers.22.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8c0961b79a25c39931b68eb5a16933782cbc78f3
--- /dev/null
+++ b/triton_models/weights/layers.22.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1086f11eaca24eef157c9449fa1b2054aae46689043e7c6910f0cb258e307300
+size 39321600
diff --git a/triton_models/weights/layers.22.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.22.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..12182b8977711ac726891e914ed00afd85b72844
--- /dev/null
+++ b/triton_models/weights/layers.22.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0299c7c1fe456600a14f8e446f32cbbf81fdd7230258cec7e1d46e30d9ed1a24
+size 2457600
diff --git a/triton_models/weights/layers.22.attention.wo.0.qweight b/triton_models/weights/layers.22.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2788970eb60179b8354479a87b9f63e9ba9280e1
--- /dev/null
+++ b/triton_models/weights/layers.22.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4bf77fe020928df984115a82eb80441340042d9afb6ac3b92eb8d574b96e466
+size 13107200
diff --git a/triton_models/weights/layers.22.attention.wo.0.scales_zeros b/triton_models/weights/layers.22.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..85611c5fb1e6076b0c72fa0d2180577565744c03
--- /dev/null
+++ b/triton_models/weights/layers.22.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27e9289d563135db90c1d9e0cb5a1ff9b5b06573844497791de1c9cce1a6aa3c
+size 819200
diff --git a/triton_models/weights/layers.22.attention_norm.weight b/triton_models/weights/layers.22.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..fd7eb9fd70cebe27bfec9e0b312cb41b710a9ffc
--- /dev/null
+++ b/triton_models/weights/layers.22.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67e9a5cae8b212c733ca5cb7797ab3ae78993320abecdd2ca292ed406d0e504b
+size 10240
diff --git a/triton_models/weights/layers.22.feed_forward.w13.0.qweight b/triton_models/weights/layers.22.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..080d2e843df410ebf7a89b409120d30efe61b3a0
--- /dev/null
+++ b/triton_models/weights/layers.22.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78bb18b4c3bd1260036cfef972bf5ad480e4e1e2f2b1f8858ba0a04ccde0b632
+size 70778880
diff --git a/triton_models/weights/layers.22.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.22.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..bb7ffd9de44d5dab4e6e002647c5d5b6c8a5c10f
--- /dev/null
+++ b/triton_models/weights/layers.22.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cd80942a13246b64658d8f892fa5adce4f626310fbbc542a91246cc4afa0bd5
+size 4423680
diff --git a/triton_models/weights/layers.22.feed_forward.w2.0.qweight b/triton_models/weights/layers.22.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0e417790775a036bb117e4b57f0a9dc372eb302e
--- /dev/null
+++ b/triton_models/weights/layers.22.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e532fa3a1ffb2097eca6c282611b9fc640c0f839c364bbf62d241bbab142585
+size 35389440
diff --git a/triton_models/weights/layers.22.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.22.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d79a38960bc9e8cd524fc98e8a572055361b70d8
--- /dev/null
+++ b/triton_models/weights/layers.22.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df059483cfd2401d77b061f8e99fb39a6838014f454b9926ffd984b391537a22
+size 2211840
diff --git a/triton_models/weights/layers.22.ffn_norm.weight b/triton_models/weights/layers.22.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b7769dd3a2d2e68f00552cdd9d6f4f9e19299f98
--- /dev/null
+++ b/triton_models/weights/layers.22.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77c1d8b0604653e1806790c8aa28ec1bbf007d4f38b2871086dc0a52a2f1694f
+size 10240
diff --git a/triton_models/weights/layers.23.attention.w_qkv.0.qweight b/triton_models/weights/layers.23.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3701419269ba2882bbdca817ebb37c3556765cda
--- /dev/null
+++ b/triton_models/weights/layers.23.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0fc78d58e4ef712208ad235f64331a6565e84f1d8ac27b0a22572bdb8ca232
+size 39321600
diff --git a/triton_models/weights/layers.23.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.23.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..30750d3ebb3c2ff7e174902efab27d1aca58ad8f
--- /dev/null
+++ b/triton_models/weights/layers.23.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aabd12ff874d7c90c169f50068fc42e30f29f14ca54e86ddd6d5daa3dadd34c9
+size 2457600
diff --git a/triton_models/weights/layers.23.attention.wo.0.qweight b/triton_models/weights/layers.23.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..06a4856cfdad8094199253db3c89733fda9c8e18
--- /dev/null
+++ b/triton_models/weights/layers.23.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2d9b54b54673dd62fcf9929061f7964c144d7334a94a33d4e6b3205762ea92d
+size 13107200
diff --git a/triton_models/weights/layers.23.attention.wo.0.scales_zeros b/triton_models/weights/layers.23.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e7f1698b0e1be1b0c3cba7476c7a2d6770067620
--- /dev/null
+++ b/triton_models/weights/layers.23.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01847dae18a14818a583736d302301a41f10f8383c7f944c25bab6c0fafa0a2b
+size 819200
diff --git a/triton_models/weights/layers.23.attention_norm.weight b/triton_models/weights/layers.23.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..cec04f93e839538e29208e4c443f7b280418205b
--- /dev/null
+++ b/triton_models/weights/layers.23.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45e9cdc5bfbbfcb013a7679f725f7dd5c333086bec2b9b6badd61d04f71b2876
+size 10240
diff --git a/triton_models/weights/layers.23.feed_forward.w13.0.qweight b/triton_models/weights/layers.23.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..af9f0031c4fcab8d35e9162b8c9d10b2f9de74d4
--- /dev/null
+++ b/triton_models/weights/layers.23.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa413a46df79c251a766fa42d196689040c23606d10b6d90ca9963edc355ac2e
+size 70778880
diff --git a/triton_models/weights/layers.23.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.23.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..dbab17ad7163a1b279288ed601b8c4482c8ed9f9
--- /dev/null
+++ b/triton_models/weights/layers.23.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a157351254440b0dfb8caf628ddc6bbd47768d933eb42122b36271118ea1f08
+size 4423680
diff --git a/triton_models/weights/layers.23.feed_forward.w2.0.qweight b/triton_models/weights/layers.23.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..dcf0b1bf5cfa09141fd3ad29855316e2b95005d4
--- /dev/null
+++ b/triton_models/weights/layers.23.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9030195b461c0e8de77b7a7a3d2cd2a68b5fe0a66e0d3d6b4afc55ec0188dac0
+size 35389440
diff --git a/triton_models/weights/layers.23.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.23.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..042f2a071bd35c3c6b65e9f7af7e65bf98afddcc
--- /dev/null
+++ b/triton_models/weights/layers.23.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0047da921eb25ad15196e869836fc213d994e02257f2ad0f624045a0cc1feb9e
+size 2211840
diff --git a/triton_models/weights/layers.23.ffn_norm.weight b/triton_models/weights/layers.23.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..287026ad5e7c84a3035633103ba14678dc180d87
--- /dev/null
+++ b/triton_models/weights/layers.23.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55b2e3748a0fcb1a1b6016f96e1889b9b62251f886bf4126dc29acbb4aed049d
+size 10240
diff --git a/triton_models/weights/layers.24.attention.w_qkv.0.qweight b/triton_models/weights/layers.24.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..73af570a3350d70551a083373bb4b380bfd310c9
--- /dev/null
+++ b/triton_models/weights/layers.24.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc92561b90f05de31ffbeda9fd9549ffbb55597a83cc9e9b7cc1a8ba3a400b90
+size 39321600
diff --git a/triton_models/weights/layers.24.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.24.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..887dca80eb343ab4a425bdc8379b5b04b3dc5e41
--- /dev/null
+++ b/triton_models/weights/layers.24.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23ca55bd49383f9d09f62f91e8d046f12fe22c1747673c65b24c8dbfc02f3833
+size 2457600
diff --git a/triton_models/weights/layers.24.attention.wo.0.qweight b/triton_models/weights/layers.24.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ed73870a5b4c33a5e82e9c7240156ca00640eda0
--- /dev/null
+++ b/triton_models/weights/layers.24.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc5829f96050696a31c504a92bb51cb6f16d1bc728cfcda2d2bb789246a0e3cf
+size 13107200
diff --git a/triton_models/weights/layers.24.attention.wo.0.scales_zeros b/triton_models/weights/layers.24.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0a6f946ad0e5492c8fc8cae5e249208c7a78fdee
--- /dev/null
+++ b/triton_models/weights/layers.24.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6443d2ca419938f8646d47bb1e0544d359eaee09ea9443cd6a9a56c72462d866
+size 819200
diff --git a/triton_models/weights/layers.24.attention_norm.weight b/triton_models/weights/layers.24.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..0b42768a27a6f262c171c97ab3d39c517417f335
--- /dev/null
+++ b/triton_models/weights/layers.24.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eddd201412059e5508e933fec64a305535d09538ec8c12fec62e6dbee6b2396a
+size 10240
diff --git a/triton_models/weights/layers.24.feed_forward.w13.0.qweight b/triton_models/weights/layers.24.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..55d08f408fc1c3c11d12bf7e164fdeac2b867331
--- /dev/null
+++ b/triton_models/weights/layers.24.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de51f1d075b4e6f522c2b086fd8caa571bf96acf0e5bf1b4e1e92582587be229
+size 70778880
diff --git a/triton_models/weights/layers.24.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.24.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a576c989211275e4fcbcad44bfbb816274e4e345
--- /dev/null
+++ b/triton_models/weights/layers.24.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b810bd2d2534e0f2cc6abffc6abb24bb3d4677cf08d55b0f1dc5d83aff174589
+size 4423680
diff --git a/triton_models/weights/layers.24.feed_forward.w2.0.qweight b/triton_models/weights/layers.24.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e89fe1f3021347bcaed9f397019369256a8ffcfa
--- /dev/null
+++ b/triton_models/weights/layers.24.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b677a13819656121cb98c401fd64715352162ae0477ad55228b38e7f373a0ff5
+size 35389440
diff --git a/triton_models/weights/layers.24.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.24.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..14957f70860348d32ab44e38dd73e8c4164ad03b
--- /dev/null
+++ b/triton_models/weights/layers.24.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c356da5338336de243b9c22ed1b3127101350e57fa8b994b34a490a7ebe691e
+size 2211840
diff --git a/triton_models/weights/layers.24.ffn_norm.weight b/triton_models/weights/layers.24.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..ca0a915b5fdeb2c607742914d45de988db719090
--- /dev/null
+++ b/triton_models/weights/layers.24.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f7a7ed402843cfcd2cf3940156a2bfc38ba4173405054b84b8de95c67d2331b
+size 10240
diff --git a/triton_models/weights/layers.25.attention.w_qkv.0.qweight b/triton_models/weights/layers.25.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..4c412aaa7620a131836a68dc416f14156cd7bd34
--- /dev/null
+++ b/triton_models/weights/layers.25.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6ebe55a24615388376df965096d3fb82f7a1959b5d4eb660cdd1a89e7a77925
+size 39321600
diff --git a/triton_models/weights/layers.25.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.25.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a182cc6f58443cf117b3b48727005364ab2a3e0c
--- /dev/null
+++ b/triton_models/weights/layers.25.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c7eb451c226ee4c5d141a199380913579ba1fc98d4f2687fc96c140b0f6b16
+size 2457600
diff --git a/triton_models/weights/layers.25.attention.wo.0.qweight b/triton_models/weights/layers.25.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1467af4313fa5afaa595262997db82986f70bb0a
--- /dev/null
+++ b/triton_models/weights/layers.25.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4214410848d1e1038466834366466ca961143b7fd9c50093ba1fbea0f00762
+size 13107200
diff --git a/triton_models/weights/layers.25.attention.wo.0.scales_zeros b/triton_models/weights/layers.25.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a872d267e45fce215691239de0da99fbf8b4de45
--- /dev/null
+++ b/triton_models/weights/layers.25.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eedbbbdddab8367532cf5fa9c9a89d94327ae066f489cccc194f1d03e641c0b4
+size 819200
diff --git a/triton_models/weights/layers.25.attention_norm.weight b/triton_models/weights/layers.25.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..ac3b64ebedaf982d2dd4e7f91b70b61fa1fd4ec7
--- /dev/null
+++ b/triton_models/weights/layers.25.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1067ad18f8caa487a6033cbd8bc67b8f612978cb1d3513efe641de8ec2f2ebe8
+size 10240
diff --git a/triton_models/weights/layers.25.feed_forward.w13.0.qweight b/triton_models/weights/layers.25.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b686286bb37e23be2d7da4464a49f51663432cfe
--- /dev/null
+++ b/triton_models/weights/layers.25.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb5fc9fd13e83efedb67ca8d66a6d4d654d44fdfd4b69308c1a6c490846ec254
+size 70778880
diff --git a/triton_models/weights/layers.25.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.25.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..19a4d7214468779de1f278582fcc817308e28434
--- /dev/null
+++ b/triton_models/weights/layers.25.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fdad734d038d0c9ff37a7d207604bac99f2dde18852f29837600c03aeb29579
+size 4423680
diff --git a/triton_models/weights/layers.25.feed_forward.w2.0.qweight b/triton_models/weights/layers.25.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..7a41d1b48926b3f9b76614a382b7a87a9e563e2c
--- /dev/null
+++ b/triton_models/weights/layers.25.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e3574bd244b9d40e49b20bb9235fc014205d6461df18822d38b1e114337d29a
+size 35389440
diff --git a/triton_models/weights/layers.25.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.25.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9ef676950d208f5eb572439d0adf1b44af2c1b1f
--- /dev/null
+++ b/triton_models/weights/layers.25.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2485ed307d37bd1d7f7936d1854eec600b0cb61b5c8b1b4c5e8db8639a2c6bbe
+size 2211840
diff --git a/triton_models/weights/layers.25.ffn_norm.weight b/triton_models/weights/layers.25.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1000625515adee887f9f21f5f7630726eb975af5
--- /dev/null
+++ b/triton_models/weights/layers.25.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:873a8713aad4a60d6c9246d6c2377d9006de9dd754471f910ba60311b7c10a23
+size 10240
diff --git a/triton_models/weights/layers.26.attention.w_qkv.0.qweight b/triton_models/weights/layers.26.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..93c0c79ac13b63d7679f6dd50ada18dc27d48eb0
--- /dev/null
+++ b/triton_models/weights/layers.26.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49cadbc2cd2e56e7b64d6103f050359e225c671f83250fb8abacf4f5d3583384
+size 39321600
diff --git a/triton_models/weights/layers.26.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.26.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..530a60890bcc1a440b000b49993ca3eb96fce17d
--- /dev/null
+++ b/triton_models/weights/layers.26.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b08d7a7b996980c869504c58a2b3add36839fca377c49f9a93b84f1d8147a222
+size 2457600
diff --git a/triton_models/weights/layers.26.attention.wo.0.qweight b/triton_models/weights/layers.26.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..9fa6acb230f20838b0b17a0b15ed57fd94c47771
--- /dev/null
+++ b/triton_models/weights/layers.26.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:593672a0401dfa652b7930a23265b1db4b2e2d38d8731581d2155b78a5d50e01
+size 13107200
diff --git a/triton_models/weights/layers.26.attention.wo.0.scales_zeros b/triton_models/weights/layers.26.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a4d77bf1007fec009e4856d37d1b17437407d7f6
--- /dev/null
+++ b/triton_models/weights/layers.26.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6362915dd9ab107a6b6558dacb5b20763cbdd7cd062addfa25c31f7a17299d56
+size 819200
diff --git a/triton_models/weights/layers.26.attention_norm.weight b/triton_models/weights/layers.26.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..498e1b7dfd28e24b971bc77015e4e813eb15ae4a
--- /dev/null
+++ b/triton_models/weights/layers.26.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2521b3469ac2da9ed94e845abd3cfa4ec74590b183a7926717b8067de21496f8
+size 10240
diff --git a/triton_models/weights/layers.26.feed_forward.w13.0.qweight b/triton_models/weights/layers.26.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..6cf087f985139c5a88854084893eee23c724d9e5
--- /dev/null
+++ b/triton_models/weights/layers.26.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1789e411b2631d94e54cd7313664af955c7a89ef173e6409011fd512e67228f5
+size 70778880
diff --git a/triton_models/weights/layers.26.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.26.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..8a6fb051b8458a7fbfd9daf50214585b276c1918
--- /dev/null
+++ b/triton_models/weights/layers.26.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b9d01200baa6827e06da0c69a1f55d6d7c767631794ccc1faf1849d9167e891
+size 4423680
diff --git a/triton_models/weights/layers.26.feed_forward.w2.0.qweight b/triton_models/weights/layers.26.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ece4cc102ddb485d8e110f0a32e3e3628278cf83
--- /dev/null
+++ b/triton_models/weights/layers.26.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbc8e474c8dcaf0555d2d1928179123b520b4d9b3918d038c65c6a757c80946c
+size 35389440
diff --git a/triton_models/weights/layers.26.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.26.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b953ab9c9776c3f3d76b26ef948d809bfba0fcaa
--- /dev/null
+++ b/triton_models/weights/layers.26.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcc7badc11b355b0e98140fdb3d113ed9f35ef9aa01411049571c786c09641c4
+size 2211840
diff --git a/triton_models/weights/layers.26.ffn_norm.weight b/triton_models/weights/layers.26.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..3304487d29f3d2369a897a06c75df27890503cea
--- /dev/null
+++ b/triton_models/weights/layers.26.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:278d82db36fdcfbbb907f735b099bdb477170b7daba6f61099a92924c3570489
+size 10240
diff --git a/triton_models/weights/layers.27.attention.w_qkv.0.qweight b/triton_models/weights/layers.27.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..84e00cf54e17e4d61cb628587f32bb6dc455edbf
--- /dev/null
+++ b/triton_models/weights/layers.27.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e30731f3c675169a342dc6670d3b83cfb74cc01287ad2b0d447e547bae46f2ed
+size 39321600
diff --git a/triton_models/weights/layers.27.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.27.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a5ce4923d8f8cc249f1f65e56da40c13ad70d552
--- /dev/null
+++ b/triton_models/weights/layers.27.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27bcb32fafb07864f78c6d684b31741c693a20d154af388b8433a3359f6240a0
+size 2457600
diff --git a/triton_models/weights/layers.27.attention.wo.0.qweight b/triton_models/weights/layers.27.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..6068f5ac77e6da9962d9fa93a5c4cba663e6bf27
--- /dev/null
+++ b/triton_models/weights/layers.27.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c99dcedd25a9dd14bc56b7ab8b33cbb95396eff227485719b52ab654a33fcf03
+size 13107200
diff --git a/triton_models/weights/layers.27.attention.wo.0.scales_zeros b/triton_models/weights/layers.27.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0d6572873afa6bfacad45018d054ddc98f46fd3e
--- /dev/null
+++ b/triton_models/weights/layers.27.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7e86589a1e5d96e2c3017e00c68d7f1151bf274c22ecd8cc0837b295f72fada
+size 819200
diff --git a/triton_models/weights/layers.27.attention_norm.weight b/triton_models/weights/layers.27.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..50aa9a23be62e59f5c14fb9f0573e3798e154db1
--- /dev/null
+++ b/triton_models/weights/layers.27.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:750b4cfdcda59682a05513f8ebca40490df2d4c2a04ef870dd1ff57c2c2564b2
+size 10240
diff --git a/triton_models/weights/layers.27.feed_forward.w13.0.qweight b/triton_models/weights/layers.27.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..03893c344bb5680e9562ee0521b9a84990930b5f
--- /dev/null
+++ b/triton_models/weights/layers.27.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:978125c0ae2cb51951d1047bee97e87cd418deafc192423f7e765bdd041148d8
+size 70778880
diff --git a/triton_models/weights/layers.27.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.27.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9de545511d787b6af23fb2d73e15c85051d6342a
--- /dev/null
+++ b/triton_models/weights/layers.27.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b9cd386d2ee4eff5fec24156239b7ef6d305d575a04257eb06d135fb430b0bc
+size 4423680
diff --git a/triton_models/weights/layers.27.feed_forward.w2.0.qweight b/triton_models/weights/layers.27.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d0326fe3270d215aaa0373c69a8145a5789e0c0c
--- /dev/null
+++ b/triton_models/weights/layers.27.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a1eb132cfc839ca3b40f8a0cbbb41dfd612dcc04e2ed8a1417270ab6d6bd779
+size 35389440
diff --git a/triton_models/weights/layers.27.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.27.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..00d2dc9132cb3875dadb959c0d61a4f7d4ddbb6e
--- /dev/null
+++ b/triton_models/weights/layers.27.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:539426cda10af4f7f0ea6b664a4eb782421d266addd121124974ada741af4d5a
+size 2211840
diff --git a/triton_models/weights/layers.27.ffn_norm.weight b/triton_models/weights/layers.27.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..929b2164631fca15bfeafd1eb43327e75855403b
--- /dev/null
+++ b/triton_models/weights/layers.27.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ac3fdf0af8ccb5ed8154c79ce3fdc3ce84411f6850a6e74b000aaa9508bd8d6
+size 10240
diff --git a/triton_models/weights/layers.28.attention.w_qkv.0.qweight b/triton_models/weights/layers.28.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..34a85bcdb8afc6c821f575477717769cf8ce4927
--- /dev/null
+++ b/triton_models/weights/layers.28.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:294162cc0e4148b788f6331c29c9e289b6e9b67d968dffcd939aafc54c9394da
+size 39321600
diff --git a/triton_models/weights/layers.28.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.28.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..8a33ee971a4cfed74960eb32463e8fac56227288
--- /dev/null
+++ b/triton_models/weights/layers.28.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81708ffe2b6a068d998815070d6dc076fcaa51fe0fb100cd864aa53588d67e84
+size 2457600
diff --git a/triton_models/weights/layers.28.attention.wo.0.qweight b/triton_models/weights/layers.28.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..35631d000f40b2dd4e15f6bb140316ab86bf8beb
--- /dev/null
+++ b/triton_models/weights/layers.28.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9913861c5b5e98dc5f0c8c351cce7c18d73119e6fb4bc9d3deaa22c163ddee18
+size 13107200
diff --git a/triton_models/weights/layers.28.attention.wo.0.scales_zeros b/triton_models/weights/layers.28.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f083f295a66eaf716d42f12dbe26172a4e4b1868
--- /dev/null
+++ b/triton_models/weights/layers.28.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add8467502965525eba05e938e44b5696dbc4cea13466ec24e277f8e6860c9d1
+size 819200
diff --git a/triton_models/weights/layers.28.attention_norm.weight b/triton_models/weights/layers.28.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..aca56d06d4d7e27fc8eb6f3f1c6b19b44197c0fe
--- /dev/null
+++ b/triton_models/weights/layers.28.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e6954637850e2f0e1f35943df9b8cdc366f9fc655c8eab9ba46aaf9e6b3bfc
+size 10240
diff --git a/triton_models/weights/layers.28.feed_forward.w13.0.qweight b/triton_models/weights/layers.28.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a44f194e0f2ef3606a64e2b09a6b7ec75f599e9e
--- /dev/null
+++ b/triton_models/weights/layers.28.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9e293a63df216eb02f62afe77604608189e51a7c909a4c0427fc6837bc63910
+size 70778880
diff --git a/triton_models/weights/layers.28.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.28.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..eb0ab64ff267f88a85c355029b9b43d15a735739
--- /dev/null
+++ b/triton_models/weights/layers.28.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0f74ba24ea4c3d5eb77e0b6fc7f9bd1cf86e43dfa8a9d8579b51b1e522b28a2
+size 4423680
diff --git a/triton_models/weights/layers.28.feed_forward.w2.0.qweight b/triton_models/weights/layers.28.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f65611e1c4bb8220b998621ab00eda98798778b2
--- /dev/null
+++ b/triton_models/weights/layers.28.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8340d0e3dab5f95f5eb5b448b8258e2d8b86168aa2157f69a622b5f4540a0d78
+size 35389440
diff --git a/triton_models/weights/layers.28.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.28.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b5e12410abe1917fbb43d596335d9df09f43d3a1
--- /dev/null
+++ b/triton_models/weights/layers.28.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7b88602fa7311052a5f89a8df51ae551a26507622ef138ffb3370fedda9e948
+size 2211840
diff --git a/triton_models/weights/layers.28.ffn_norm.weight b/triton_models/weights/layers.28.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6a98fab11ee2d4abe85f323db8936419a2cb974f
--- /dev/null
+++ b/triton_models/weights/layers.28.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:647d7759cae2ca080bf030ce3d845c0cf42b4b1437dd54b8be2207582d9acc83
+size 10240
diff --git a/triton_models/weights/layers.29.attention.w_qkv.0.qweight b/triton_models/weights/layers.29.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..170f329227d0f813f94fd915409326034a654f03
--- /dev/null
+++ b/triton_models/weights/layers.29.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59691e116a74d2b67963ae8f732b30f32d50965405b9fcc5a7fd08d03c8e48a1
+size 39321600
diff --git a/triton_models/weights/layers.29.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.29.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e8ca6aee62a3d333fd981eb995c6a6ee6fc0b5bf
--- /dev/null
+++ b/triton_models/weights/layers.29.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99c6764954cf1b097747b438aca5f5f3ed1386aa9f39e690db290c21ade9a2fc
+size 2457600
diff --git a/triton_models/weights/layers.29.attention.wo.0.qweight b/triton_models/weights/layers.29.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d557710affc166b40d5973ccd2916d68337a41a7
--- /dev/null
+++ b/triton_models/weights/layers.29.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3563299ff5f8f498d7d0235272247cc005334619dd7b7679543f7659e2b08cd2
+size 13107200
diff --git a/triton_models/weights/layers.29.attention.wo.0.scales_zeros b/triton_models/weights/layers.29.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5897c55267a9c290ea8bf969288d0a6c8f333175
--- /dev/null
+++ b/triton_models/weights/layers.29.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab6e6eeb79e87568605bf403ed9bf41a94c36056bed831198b09f6fa5a1bb3ac
+size 819200
diff --git a/triton_models/weights/layers.29.attention_norm.weight b/triton_models/weights/layers.29.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b5c5e773cf5a07669d26487e430d087fe743c0f5
--- /dev/null
+++ b/triton_models/weights/layers.29.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ab87c6f8c93c848cd75c7e77c345851c3d34b434f3269de48533bd1461d9b62
+size 10240
diff --git a/triton_models/weights/layers.29.feed_forward.w13.0.qweight b/triton_models/weights/layers.29.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..aff460b58be7ea49acaaab59e3a6e7a2829d1ecc
--- /dev/null
+++ b/triton_models/weights/layers.29.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44dacca51044a6a46bb00d96a8e63ed0b26af25598e02ecd3ef04d7533526bf9
+size 70778880
diff --git a/triton_models/weights/layers.29.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.29.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1b15fe20895be8233aee3dddc780df6266c2acb9
--- /dev/null
+++ b/triton_models/weights/layers.29.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9090cef05b427afce71246682a0f392746dbd14d977b217a6f7a7065607d4b4b
+size 4423680
diff --git a/triton_models/weights/layers.29.feed_forward.w2.0.qweight b/triton_models/weights/layers.29.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..79e85467525c5f2ba49ae7a5c34419929de98c2f
--- /dev/null
+++ b/triton_models/weights/layers.29.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7efed8831fb1f0041667851979387b85e0e1c28a5b09a7e237a0f8c607a6e793
+size 35389440
diff --git a/triton_models/weights/layers.29.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.29.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ef0b183eb08c1e91340291099c5cbe1364e600e7
--- /dev/null
+++ b/triton_models/weights/layers.29.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f49a083f86fb725245964020f61ae8f6645c1c13549ba537d83277ba4f8955f
+size 2211840
diff --git a/triton_models/weights/layers.29.ffn_norm.weight b/triton_models/weights/layers.29.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b5bd2a268b5af846aa36fdc4567e4dbe5b5e2435
--- /dev/null
+++ b/triton_models/weights/layers.29.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33f0b03450d1537c8b1914b63aa9c49e3d0bddc8f78f80fd58f923db5c779e79
+size 10240
diff --git a/triton_models/weights/layers.3.attention.w_qkv.0.qweight b/triton_models/weights/layers.3.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b0732c267b0f17e115420b095eaddc72d141ef01
--- /dev/null
+++ b/triton_models/weights/layers.3.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2697800453ff8a550e8d05ca4d1fb8c07d240f86321d3f2413a2c033993897ee
+size 39321600
diff --git a/triton_models/weights/layers.3.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.3.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..7a4a0339eed8006b55439af8af921f911870605d
--- /dev/null
+++ b/triton_models/weights/layers.3.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f15c92b9bc68b15233f788de8cedfaf46ec21b1158e8b013dcac80c2f2292ab4
+size 2457600
diff --git a/triton_models/weights/layers.3.attention.wo.0.qweight b/triton_models/weights/layers.3.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1abb0a2c5e744bf869d54c79502ef91a4c404575
--- /dev/null
+++ b/triton_models/weights/layers.3.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da886d77e50bfbeffd5261bbf15d7fe56edca0c74ac21ea0ad4ee7416d6eb9bd
+size 13107200
diff --git a/triton_models/weights/layers.3.attention.wo.0.scales_zeros b/triton_models/weights/layers.3.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..52fe4bd1f6ff873458c2b1ef536d1e403a311e23
--- /dev/null
+++ b/triton_models/weights/layers.3.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4621163114c7bdf0eb98b4eff98db42938e80e7183596292f1e698d894c60ba6
+size 819200
diff --git a/triton_models/weights/layers.3.attention_norm.weight b/triton_models/weights/layers.3.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..052d6c4934e8a3896079125050ce133c977f1491
--- /dev/null
+++ b/triton_models/weights/layers.3.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e4cc1e3afabb937151882f43d2c0bf17b8eebec66623cc749b8f7f8a45bf36
+size 10240
diff --git a/triton_models/weights/layers.3.feed_forward.w13.0.qweight b/triton_models/weights/layers.3.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..86d40185cd9d515a0d35faae998a67dc96abe641
--- /dev/null
+++ b/triton_models/weights/layers.3.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f99cca2e43cb7d9437ecff88d9d13090658b0a426f20907c36e2a71cadefabd
+size 70778880
diff --git a/triton_models/weights/layers.3.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.3.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f7eae56cadb8dbca87e9c000b57ae334b3e27d51
--- /dev/null
+++ b/triton_models/weights/layers.3.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac61e68380a9d17a4b0741ec5bb1449216a86468599bff7b6efb0355864b6fd6
+size 4423680
diff --git a/triton_models/weights/layers.3.feed_forward.w2.0.qweight b/triton_models/weights/layers.3.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..fc09f3c1bae19edee6634bf20311fcfa6d3bffa6
--- /dev/null
+++ b/triton_models/weights/layers.3.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c81ae7b6e910318837f9d8adff2723bf16763dd4fb0713c350a55ac1701d8b80
+size 35389440
diff --git a/triton_models/weights/layers.3.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.3.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..919baa889b6358f53be0355cc8a131f4002c4485
--- /dev/null
+++ b/triton_models/weights/layers.3.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f401a1778f8f3b9fe22048864b89ffd66957a6259bec8720f3d872722836057c
+size 2211840
diff --git a/triton_models/weights/layers.3.ffn_norm.weight b/triton_models/weights/layers.3.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..26bfa8d403e2ab21e7c4fc47ccc3e4a9d7197ea9
--- /dev/null
+++ b/triton_models/weights/layers.3.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a555adf9a781988cc2711458d623fc50c7b39fbe49fff7274bd368bc31bd6cc
+size 10240
diff --git a/triton_models/weights/layers.30.attention.w_qkv.0.qweight b/triton_models/weights/layers.30.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1e65703f4d90e082963ce5ea7e1d581fb5ec82d9
--- /dev/null
+++ b/triton_models/weights/layers.30.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0790cc72ed53e07f05a5627ce2ed72177d8eaddfb8410d845c71f75cd9d5d7f1
+size 39321600
diff --git a/triton_models/weights/layers.30.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.30.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f2b3a5d90fa96311c282322200600e88b317214b
--- /dev/null
+++ b/triton_models/weights/layers.30.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db2e5a05031edd7d4ca92ac4416c8fb8f059c8e37fe320ea6131dea683377467
+size 2457600
diff --git a/triton_models/weights/layers.30.attention.wo.0.qweight b/triton_models/weights/layers.30.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d9da4aed30853c32548601f9ba2b7daeae2c5cfc
--- /dev/null
+++ b/triton_models/weights/layers.30.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f3bb24d48436d0634205f097abaef1e4a1dd69b990d77b569708cfcadba1e84
+size 13107200
diff --git a/triton_models/weights/layers.30.attention.wo.0.scales_zeros b/triton_models/weights/layers.30.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3d8fa4ad00525b74bc2081bc36d0e60e240018c2
--- /dev/null
+++ b/triton_models/weights/layers.30.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcbfd40039547f28e9ea7b8adb8a4cf4ee47c086d64d9a94768d8a1bccfdc98a
+size 819200
diff --git a/triton_models/weights/layers.30.attention_norm.weight b/triton_models/weights/layers.30.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..cf2305b21b979d61dcadc915f3074ca1aa1792ce
--- /dev/null
+++ b/triton_models/weights/layers.30.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2271a5af8be5b249be4554c38f1191f6dff492c94ef08b580ed0264dfc70e6
+size 10240
diff --git a/triton_models/weights/layers.30.feed_forward.w13.0.qweight b/triton_models/weights/layers.30.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c7c97bdf301c8094053ce9c985e17f9bfcec1288
--- /dev/null
+++ b/triton_models/weights/layers.30.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bea8e3962e9235e1186cb292a0850b8f3d5fb0558b26044c98d59fc6cdce29d
+size 70778880
diff --git a/triton_models/weights/layers.30.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.30.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e287aa22f8a7275ed007cb5c47d7f3a2ed1e9349
--- /dev/null
+++ b/triton_models/weights/layers.30.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf23f0a4b641b7352f7edc87a740fa353d225999beea7d91c898bcfce2d82d42
+size 4423680
diff --git a/triton_models/weights/layers.30.feed_forward.w2.0.qweight b/triton_models/weights/layers.30.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..45853ab5d0ab2ad3cf38918e6cdadf7476d229be
--- /dev/null
+++ b/triton_models/weights/layers.30.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dcd6a1ff2c867b88e5d78e0b6b38390975b3970bc3e0c728c0587d4de455a2b
+size 35389440
diff --git a/triton_models/weights/layers.30.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.30.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e7080eff1c023a937009cc3d9e6d7cad70d63559
--- /dev/null
+++ b/triton_models/weights/layers.30.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c4c66acd7e52eed656b350432d55bf7555af87357c9320c98195b07f5b3df3a
+size 2211840
diff --git a/triton_models/weights/layers.30.ffn_norm.weight b/triton_models/weights/layers.30.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..654de166c00783cd5c1215b5cba63eed32d97c04
--- /dev/null
+++ b/triton_models/weights/layers.30.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4430691e23cc620423fd5ac94d05be2a3ae25598a73a121d44edd4809612be27
+size 10240
diff --git a/triton_models/weights/layers.31.attention.w_qkv.0.qweight b/triton_models/weights/layers.31.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..40521dd92476e3c076d5c90ac9cb456413eb5461
--- /dev/null
+++ b/triton_models/weights/layers.31.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9cdfc3d6bb11c09527d6955c4cc6914a14aeb77405da872322251c590585af7
+size 39321600
diff --git a/triton_models/weights/layers.31.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.31.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5daa6b3cba24a25cdc8e16bb07678a7a206479af
--- /dev/null
+++ b/triton_models/weights/layers.31.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99d714a9771db774fc86cee11fc49cf55458af960e98f7cc91c4b1676332825d
+size 2457600
diff --git a/triton_models/weights/layers.31.attention.wo.0.qweight b/triton_models/weights/layers.31.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c58259c902fe387282f88af6e625b8f61da469dc
--- /dev/null
+++ b/triton_models/weights/layers.31.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfd3f20becb6e72e872acb5aead475542bf8c8818fac64607f57dc24dd603c75
+size 13107200
diff --git a/triton_models/weights/layers.31.attention.wo.0.scales_zeros b/triton_models/weights/layers.31.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b4fd49dbb084eaa96cb5ed60e76ec34c337d23cc
--- /dev/null
+++ b/triton_models/weights/layers.31.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f1676735b104cc84f2a11d54ebf3d6d049e7835848b85f45bf730a0cc538597
+size 819200
diff --git a/triton_models/weights/layers.31.attention_norm.weight b/triton_models/weights/layers.31.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..420a59e35131615c65ea18de100a28af95615938
--- /dev/null
+++ b/triton_models/weights/layers.31.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18ef39198a124c1ce25092cc78c28ba8d688e67bdf2aecbdb3b62eae0f6b8754
+size 10240
diff --git a/triton_models/weights/layers.31.feed_forward.w13.0.qweight b/triton_models/weights/layers.31.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..85bde4d5d7cdeb91b92c7380152cd0f76ed6b1ea
--- /dev/null
+++ b/triton_models/weights/layers.31.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3af06225dbbff6b5d1aa39c09c7a81978adcb87f71e15e71fd829dad268eb2
+size 70778880
diff --git a/triton_models/weights/layers.31.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.31.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c4ae536cfa46f4d6ab1612df0032ab6323f52e41
--- /dev/null
+++ b/triton_models/weights/layers.31.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab5e87039c7c043b0e1182316d88345064337a17abacfe9c70d9b4a5c25be6d7
+size 4423680
diff --git a/triton_models/weights/layers.31.feed_forward.w2.0.qweight b/triton_models/weights/layers.31.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d1602d7b8abc7ed9f2fcf9872dfec74699e441f4
--- /dev/null
+++ b/triton_models/weights/layers.31.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5edfa0730dd548b9168ff2af5278c102bf4be8996199b959574633ebff450fe1
+size 35389440
diff --git a/triton_models/weights/layers.31.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.31.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..8c5d0a9c48e05698211762f30755161869cdc3a2
--- /dev/null
+++ b/triton_models/weights/layers.31.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c21c26f6c5c98fb756ef76c7e6540845aa4b3f7ad4f4774e18e0da5fcc08c7
+size 2211840
diff --git a/triton_models/weights/layers.31.ffn_norm.weight b/triton_models/weights/layers.31.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..d1f892f2e5faf21d09ddbe960f00a7dab8555686
--- /dev/null
+++ b/triton_models/weights/layers.31.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:393859e4f68f8b2ab8c1ccc492ada79de19935a9e30af1dd66138bbd035e0cd4
+size 10240
diff --git a/triton_models/weights/layers.32.attention.w_qkv.0.qweight b/triton_models/weights/layers.32.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e05e702fbafa80381f686be868488c58515e95cb
--- /dev/null
+++ b/triton_models/weights/layers.32.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4cabdf43c095279a120dd88f790ee4882eb35b56aa73174e77dbc620b27a584
+size 39321600
diff --git a/triton_models/weights/layers.32.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.32.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..33678e31fbda0606e56eaad1bcc9b04e563887ea
--- /dev/null
+++ b/triton_models/weights/layers.32.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:972f13355559bfa6330f502a7262fa4c11cf6d3d479e8888daf4d3bb79bc1b42
+size 2457600
diff --git a/triton_models/weights/layers.32.attention.wo.0.qweight b/triton_models/weights/layers.32.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b84c35ab62908c67f99a6b2f4299a5091913fa64
--- /dev/null
+++ b/triton_models/weights/layers.32.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92ca5aa18017a233c29f1e563edcb07a22685d0a019f722c3ef9f0a6b3d55761
+size 13107200
diff --git a/triton_models/weights/layers.32.attention.wo.0.scales_zeros b/triton_models/weights/layers.32.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..64321736cc3014f29bddd964956b129020c1023b
--- /dev/null
+++ b/triton_models/weights/layers.32.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a227a1dfde7069ad60e9e080c49d0341096b41759e484559b8ee00946aa52e6
+size 819200
diff --git a/triton_models/weights/layers.32.attention_norm.weight b/triton_models/weights/layers.32.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..dc0a13efe67931beba1e895e81faa65697957b6b
--- /dev/null
+++ b/triton_models/weights/layers.32.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7af7d14306d88d81039f2866e9e0a8775bf4e0fd4038cbb89ecae2a137b1564
+size 10240
diff --git a/triton_models/weights/layers.32.feed_forward.w13.0.qweight b/triton_models/weights/layers.32.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c8b5523b4431a613b986537fadf37e9786fe2e5d
--- /dev/null
+++ b/triton_models/weights/layers.32.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ad74f2350e9f638df654efe4c6c9aae2ed6fcc75316247fe6b030588b398f1a
+size 70778880
diff --git a/triton_models/weights/layers.32.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.32.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..23874f78386d17583c1928f5ccc44b3de13dfd7a
--- /dev/null
+++ b/triton_models/weights/layers.32.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0861650511ba3c9179a5210e18b3801fe8cb6bbaeed60dbd06cd9f4695266f
+size 4423680
diff --git a/triton_models/weights/layers.32.feed_forward.w2.0.qweight b/triton_models/weights/layers.32.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b8a1a1892c8e8aa5a8ab50d30d918aeb489d8fb0
--- /dev/null
+++ b/triton_models/weights/layers.32.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7596c917b358aedd9527160c419778f795ec05881dee870ee4d2a9b0f1c83d4
+size 35389440
diff --git a/triton_models/weights/layers.32.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.32.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0e7eddabd431b21fbcc65d724f4b280cf74f118a
--- /dev/null
+++ b/triton_models/weights/layers.32.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c55f63b0709de9ae65da11cb3b4b50a2bafea0b2d30e2dd8c4e61a2f0ea3799
+size 2211840
diff --git a/triton_models/weights/layers.32.ffn_norm.weight b/triton_models/weights/layers.32.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c9f1e32f6bf3cce6c8e8f854cc3ca4a63047073a
--- /dev/null
+++ b/triton_models/weights/layers.32.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a787f2a35d27730e41ede41cbc8a8b6e94c2e882d8c3c23814961168951ff009
+size 10240
diff --git a/triton_models/weights/layers.33.attention.w_qkv.0.qweight b/triton_models/weights/layers.33.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..eef7686699dde1fa063ec4df3758ec603b7344c3
--- /dev/null
+++ b/triton_models/weights/layers.33.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e65009c71890992d2b88bb196f28fdffa6a94ea56ff746bc0f5487b4d99f8b3c
+size 39321600
diff --git a/triton_models/weights/layers.33.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.33.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..91c46ebc83940c1e25ddabc3f16e53b106b7fc6c
--- /dev/null
+++ b/triton_models/weights/layers.33.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baae46ba79efc1612ba5bfae40218541ea2956c8bd59baadffba061d03803689
+size 2457600
diff --git a/triton_models/weights/layers.33.attention.wo.0.qweight b/triton_models/weights/layers.33.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..cf0cf94ea665e6af7c626fa348d89398e7236ef6
--- /dev/null
+++ b/triton_models/weights/layers.33.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:917c8e11250e52feecc756f3bd801affab81641057a12c5c6fd413e40b9ce092
+size 13107200
diff --git a/triton_models/weights/layers.33.attention.wo.0.scales_zeros b/triton_models/weights/layers.33.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..86d68d48cb4fd32c230bd8e816f64c47d3afe1b2
--- /dev/null
+++ b/triton_models/weights/layers.33.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eabd661ef3d8642f2acdc01f6010dda00e0c8dc853e9433c3da1ea8e574e7b8b
+size 819200
diff --git a/triton_models/weights/layers.33.attention_norm.weight b/triton_models/weights/layers.33.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..38cf256e01f47db087a2f5275c736875e04adf97
--- /dev/null
+++ b/triton_models/weights/layers.33.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c928c23a1f356fb50164dd48e7f77712c8b7a637b31549f2c20c602d1567432
+size 10240
diff --git a/triton_models/weights/layers.33.feed_forward.w13.0.qweight b/triton_models/weights/layers.33.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2dd6de165fe2b99a2eb4efc0a9a3c3d0f9ed2892
--- /dev/null
+++ b/triton_models/weights/layers.33.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e96c527aeb46bd741ed59ea5cb8abcfa5002c9ea5c282fea658a1de190fe71
+size 70778880
diff --git a/triton_models/weights/layers.33.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.33.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fcbc302fe92e37547996ff78ba0f36185be2f4b3
--- /dev/null
+++ b/triton_models/weights/layers.33.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7518149ac80f9ba34eebf89d55ebf057fe5b9bb143ff5e254af2b8773f945b04
+size 4423680
diff --git a/triton_models/weights/layers.33.feed_forward.w2.0.qweight b/triton_models/weights/layers.33.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..4bb32af77216ea737ddfca05aae4865106571ccf
--- /dev/null
+++ b/triton_models/weights/layers.33.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1df17ecbd06a31a7c2d809b9501358ed0eafe0b740d4d68d6498dde64f2546e
+size 35389440
diff --git a/triton_models/weights/layers.33.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.33.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5eebff895186ae5822e26842b19d960d2983e2e5
--- /dev/null
+++ b/triton_models/weights/layers.33.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbe36e6d5c5ba99e3624e9979e26375366051ebeeec8fc13d32ae937d4b84679
+size 2211840
diff --git a/triton_models/weights/layers.33.ffn_norm.weight b/triton_models/weights/layers.33.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..3288241a95536f55f30f1b9a3f2c8d11465b5820
--- /dev/null
+++ b/triton_models/weights/layers.33.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81977510fae94f3ab469e47a823bfbd6c792d6ba2099034c67cb553628d8b5f3
+size 10240
diff --git a/triton_models/weights/layers.34.attention.w_qkv.0.qweight b/triton_models/weights/layers.34.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..59bcde47ec933f0754dca2480395c63527c7d44c
--- /dev/null
+++ b/triton_models/weights/layers.34.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4317fdcfe4fcdaf756a153874acb02cbd453d506e42fe6e879b96e03929d8d86
+size 39321600
diff --git a/triton_models/weights/layers.34.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.34.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..98b782f4d4d0609f80363770f85473366164314f
--- /dev/null
+++ b/triton_models/weights/layers.34.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ceff704ac61ef5720e913f4e8ee9e3954ac611a4083500541f2e63b55d8c872
+size 2457600
diff --git a/triton_models/weights/layers.34.attention.wo.0.qweight b/triton_models/weights/layers.34.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1aa1ffdb67d8fe3b549bf0e35d4f19d71dd5c8aa
--- /dev/null
+++ b/triton_models/weights/layers.34.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e24718e79ba06d7f12d366880780a80e88f7ff6caa727b0ccf6df483d5516275
+size 13107200
diff --git a/triton_models/weights/layers.34.attention.wo.0.scales_zeros b/triton_models/weights/layers.34.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6945a83f780a16f31bfd419f35d107e5b3547395
--- /dev/null
+++ b/triton_models/weights/layers.34.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13c7275717bd3c7cf21768d70805dc131ce62bd9dea3e7d1670699e8d038ae90
+size 819200
diff --git a/triton_models/weights/layers.34.attention_norm.weight b/triton_models/weights/layers.34.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..3b0e1eacb1b4afe9a3186288d95c107385ae5818
--- /dev/null
+++ b/triton_models/weights/layers.34.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa0ce1c604cbf0f95ff75c6f31279d1b7c3d71facaa02f20d609f82f63f33109
+size 10240
diff --git a/triton_models/weights/layers.34.feed_forward.w13.0.qweight b/triton_models/weights/layers.34.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2b1c6e64f8cdbeae39bd6d18fcbd9de2da50238b
--- /dev/null
+++ b/triton_models/weights/layers.34.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12bebfdcdecc7fdc9f6ca8867253fa0e07774032cef58fed463c8737917b8997
+size 70778880
diff --git a/triton_models/weights/layers.34.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.34.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..4c29428ba422e632af6553bd6a16fb959e3bc8b6
--- /dev/null
+++ b/triton_models/weights/layers.34.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a543104943fca072f14c360a9daf53cca4adcc6b2f4d30e433fc19fd10d5f839
+size 4423680
diff --git a/triton_models/weights/layers.34.feed_forward.w2.0.qweight b/triton_models/weights/layers.34.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0a374700fd89fea5b7702b4efaff699a34ca3c99
--- /dev/null
+++ b/triton_models/weights/layers.34.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e21100b3c9841813eee147fa313b032c53f4aff7459a902ca0123935c68cee3
+size 35389440
diff --git a/triton_models/weights/layers.34.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.34.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c0de0592eab6e9afbd7b28d8682270f74f23fd3d
--- /dev/null
+++ b/triton_models/weights/layers.34.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:526816346ff43e7c2f3ea4afa0490a7e58230aeed5df941426dc28d7523f3e05
+size 2211840
diff --git a/triton_models/weights/layers.34.ffn_norm.weight b/triton_models/weights/layers.34.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..62e864ecfbfc2ba69d409c2c00580f7fef5f3eac
--- /dev/null
+++ b/triton_models/weights/layers.34.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fabcb5e2455d2892e2d030fd0aa141e1c835874741fa06af15bde5b2d9864658
+size 10240
diff --git a/triton_models/weights/layers.35.attention.w_qkv.0.qweight b/triton_models/weights/layers.35.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..aab005862e4f633ce5c49217e83a419cf4f68d39
--- /dev/null
+++ b/triton_models/weights/layers.35.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26e7e5df2925e9c41dd1f99d3c69500ee4536ab0468bdb1ae6d7b4b1935aa3b8
+size 39321600
diff --git a/triton_models/weights/layers.35.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.35.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..80c4f7f6824371a86e5454aea3f2e8f05181a048
--- /dev/null
+++ b/triton_models/weights/layers.35.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41ca4a33a748a62d2bc478160a3ce379669a4dec2f4a9889644235f73520cc28
+size 2457600
diff --git a/triton_models/weights/layers.35.attention.wo.0.qweight b/triton_models/weights/layers.35.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..cc2d3ef345241dea5b1eb9d7fc1445bf7c454743
--- /dev/null
+++ b/triton_models/weights/layers.35.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfdbdb6f5208b29370900aff361ec6e89f0e36feb5b6426568cf478b400e31ee
+size 13107200
diff --git a/triton_models/weights/layers.35.attention.wo.0.scales_zeros b/triton_models/weights/layers.35.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a399c78a1395e8495c288fef24aaf61bce619de4
--- /dev/null
+++ b/triton_models/weights/layers.35.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e735e912872d7ee9b2820655f98bdaf49823a2c95f99cc6f86a38e0e8dd0b62b
+size 819200
diff --git a/triton_models/weights/layers.35.attention_norm.weight b/triton_models/weights/layers.35.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..e66b8762376218ba4a51e802efc1bc389f538656
--- /dev/null
+++ b/triton_models/weights/layers.35.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b79cb19eff7fc91e607418ac7ee23d98811278eb5981655e23ae99b0c105de7
+size 10240
diff --git a/triton_models/weights/layers.35.feed_forward.w13.0.qweight b/triton_models/weights/layers.35.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c8db3f1bce0989840d80b3f92342e63fc20d06af
--- /dev/null
+++ b/triton_models/weights/layers.35.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cd1b32383f20045ed2120820c9ad1d6aa1814573b454894c1163e8b16889120
+size 70778880
diff --git a/triton_models/weights/layers.35.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.35.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..056fa9ff818074d1c18ae73682e9a00f24c06f96
--- /dev/null
+++ b/triton_models/weights/layers.35.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71ba43954b1de4b0653353700577ed4c05631796f4023fa09f308f2ee8b4dc7
+size 4423680
diff --git a/triton_models/weights/layers.35.feed_forward.w2.0.qweight b/triton_models/weights/layers.35.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..59e12db31c851f678bff231bd39364b4d3e56fe2
--- /dev/null
+++ b/triton_models/weights/layers.35.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508e1c64f0610a3973e45388b6c1ec9c1f553e7cb811ef8d4f8c6468a7497ee1
+size 35389440
diff --git a/triton_models/weights/layers.35.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.35.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d1beeca9f28b3c509244932b9bc805d5e4f2aebf
--- /dev/null
+++ b/triton_models/weights/layers.35.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb8dd34c7d6f11c386913ba6642db47b623bd1408828070af8fcb11274b01183
+size 2211840
diff --git a/triton_models/weights/layers.35.ffn_norm.weight b/triton_models/weights/layers.35.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..76fb36fde104c455172e8fdaf7161f2c532acb7c
--- /dev/null
+++ b/triton_models/weights/layers.35.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e57802d84fcc200bff10b61730b668072a2503aa2a49edfc359636aec7b2e960
+size 10240
diff --git a/triton_models/weights/layers.36.attention.w_qkv.0.qweight b/triton_models/weights/layers.36.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f00d55789f4d6ba7e50bd6643315231eb16fdc81
--- /dev/null
+++ b/triton_models/weights/layers.36.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56993652bd9e6b77ec1e557cb091f06ef3cbaf17b513f480322f90c48b28a827
+size 39321600
diff --git a/triton_models/weights/layers.36.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.36.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ac3d3f84adf6ebca52b4208eb2e69ecfebf67e94
--- /dev/null
+++ b/triton_models/weights/layers.36.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eed93bbfca1c3a9de1e514c0629f19e884c9a8a4e21d306f48fe5b5f0e0b4f46
+size 2457600
diff --git a/triton_models/weights/layers.36.attention.wo.0.qweight b/triton_models/weights/layers.36.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..532bbe0e24f3a6883ee609f52f045c0b4c647292
--- /dev/null
+++ b/triton_models/weights/layers.36.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a146d00846874c6b5cd8adc905d98fca26d6bb31c935f5ab3e08770d034c2dc1
+size 13107200
diff --git a/triton_models/weights/layers.36.attention.wo.0.scales_zeros b/triton_models/weights/layers.36.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2942642ec6ecd8c7f0d960290aee4dc64f970895
--- /dev/null
+++ b/triton_models/weights/layers.36.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:411eee4ccfcf9de52a02bfd7a3ca0cea0ecea69ca19b2e61c66b0addcbc37f08
+size 819200
diff --git a/triton_models/weights/layers.36.attention_norm.weight b/triton_models/weights/layers.36.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..b45f41079dd4df924bfaa77f040afd489ce92ff8
--- /dev/null
+++ b/triton_models/weights/layers.36.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d34da3fdd9970089a26393a6bdbeedcbe7c988c2cdedf6ebf1e7ada37260fb0d
+size 10240
diff --git a/triton_models/weights/layers.36.feed_forward.w13.0.qweight b/triton_models/weights/layers.36.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..acae8ed30ffb83d4325e9f06458569fc1d0a0e25
--- /dev/null
+++ b/triton_models/weights/layers.36.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fed1374281b002e428ab313bbf7f29233e3aa887afca16f6cfaf992871b47c
+size 70778880
diff --git a/triton_models/weights/layers.36.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.36.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6d8316f8319280b440a23f4950a6e327d37a16e4
--- /dev/null
+++ b/triton_models/weights/layers.36.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64205cb1e41ba1a30e7c4b23e74cf4cb79478bf410314b10f1350235f54038f6
+size 4423680
diff --git a/triton_models/weights/layers.36.feed_forward.w2.0.qweight b/triton_models/weights/layers.36.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0617dc86e08fa6c60e111006769f1882d28a7831
--- /dev/null
+++ b/triton_models/weights/layers.36.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00b80ea5a578900819b68d53e54f330dcdf2d62f22e3df1695a008501dacf39
+size 35389440
diff --git a/triton_models/weights/layers.36.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.36.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..cbcc3dfe3f47e759e8755153276a3e226802e10a
--- /dev/null
+++ b/triton_models/weights/layers.36.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:701feda35c4892466bff2e6baf81b0d6a5b336242c2dc2a321a2b3356fe4be1c
+size 2211840
diff --git a/triton_models/weights/layers.36.ffn_norm.weight b/triton_models/weights/layers.36.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..8dc45006436291c976fdf785592f6b01b765cd69
--- /dev/null
+++ b/triton_models/weights/layers.36.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db78a30cafb10cc4392029f2e028f608ed482c87dceb467a0b7777d0b0a8085f
+size 10240
diff --git a/triton_models/weights/layers.37.attention.w_qkv.0.qweight b/triton_models/weights/layers.37.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0d5fcf198cd3dd6d79df874a78518dac36520a9d
--- /dev/null
+++ b/triton_models/weights/layers.37.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77e011bdd833cfa02b19913378f7737310d9186b2d213f2a5f690927bb9a712a
+size 39321600
diff --git a/triton_models/weights/layers.37.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.37.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..02ab11044f4939510afd9393d76443fc80ceb828
--- /dev/null
+++ b/triton_models/weights/layers.37.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:325c1961e6cee7bc9e56aa2bb16dfff1bca465d8a353e406ebb3f448f605a83b
+size 2457600
diff --git a/triton_models/weights/layers.37.attention.wo.0.qweight b/triton_models/weights/layers.37.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b8f048710d012f7d64f06713aea8bded8e3c7d12
--- /dev/null
+++ b/triton_models/weights/layers.37.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2094bc87d200f916ce2d3e99a9785e12a48ecc9d2488c61ffe4a4d32ad6774b
+size 13107200
diff --git a/triton_models/weights/layers.37.attention.wo.0.scales_zeros b/triton_models/weights/layers.37.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3d9143c02ea9dccbda6eb10a63069664ac892f7d
--- /dev/null
+++ b/triton_models/weights/layers.37.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eae8d48835e3298dcffc21f7c2b3e6fdd4c3363f9963170015ea1764c50e8361
+size 819200
diff --git a/triton_models/weights/layers.37.attention_norm.weight b/triton_models/weights/layers.37.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..5922a16817849e6c3088c48490b1c4a156f3b85c
--- /dev/null
+++ b/triton_models/weights/layers.37.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d160efa5bbc58989b76247ab0232d8e0f295378ca4f09e47bb99e3de3df18bdf
+size 10240
diff --git a/triton_models/weights/layers.37.feed_forward.w13.0.qweight b/triton_models/weights/layers.37.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3705904f0ad0e915726eda67bf18f09d7cdb35d9
--- /dev/null
+++ b/triton_models/weights/layers.37.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:126e68d296a71b5b7d3fbb9750fbb2ed107cf125780c3d4c3845fb1922e233e0
+size 70778880
diff --git a/triton_models/weights/layers.37.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.37.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6edce5f77c172c68f4c5f22a07c2370e1b0a4f79
--- /dev/null
+++ b/triton_models/weights/layers.37.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a1f8dd5e63865cbbbab94f8465c040df9c0324e8c2671df2b262f25f913b1b1
+size 4423680
diff --git a/triton_models/weights/layers.37.feed_forward.w2.0.qweight b/triton_models/weights/layers.37.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..02756849fc1cb4a6cdca3a83f6b8a38643dde93d
--- /dev/null
+++ b/triton_models/weights/layers.37.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a43a9b1c9ae71391e7ef925c771a22d57448510a06918a1a02a7b36c320e8c48
+size 35389440
diff --git a/triton_models/weights/layers.37.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.37.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5a4726aff038c411a0eccec3a829850d33e0877f
--- /dev/null
+++ b/triton_models/weights/layers.37.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f0ed714baa871e80229534f2f6c4fea94b94ba700b954845a31bc4ba378143d
+size 2211840
diff --git a/triton_models/weights/layers.37.ffn_norm.weight b/triton_models/weights/layers.37.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..6123cf693465d1ee6659d2dd1657713ae03c83e3
--- /dev/null
+++ b/triton_models/weights/layers.37.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:294cc4ec9f663d2aaa845cc54d75219f4a4e272f3a72d6863f6a088c1b4f6940
+size 10240
diff --git a/triton_models/weights/layers.38.attention.w_qkv.0.qweight b/triton_models/weights/layers.38.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bd0a46ef19cc63b4989af9fd0333d6e6d4f9eddf
--- /dev/null
+++ b/triton_models/weights/layers.38.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7091b5c5798a37ff6e4a627a531ba70d6620a4d694a8dd939cce94529874b55c
+size 39321600
diff --git a/triton_models/weights/layers.38.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.38.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..53133cd675be1dd40fbf5b314f9184ccd01e5aa0
--- /dev/null
+++ b/triton_models/weights/layers.38.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac11bd1aadea4ed1dc08dd423de95696402ce896b7d2abc231aa0c217b73a6ec
+size 2457600
diff --git a/triton_models/weights/layers.38.attention.wo.0.qweight b/triton_models/weights/layers.38.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1fce3c5222999324c54591c8c39ad8a00de53eef
--- /dev/null
+++ b/triton_models/weights/layers.38.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:936ba750f08e5a39ce050617ac644560d68dc458a4f95cc4f0dae1fcd47c1165
+size 13107200
diff --git a/triton_models/weights/layers.38.attention.wo.0.scales_zeros b/triton_models/weights/layers.38.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..4de67df29b215c19a9de0a0b78461d46c81a7b8b
--- /dev/null
+++ b/triton_models/weights/layers.38.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c90cc66e869c8d3d3cbcf3bad5a574b44544e1cbae7f0f1bf3c2e5d7feb6101e
+size 819200
diff --git a/triton_models/weights/layers.38.attention_norm.weight b/triton_models/weights/layers.38.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..806d70b6e44c391a70494a837223162c038a37d8
--- /dev/null
+++ b/triton_models/weights/layers.38.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d71c6c6e3178689fc64057d17e3c83de696d8cda1e2d4e779e09eec367733d
+size 10240
diff --git a/triton_models/weights/layers.38.feed_forward.w13.0.qweight b/triton_models/weights/layers.38.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..86c8f9b888bfbe757cef62460ab8793097c8f542
--- /dev/null
+++ b/triton_models/weights/layers.38.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:701efe3efeb2f36f3bc91ca02bf3521c5970fd546fbea61d07bb823cbd4d12de
+size 70778880
diff --git a/triton_models/weights/layers.38.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.38.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3b3c959ddfe8e1cde8fea13961844fbf3605dc9a
--- /dev/null
+++ b/triton_models/weights/layers.38.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2469472f214c93cef1aa32072203267fe2e1fd3d064ce6d65291c087130a7bd3
+size 4423680
diff --git a/triton_models/weights/layers.38.feed_forward.w2.0.qweight b/triton_models/weights/layers.38.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3abb4f4bec01bdf56053b89f77a46a770754d755
--- /dev/null
+++ b/triton_models/weights/layers.38.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cba6389bddfb0f9edcb939214165f18212d2050e3e3c14115aff58ca2bdc5c94
+size 35389440
diff --git a/triton_models/weights/layers.38.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.38.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e55b51dff6e6833b873a09fcb778aba56b3ef790
--- /dev/null
+++ b/triton_models/weights/layers.38.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4af3ab9ef907f1d0bdd530ac74a4a9d99b809549a3e9ab406d92f5836a88e3da
+size 2211840
diff --git a/triton_models/weights/layers.38.ffn_norm.weight b/triton_models/weights/layers.38.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..3aa66547ec0b6247f78b9c1ddc91465d738c2865
--- /dev/null
+++ b/triton_models/weights/layers.38.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e27ea4cb8121eb66ca1c17b6bf3b483abe6cd24ad001ba9cc1d6ee1188e0333
+size 10240
diff --git a/triton_models/weights/layers.39.attention.w_qkv.0.qweight b/triton_models/weights/layers.39.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5aaead279daf4dc8e112cc47d3a26036895a2284
--- /dev/null
+++ b/triton_models/weights/layers.39.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7499e902cb1546aa75a88b4534248819cb32ce4535d58cbeccb0fb3fa6e6686
+size 39321600
diff --git a/triton_models/weights/layers.39.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.39.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..4b55390d0adcac98a45b84240db4dd56185827b2
--- /dev/null
+++ b/triton_models/weights/layers.39.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:639f3b51d5aff9135db1e5b24eefeea3ba929ac0b1d4a34d0a6ef3ec9a710bae
+size 2457600
diff --git a/triton_models/weights/layers.39.attention.wo.0.qweight b/triton_models/weights/layers.39.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..869f5e4c1b5d8dfcee5984cc928c03ced9056f28
--- /dev/null
+++ b/triton_models/weights/layers.39.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2529a666f27b81e647688bb4a40949582dc534a67ebafba69b3b519da9f7ead
+size 13107200
diff --git a/triton_models/weights/layers.39.attention.wo.0.scales_zeros b/triton_models/weights/layers.39.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e83579f0c5f25ae1b5ebd181d985bb0bcc5790f7
--- /dev/null
+++ b/triton_models/weights/layers.39.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18d117c7d988c79e87d0be41d7a8c7d0fd64eae817ff56871e758a4282f02ea7
+size 819200
diff --git a/triton_models/weights/layers.39.attention_norm.weight b/triton_models/weights/layers.39.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..498206e9cc3149eae0c871dd66c60ee4503e3417
--- /dev/null
+++ b/triton_models/weights/layers.39.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d2970b1089b3352a77225151164a5e22e7383f8205e9a25308254bbb87b735f
+size 10240
diff --git a/triton_models/weights/layers.39.feed_forward.w13.0.qweight b/triton_models/weights/layers.39.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3bcb0c40ac8d2d83f10bbc94d822792eb66330ec
--- /dev/null
+++ b/triton_models/weights/layers.39.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf85f4474993a15c8d2179c1b1cf0f0c505eda1d49e1ca6001445e7579630f98
+size 70778880
diff --git a/triton_models/weights/layers.39.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.39.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..50ff8eb962f703137528817fc2c74873e1918759
--- /dev/null
+++ b/triton_models/weights/layers.39.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20d23d94159279ca1750df31b0f66ae5bec2e3d9e79d4d41f28841e5d25358c7
+size 4423680
diff --git a/triton_models/weights/layers.39.feed_forward.w2.0.qweight b/triton_models/weights/layers.39.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b2dff828060587ec5c00ad3c880c6dda071fef72
--- /dev/null
+++ b/triton_models/weights/layers.39.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd0a822779439f14b9f9cd0055018d56064470c23c759a43753cb234420a8fa3
+size 35389440
diff --git a/triton_models/weights/layers.39.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.39.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1157650d104f73b75bacbae1722171cf70289389
--- /dev/null
+++ b/triton_models/weights/layers.39.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf11822f9bc4f4a916a300f94f100eb656d48f54704ac8ed0f6f5a9085d023b3
+size 2211840
diff --git a/triton_models/weights/layers.39.ffn_norm.weight b/triton_models/weights/layers.39.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..d4041b7ea1a05d88c36d64c9b9e3909ae43a6f77
--- /dev/null
+++ b/triton_models/weights/layers.39.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa1eda1d7b738f37caf8bad688d857c481ea49d9106370e7c11f89aefa6279d7
+size 10240
diff --git a/triton_models/weights/layers.4.attention.w_qkv.0.qweight b/triton_models/weights/layers.4.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..84d8da4ed243147a02183038b0cddb8393cd4528
--- /dev/null
+++ b/triton_models/weights/layers.4.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c99c36446c48f2614b5625d7588c30aa833b50013952320e6127daa79cf96928
+size 39321600
diff --git a/triton_models/weights/layers.4.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.4.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..03ffc6d3ae222cb8fd16454a998e064470aae40f
--- /dev/null
+++ b/triton_models/weights/layers.4.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8221d062f74975f45a06d374bf04e80b846b3b9e02d92222d1995ea7461da11d
+size 2457600
diff --git a/triton_models/weights/layers.4.attention.wo.0.qweight b/triton_models/weights/layers.4.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..fbb66e570d59c591a67205f0455be193b81ed75a
--- /dev/null
+++ b/triton_models/weights/layers.4.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b525339c2a2ecf50273cabf7f3555bc3ed2f41aa7fb15d505ba1bceba7d2e048
+size 13107200
diff --git a/triton_models/weights/layers.4.attention.wo.0.scales_zeros b/triton_models/weights/layers.4.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c5387c0d79ee990ee032a64f8228f339d08278cb
--- /dev/null
+++ b/triton_models/weights/layers.4.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c149f8aa4f3e964ab73c6f00369c5ebb109ed9e3036fa5e40ea2f3d3cdc239
+size 819200
diff --git a/triton_models/weights/layers.4.attention_norm.weight b/triton_models/weights/layers.4.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c304a6f40cb3399be0e0582db52e7fdb917e5120
--- /dev/null
+++ b/triton_models/weights/layers.4.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:560573cba7eb6ff100e43c657f4f17c7cd1f7697835037d8ea1e36fd3dc3bb2b
+size 10240
diff --git a/triton_models/weights/layers.4.feed_forward.w13.0.qweight b/triton_models/weights/layers.4.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..91c4fb430a5ddbe7756e9f6713042d4573442304
--- /dev/null
+++ b/triton_models/weights/layers.4.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44c91a7c35a0eb804a98c6efda89e69b0c6270f6d67d6ee8b6ff0e9065f4ceee
+size 70778880
diff --git a/triton_models/weights/layers.4.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.4.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..cfdff3631601e474047a8f2c0f53cff417ee0213
--- /dev/null
+++ b/triton_models/weights/layers.4.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4f510050db938c9b9c05ef0d049ca239c033c36c70a98a8f6b5b242eb2c4b18
+size 4423680
diff --git a/triton_models/weights/layers.4.feed_forward.w2.0.qweight b/triton_models/weights/layers.4.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..93cac60f9d04b3f0d933d903dce7761e004c03ae
--- /dev/null
+++ b/triton_models/weights/layers.4.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4225435c4c1e63e85422811a34e628f4f43cf58fe3af89d15f5b8b5d0f4b405
+size 35389440
diff --git a/triton_models/weights/layers.4.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.4.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..57171ed10fae63ffe54cfb6c19618e269998a80e
--- /dev/null
+++ b/triton_models/weights/layers.4.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5980a630f9f7c990959b3afe35d500b5723c7dcf84cc19a46335b4c5f4b8adeb
+size 2211840
diff --git a/triton_models/weights/layers.4.ffn_norm.weight b/triton_models/weights/layers.4.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..8f233bd398a235ab339da841770db17a05d26540
--- /dev/null
+++ b/triton_models/weights/layers.4.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f5ecc8d8deb357f66b4135ab08406d21a116e7a8af3db0ec0344b4948b75f0f
+size 10240
diff --git a/triton_models/weights/layers.40.attention.w_qkv.0.qweight b/triton_models/weights/layers.40.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..df0186c0c9337bc4f1a2c0ea6e77caec9b33f9d1
--- /dev/null
+++ b/triton_models/weights/layers.40.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c294bc3a95ba3b44392c95df60c43a0b39e52f0524e00ebca87d9197897f21
+size 39321600
diff --git a/triton_models/weights/layers.40.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.40.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..faaf19e8a263a159c5c8c7428b9a7ad84d9d7c4a
--- /dev/null
+++ b/triton_models/weights/layers.40.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc5682bb0d41d1fa06f542995fa39ee0f9dac2e8b0c2a9d26c1ac2e93b69e28c
+size 2457600
diff --git a/triton_models/weights/layers.40.attention.wo.0.qweight b/triton_models/weights/layers.40.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..fb8281f2ae32e0c091a78217222c339748e8e62b
--- /dev/null
+++ b/triton_models/weights/layers.40.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57ac3727bab93097fd86b6fd92436ab4ea430883ae4414f5b36d9b49460b354f
+size 13107200
diff --git a/triton_models/weights/layers.40.attention.wo.0.scales_zeros b/triton_models/weights/layers.40.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f4b3d3ba7526bbd7051652b9084898328b899196
--- /dev/null
+++ b/triton_models/weights/layers.40.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:833bb665a1f64105a3c67f3d66ce4a9df60f7fc2d1f7d546448e87bf87d0d3ef
+size 819200
diff --git a/triton_models/weights/layers.40.attention_norm.weight b/triton_models/weights/layers.40.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..0141e6aceb34b8f47fd8be1d2dfbde496508ac02
--- /dev/null
+++ b/triton_models/weights/layers.40.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32ecb2c22fea11208ca1876f0ca96021b593dc6b4d229921c995bc42f80a79f3
+size 10240
diff --git a/triton_models/weights/layers.40.feed_forward.w13.0.qweight b/triton_models/weights/layers.40.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ff7e63ac0153c963b806597dcf45bd8569771e87
--- /dev/null
+++ b/triton_models/weights/layers.40.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95b1e22ab265a82336a422a67653304b3cc6caa17737fafde92d3f896046596b
+size 70778880
diff --git a/triton_models/weights/layers.40.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.40.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b0681d1c946f26dea1d1b0fb9fd4922c8b3fcb14
--- /dev/null
+++ b/triton_models/weights/layers.40.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1096808f812c329d3fec20d87ffe221d40a2ef7392b945a7af738e8e7b7e9b3f
+size 4423680
diff --git a/triton_models/weights/layers.40.feed_forward.w2.0.qweight b/triton_models/weights/layers.40.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..59caee6b8a6af35b4ffb97f7ead6bcfbe463ef66
--- /dev/null
+++ b/triton_models/weights/layers.40.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f7fb5799d22ec13194202df9beb5d6b5d75a5c161fd1e69abe55cd5c2a2efed
+size 35389440
diff --git a/triton_models/weights/layers.40.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.40.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..72b8331c71183b18202cdfb3f14e0ee2594e0e0e
--- /dev/null
+++ b/triton_models/weights/layers.40.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32f4b5a118d49c7bda6875800503629e911c04f1cf66e5e9426a5f234113e255
+size 2211840
diff --git a/triton_models/weights/layers.40.ffn_norm.weight b/triton_models/weights/layers.40.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..575d7ad255d34828aeed4fec9bbeff9d797f5566
--- /dev/null
+++ b/triton_models/weights/layers.40.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4091f715a3aac7e680279d7fb4b7dc4b870bd57b465b18b4aff0519e805b7f80
+size 10240
diff --git a/triton_models/weights/layers.41.attention.w_qkv.0.qweight b/triton_models/weights/layers.41.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..72122d5d45e1b911786b44d76165cad383caaf9c
--- /dev/null
+++ b/triton_models/weights/layers.41.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d10cffaf557a41906e735b261f5b488840f3c1c051a628623a4a50232ace99e4
+size 39321600
diff --git a/triton_models/weights/layers.41.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.41.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5f0eec8ff7d0306eb08c2c96cc501d5db2ddfa1f
--- /dev/null
+++ b/triton_models/weights/layers.41.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d5511dc05c8311856def70ab2c02b05ad9bba4284efc006264af3c1bbb74b31
+size 2457600
diff --git a/triton_models/weights/layers.41.attention.wo.0.qweight b/triton_models/weights/layers.41.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a02573c70e499a9055502da27ea8c99ae003571c
--- /dev/null
+++ b/triton_models/weights/layers.41.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9af88328b68ccd7436d9ca2136dbbf7038ac5b7b35f3c6bbbdacfed043d145
+size 13107200
diff --git a/triton_models/weights/layers.41.attention.wo.0.scales_zeros b/triton_models/weights/layers.41.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f9b391e1c3da69e6e8b049ab5e29b388046c78c8
--- /dev/null
+++ b/triton_models/weights/layers.41.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:912027ca1decc8bd74e4a7b6209b419e41427b48d4a464c0627e91789cb8d2a1
+size 819200
diff --git a/triton_models/weights/layers.41.attention_norm.weight b/triton_models/weights/layers.41.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..5d2885eaa330a1d25ee4d163df743f27d8fe747e
--- /dev/null
+++ b/triton_models/weights/layers.41.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6357832983a96fe75b923c80b0038cf93c8cdd296e473944f32f0f5a0714d47d
+size 10240
diff --git a/triton_models/weights/layers.41.feed_forward.w13.0.qweight b/triton_models/weights/layers.41.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ec972124e68adc9f0f14b8eaef08427c87930b13
--- /dev/null
+++ b/triton_models/weights/layers.41.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e00b951acdcb876f31db90f4d59ec692089c2834b896ee8dd0e5a7107dbf460e
+size 70778880
diff --git a/triton_models/weights/layers.41.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.41.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..66a0d3bd029f26d98e54edfe7931d083f62c8c16
--- /dev/null
+++ b/triton_models/weights/layers.41.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0620bd3a4c63fb6d8da4bdff6378e959ae65101853d4f1d4ab52c5a1fb8eff2
+size 4423680
diff --git a/triton_models/weights/layers.41.feed_forward.w2.0.qweight b/triton_models/weights/layers.41.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a2a9ad18c77158b76a76615ec0f9e22689cecf65
--- /dev/null
+++ b/triton_models/weights/layers.41.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9351434f432c445b09aea58aa8d53fa980b8f98a62de04b8d6a49aa87b7edd1c
+size 35389440
diff --git a/triton_models/weights/layers.41.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.41.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c93b97018671c1df78aa74dc42d4b87076c35c02
--- /dev/null
+++ b/triton_models/weights/layers.41.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb07228faf6e2146fd598726e52c7e18a6fae783811bca5333ff9934741999
+size 2211840
diff --git a/triton_models/weights/layers.41.ffn_norm.weight b/triton_models/weights/layers.41.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c4d923261af827a85658de95f1e52fcffa2867cb
--- /dev/null
+++ b/triton_models/weights/layers.41.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b1e4c28a1a3b6ccc3c04410b58098d41d577bf179bf89480f076c43ac9b47a8
+size 10240
diff --git a/triton_models/weights/layers.42.attention.w_qkv.0.qweight b/triton_models/weights/layers.42.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a09f00c23d1c02d0bd870ceb30ce3ce809f24af2
--- /dev/null
+++ b/triton_models/weights/layers.42.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02bc3dbf873472a1cc8c4c096f965037baf4f1c5917e9ed59dd6e4f12bc2daeb
+size 39321600
diff --git a/triton_models/weights/layers.42.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.42.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..73a88150bc4686cc1ad638f881e951abf0a642f7
--- /dev/null
+++ b/triton_models/weights/layers.42.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd5d11dd1e46d08ed70622a1aa88e8bbcd881f875d418b5fb53df70706920352
+size 2457600
diff --git a/triton_models/weights/layers.42.attention.wo.0.qweight b/triton_models/weights/layers.42.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a5e44fa147daea01ce314b15491710b52f3c81c6
--- /dev/null
+++ b/triton_models/weights/layers.42.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8976fb3991a9e4fa4610d21fd507aed0ed6a1e4888a66c3b87a61fe5e830813d
+size 13107200
diff --git a/triton_models/weights/layers.42.attention.wo.0.scales_zeros b/triton_models/weights/layers.42.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..62a6ee6b6bb1d64f2f7df03f10aafe5778048989
--- /dev/null
+++ b/triton_models/weights/layers.42.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deed6e751e6970eb5209a9357086238a961ef2a2c9cf28b795fea1bab4bbf842
+size 819200
diff --git a/triton_models/weights/layers.42.attention_norm.weight b/triton_models/weights/layers.42.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..fcf1715bebaced6ed09d00d6911f43bc42f2683c
--- /dev/null
+++ b/triton_models/weights/layers.42.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a13d96aaaebbe4b0a06d3a9530d90456f89d3e7092f7a853954bcd696bbc008c
+size 10240
diff --git a/triton_models/weights/layers.42.feed_forward.w13.0.qweight b/triton_models/weights/layers.42.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..85b15ef2ee1b3259629666abf007015273fc6584
--- /dev/null
+++ b/triton_models/weights/layers.42.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b3da6adfa7658ea0b45fb43f9ce1e8538d1d65e066a70ea744278e13dfccc83
+size 70778880
diff --git a/triton_models/weights/layers.42.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.42.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5e6f678b412de77690fde7518a36d7f387920e06
--- /dev/null
+++ b/triton_models/weights/layers.42.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9044a5774340b794d10e3fa151a4221d8d8d16ec0889aaa066f059aa6f7afe8f
+size 4423680
diff --git a/triton_models/weights/layers.42.feed_forward.w2.0.qweight b/triton_models/weights/layers.42.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..de85117ed1460b918c4ab3853db7d25b7b650d9e
--- /dev/null
+++ b/triton_models/weights/layers.42.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04a9aa12e6b2a57c93a5f6881639b22129dd4d7d1f584ca997e476cb1c8cfdf5
+size 35389440
diff --git a/triton_models/weights/layers.42.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.42.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f300d2f4c06c785eb322eaa726674b4bf42987aa
--- /dev/null
+++ b/triton_models/weights/layers.42.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b17e8ef5c262487d99d6713a98bd9d35e160a9701b74e2b801ed85b4762d3a3
+size 2211840
diff --git a/triton_models/weights/layers.42.ffn_norm.weight b/triton_models/weights/layers.42.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..cc63b1967b596b45f5b511eb8fff807f67ed8ab0
--- /dev/null
+++ b/triton_models/weights/layers.42.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e6c56049902467bde53929c1f0d900d8f2a77f4327993b7cf0b69a1c3deea4c
+size 10240
diff --git a/triton_models/weights/layers.43.attention.w_qkv.0.qweight b/triton_models/weights/layers.43.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c4e47e64d1a6d8d171f0057099b46f6d4f310cb0
--- /dev/null
+++ b/triton_models/weights/layers.43.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:596866d0337001fbcf5d41c006b22419566bbdc819bde1cc4ad4346b7579af16
+size 39321600
diff --git a/triton_models/weights/layers.43.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.43.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1c74ea3b1e476039cfde266df01cc3a913243271
--- /dev/null
+++ b/triton_models/weights/layers.43.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5b87a5f7a4e0dc7d884a4ed4f128ce727346d5d1e7a27ad394bf8d06713e4f8
+size 2457600
diff --git a/triton_models/weights/layers.43.attention.wo.0.qweight b/triton_models/weights/layers.43.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..72ab228de3886fb2506751b0b27c2d8a87575dfe
--- /dev/null
+++ b/triton_models/weights/layers.43.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8615fe55576d17e05fc5ee30d748d0e4a8d7e6f49494743d118934ec6c16a80c
+size 13107200
diff --git a/triton_models/weights/layers.43.attention.wo.0.scales_zeros b/triton_models/weights/layers.43.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3e2e3462fa73716f6c66461f556a1a8f7259696a
--- /dev/null
+++ b/triton_models/weights/layers.43.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fae6d4260d689cdf4e37df122f1eff339d266763e773c1e619f624b573e5fcb1
+size 819200
diff --git a/triton_models/weights/layers.43.attention_norm.weight b/triton_models/weights/layers.43.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..7c19f74cc239f21f3ed40a6e7264b3bea5584029
--- /dev/null
+++ b/triton_models/weights/layers.43.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a8659140f7e1fda099c89955e6123442289ecd4f422cf42b5dc0eb3f9b2f1af
+size 10240
diff --git a/triton_models/weights/layers.43.feed_forward.w13.0.qweight b/triton_models/weights/layers.43.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f3eff7ae2a008222560f649689508336cbff026e
--- /dev/null
+++ b/triton_models/weights/layers.43.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65670d21937bdff406c0c58d67c77f1bba4cdc27c3c0874a12b0ae2559cd9510
+size 70778880
diff --git a/triton_models/weights/layers.43.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.43.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0a4d04fd5420a647949ad3cb0302e8b7806b4fae
--- /dev/null
+++ b/triton_models/weights/layers.43.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b0d6fc091242c661f5cd9e04470626832d8d656d8c090f58f6a14971901208
+size 4423680
diff --git a/triton_models/weights/layers.43.feed_forward.w2.0.qweight b/triton_models/weights/layers.43.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..90fdef0d10c0673c4a97504e8f9f1a9c5e7cec06
--- /dev/null
+++ b/triton_models/weights/layers.43.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f433f1317b38c42c86d3be4092b214698480f4ed42b63e18872a254ae5947cc3
+size 35389440
diff --git a/triton_models/weights/layers.43.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.43.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..f446fce2a95c43a85b282b8d03218a71aa3d56b4
--- /dev/null
+++ b/triton_models/weights/layers.43.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e1d83cb197fa3d00679534218088970cf23d1ef95cf4f8a6b3824c7ae9c7a9
+size 2211840
diff --git a/triton_models/weights/layers.43.ffn_norm.weight b/triton_models/weights/layers.43.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..e0b86f3866abdac2de6cd97cc9579ce2d6648931
--- /dev/null
+++ b/triton_models/weights/layers.43.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c096d5696370b1e01c7edf6ad6fbdbaaa32cb4f1c90c32de9d2b36cb69e872f
+size 10240
diff --git a/triton_models/weights/layers.44.attention.w_qkv.0.qweight b/triton_models/weights/layers.44.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..51e95577e75346a1de6d0176ced9c49bc42219e5
--- /dev/null
+++ b/triton_models/weights/layers.44.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2a9ed82302e731b8ebdbb4a0f08fe507ff81d4f669247886c35caeaa083e7e7
+size 39321600
diff --git a/triton_models/weights/layers.44.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.44.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..93c0019acf37306abc5e2d3fd4775ca95b6d5c96
--- /dev/null
+++ b/triton_models/weights/layers.44.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73ff6e4ba35370da4fc1f8747aab96f6f6edebd942ee5890b2b8b16d54194433
+size 2457600
diff --git a/triton_models/weights/layers.44.attention.wo.0.qweight b/triton_models/weights/layers.44.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..66002364d8e206536ac97558f8ad83cfdac2b122
--- /dev/null
+++ b/triton_models/weights/layers.44.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fce887850b072cb7e291998bb3d970cf24fad51f57e10bfb75a80ac005a7073
+size 13107200
diff --git a/triton_models/weights/layers.44.attention.wo.0.scales_zeros b/triton_models/weights/layers.44.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..387bb757ef425ab46f64580b07da33cefc31fb93
--- /dev/null
+++ b/triton_models/weights/layers.44.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5965232ac3cc721eb611892381a90bd19a6045c7c2198297a4655af77c16ef9f
+size 819200
diff --git a/triton_models/weights/layers.44.attention_norm.weight b/triton_models/weights/layers.44.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..71e6540283ab5f35edf7e172974f299446a45700
--- /dev/null
+++ b/triton_models/weights/layers.44.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f42f7b64c12abcf5f62d24acea28f231801a3bebf91c70b93ba6485603c2f8b1
+size 10240
diff --git a/triton_models/weights/layers.44.feed_forward.w13.0.qweight b/triton_models/weights/layers.44.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..277e5b44cdce7ec2a222519f5a1560e326b3095b
--- /dev/null
+++ b/triton_models/weights/layers.44.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e525f951018af2c8fb6f239003f0564fec7ce99282c966c981868a5f18fcb09
+size 70778880
diff --git a/triton_models/weights/layers.44.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.44.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..85807954026b77e73f966277a27525d794aed536
--- /dev/null
+++ b/triton_models/weights/layers.44.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d5d777a474f4279c5cf749d28f0f60eda60836b249ac193d817b4fc775b6aab
+size 4423680
diff --git a/triton_models/weights/layers.44.feed_forward.w2.0.qweight b/triton_models/weights/layers.44.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..6313bf64f8aac369c6f80cb50fac66982a454cb4
--- /dev/null
+++ b/triton_models/weights/layers.44.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2aa74c29e296320ac034f23c1486588f33fd0f830d017d8cb8e3525f212897
+size 35389440
diff --git a/triton_models/weights/layers.44.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.44.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..8d00acac0a9119f555eaf8e9b48072f49a56ff0e
--- /dev/null
+++ b/triton_models/weights/layers.44.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3068fbd9245156bdec8925b8b10df99b02ff8939d392fb9701bf66cb8b207d34
+size 2211840
diff --git a/triton_models/weights/layers.44.ffn_norm.weight b/triton_models/weights/layers.44.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..7de430abe2a9f5b06b9ddbb5499982505e6ef5bf
--- /dev/null
+++ b/triton_models/weights/layers.44.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:997f6e365a5428d9819ee65050f473f47674ed15ad9203b4eae3d1c30d680266
+size 10240
diff --git a/triton_models/weights/layers.45.attention.w_qkv.0.qweight b/triton_models/weights/layers.45.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ecb704140f29559cb1ab35a5435fe15bdf2da350
--- /dev/null
+++ b/triton_models/weights/layers.45.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24f110ddb392fa9424b3f4a68b77d61c14acb606153bc3e7c8d07363dc2e1527
+size 39321600
diff --git a/triton_models/weights/layers.45.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.45.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2b4b5e30505c68e4190ae7e863ebdd1644af60a1
--- /dev/null
+++ b/triton_models/weights/layers.45.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78e74c3b7a473f21b3674345eaedb22b49facfce0589125e0bda7cac6a35c417
+size 2457600
diff --git a/triton_models/weights/layers.45.attention.wo.0.qweight b/triton_models/weights/layers.45.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1585f6595f955e1ca201f936815d24cb2d15a97a
--- /dev/null
+++ b/triton_models/weights/layers.45.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5b183870a1d325fe5a9474b5cf797af8d830e5f0d75cf47844fb3b2cb4c680f
+size 13107200
diff --git a/triton_models/weights/layers.45.attention.wo.0.scales_zeros b/triton_models/weights/layers.45.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..35134da26cc281917108805223b04cb08ab87598
--- /dev/null
+++ b/triton_models/weights/layers.45.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:918f0c1835c367a35daf3a3c1ebc0e14eb1335d87f9a35bf7bcf6fb37ec35753
+size 819200
diff --git a/triton_models/weights/layers.45.attention_norm.weight b/triton_models/weights/layers.45.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..d38da315f3b682f55e1e8802545c66bd98640b1f
--- /dev/null
+++ b/triton_models/weights/layers.45.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f81d91d943b0e5e48b909d22636a1d7bafa7701921aae4c37699cee13a5e6973
+size 10240
diff --git a/triton_models/weights/layers.45.feed_forward.w13.0.qweight b/triton_models/weights/layers.45.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ef3b95f2e7b6751596e3e497ba84e0f486ac6d9d
--- /dev/null
+++ b/triton_models/weights/layers.45.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07723995b8b09d9cccc7597dd5928a226dfb39ddde04367431344fe06b5ce32e
+size 70778880
diff --git a/triton_models/weights/layers.45.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.45.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3465212aca7585ddfbedd39906525bb6325bd8a0
--- /dev/null
+++ b/triton_models/weights/layers.45.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6ffb65d6a1367de8464cb1c2209e4b09c4ce9ebe5b6eab126a53eccea5ec3ca
+size 4423680
diff --git a/triton_models/weights/layers.45.feed_forward.w2.0.qweight b/triton_models/weights/layers.45.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..74069bc31d6317e0cf847193d8cc2ba3a8c857b1
--- /dev/null
+++ b/triton_models/weights/layers.45.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb324df723e6b2dff0d174cccee281db7b1a4d381a8d27e7dd55386895fbb2eb
+size 35389440
diff --git a/triton_models/weights/layers.45.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.45.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..24832dd1ac1e7884d04b58bb4bb4a47883a808ea
--- /dev/null
+++ b/triton_models/weights/layers.45.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a45d9b0bba8561f2de3c3e6138db9908469cb2c15f7beee1a933759b8028c3c
+size 2211840
diff --git a/triton_models/weights/layers.45.ffn_norm.weight b/triton_models/weights/layers.45.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..20496277f1bdb72e509349a669bbf836248be512
--- /dev/null
+++ b/triton_models/weights/layers.45.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:038b6eacb53a25762b4a2459c4e7ec0e7ddc821f73c6ace3827bdb975a08ba46
+size 10240
diff --git a/triton_models/weights/layers.46.attention.w_qkv.0.qweight b/triton_models/weights/layers.46.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..536d8d2bc3ca3fda4fb65aaba9dd63eec8f231e2
--- /dev/null
+++ b/triton_models/weights/layers.46.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7fa1ef1a6b077f7aaf565a26967a129c610f719448e631a64b85713fe99814a
+size 39321600
diff --git a/triton_models/weights/layers.46.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.46.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..067d8bd611a4fedfa964a147b0dfbad0ae296d4f
--- /dev/null
+++ b/triton_models/weights/layers.46.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c944085503c3e74360efe35d57b6a067547a0826502cf799126a87b0a38204c
+size 2457600
diff --git a/triton_models/weights/layers.46.attention.wo.0.qweight b/triton_models/weights/layers.46.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e7c9635cd16364ee07a77480a23c56645e421d42
--- /dev/null
+++ b/triton_models/weights/layers.46.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f320197f91e53ecc8c525f4b9fe98f8ef5d8f81a2206bca357d6b3cdedd58505
+size 13107200
diff --git a/triton_models/weights/layers.46.attention.wo.0.scales_zeros b/triton_models/weights/layers.46.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6c967e12fd3d21ab6995791802fe6b8b26d3e521
--- /dev/null
+++ b/triton_models/weights/layers.46.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70dfa88e49d6e11c85319bb97fdb3774984e710d83276b96c4b1c00e2f1f69b7
+size 819200
diff --git a/triton_models/weights/layers.46.attention_norm.weight b/triton_models/weights/layers.46.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..0d4ddf16d4022c68753971625b53decc857914c9
--- /dev/null
+++ b/triton_models/weights/layers.46.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:842970f4b87b208a7447bc9ce02526650da1539c1d2e7470fb6aacf403a79662
+size 10240
diff --git a/triton_models/weights/layers.46.feed_forward.w13.0.qweight b/triton_models/weights/layers.46.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..eb6cd1361fd8c4cf0614617e68b0a499db469704
--- /dev/null
+++ b/triton_models/weights/layers.46.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:905ff58bd7cf5ec3452dccd71c2786850e4aab4fcd15c4e5de6badb716e534c3
+size 70778880
diff --git a/triton_models/weights/layers.46.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.46.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..efffe1a7d113f85caa3d698036d8b12867a68ba3
--- /dev/null
+++ b/triton_models/weights/layers.46.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b0c52f5a566564cbcb718a9254b4c119a2109eea59c34fd11474c98a7b1603d
+size 4423680
diff --git a/triton_models/weights/layers.46.feed_forward.w2.0.qweight b/triton_models/weights/layers.46.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e14f7a723a1149d85cd1ed423d4ec2326cc68e0d
--- /dev/null
+++ b/triton_models/weights/layers.46.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3407a1c08c7eec2701684e0e34382c5d8f416eaf6a7ad83a90295e4ba5d6ce85
+size 35389440
diff --git a/triton_models/weights/layers.46.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.46.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..70d35c609cf638135a31f408cfa35c84da6d5df3
--- /dev/null
+++ b/triton_models/weights/layers.46.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:432e3ab105c2e5629e0628c7c3ef0b7365488de1b8673a39e8e3f21ac19f09a5
+size 2211840
diff --git a/triton_models/weights/layers.46.ffn_norm.weight b/triton_models/weights/layers.46.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..7c178fd6c87d3c81108403f1535129fdeb24afca
--- /dev/null
+++ b/triton_models/weights/layers.46.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8113f80a2e2276f2cec1877b0f41b65e2656b5b3936af513852bd86ac0811c62
+size 10240
diff --git a/triton_models/weights/layers.47.attention.w_qkv.0.qweight b/triton_models/weights/layers.47.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a252c37b6897d797d8de5ddb32bf02d9c27d0031
--- /dev/null
+++ b/triton_models/weights/layers.47.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7b52e85631703914d32879bf3c753d445394527541150bba710c3704eb3ac5d
+size 39321600
diff --git a/triton_models/weights/layers.47.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.47.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..cac68135c0df3bf0fffd415ca504988fa857d638
--- /dev/null
+++ b/triton_models/weights/layers.47.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb797d7f0051ca316ac5e220fd18e35c915467e711a179aef4b7642a31cf72ea
+size 2457600
diff --git a/triton_models/weights/layers.47.attention.wo.0.qweight b/triton_models/weights/layers.47.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8359823e183d3bfa28aa738eb4dd4d61c7cfd379
--- /dev/null
+++ b/triton_models/weights/layers.47.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d2cfda8841863d0dfe96c02290d189dffe6981f60e7f5e7ee133f358347251
+size 13107200
diff --git a/triton_models/weights/layers.47.attention.wo.0.scales_zeros b/triton_models/weights/layers.47.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2308d0c2c459029374b2ec60e22b0815fa6cf272
--- /dev/null
+++ b/triton_models/weights/layers.47.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:060b23658e058a9fd595fa135ae06a30edab7b4bfa11686db40d226151ee0edd
+size 819200
diff --git a/triton_models/weights/layers.47.attention_norm.weight b/triton_models/weights/layers.47.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..07b924cf63ebecdafe45d7632c3c0717211d8626
--- /dev/null
+++ b/triton_models/weights/layers.47.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6abe07e102c02b1cfbb2dc3503369f3ebd520c282bf85a7db80f72b6e193ee33
+size 10240
diff --git a/triton_models/weights/layers.47.feed_forward.w13.0.qweight b/triton_models/weights/layers.47.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d8c5b1ec731255291afed7e3871eebe1538d1b87
--- /dev/null
+++ b/triton_models/weights/layers.47.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0dfd4416fc8d69d404926ad504e05f776a3ab74532f51424f96ca9c4aa99f5f
+size 70778880
diff --git a/triton_models/weights/layers.47.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.47.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..0141b1c64d4006269d6007307bbed2080cb1883d
--- /dev/null
+++ b/triton_models/weights/layers.47.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd36a040512ec82d62c52f4cb0414cf09e0f5382e3adf4c3ee407921dab71d84
+size 4423680
diff --git a/triton_models/weights/layers.47.feed_forward.w2.0.qweight b/triton_models/weights/layers.47.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..1c3319e3749c3bf774b63c257651482798326c7a
--- /dev/null
+++ b/triton_models/weights/layers.47.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a02c04b6ff731ce847ae03e16324704ecd8c492939c1554803c7e72308af0c1e
+size 35389440
diff --git a/triton_models/weights/layers.47.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.47.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..333ca0b9c3467f9fcda556c8e510a617866b19bd
--- /dev/null
+++ b/triton_models/weights/layers.47.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df5a2fd483e6b87bc9bc81c2dc4f70f8ad8c1820b53348a9bf59659085720a47
+size 2211840
diff --git a/triton_models/weights/layers.47.ffn_norm.weight b/triton_models/weights/layers.47.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c278a3d06dd85a5b11c833fb0116b156a575370b
--- /dev/null
+++ b/triton_models/weights/layers.47.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a11ab8e2b096ae68c9008f546de1704b226f3cd1d369445752b54a2749eb79e3
+size 10240
diff --git a/triton_models/weights/layers.48.attention.w_qkv.0.qweight b/triton_models/weights/layers.48.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e0895a7d99e53c101787ca771e4de56cf0aa0ac6
--- /dev/null
+++ b/triton_models/weights/layers.48.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98c32850525e48dc76024aafc093bd80ed150ab00bc6ea9377ee402ea479de27
+size 39321600
diff --git a/triton_models/weights/layers.48.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.48.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..973ab3c59fcb475b0fc09c68af4f93fe727ef27b
--- /dev/null
+++ b/triton_models/weights/layers.48.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d200af888ee44f4e43665dfe21aab5a83ea72196374a031ed2ca3e9fe829ecc4
+size 2457600
diff --git a/triton_models/weights/layers.48.attention.wo.0.qweight b/triton_models/weights/layers.48.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..eb680ea52bef5a617f447a964545d383f82d3201
--- /dev/null
+++ b/triton_models/weights/layers.48.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:210ff96b356a00c141bd5763c1bec50adfbad97474ef6c0a2fb8ce187d63355f
+size 13107200
diff --git a/triton_models/weights/layers.48.attention.wo.0.scales_zeros b/triton_models/weights/layers.48.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c62f24fdfdd84eb6543931c16d8822ab33922b3e
--- /dev/null
+++ b/triton_models/weights/layers.48.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30ac84e8092af709a2d28b7e3a5c39c124b8eb5835389dd1484ea3fae6b8a98c
+size 819200
diff --git a/triton_models/weights/layers.48.attention_norm.weight b/triton_models/weights/layers.48.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..031e4a29d0b00b72447bb702933b59b381f7771a
--- /dev/null
+++ b/triton_models/weights/layers.48.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb07ae203f28c93489a808ab0e87401c7ca657573cc1ee11ba24060064021716
+size 10240
diff --git a/triton_models/weights/layers.48.feed_forward.w13.0.qweight b/triton_models/weights/layers.48.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5acd4b576c1a6d658b05e2b19068a57d76eed242
--- /dev/null
+++ b/triton_models/weights/layers.48.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af48a97ce7d9460d4507f6c0b59d75381a1736a43e3bf4dc2ddea822f9e63a6f
+size 70778880
diff --git a/triton_models/weights/layers.48.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.48.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..eadd7827461d1888f169599858561a4df8816d34
--- /dev/null
+++ b/triton_models/weights/layers.48.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4616411755fba6cba0ae47db98f75086c3dd0f81257052c4aada435d45a0d082
+size 4423680
diff --git a/triton_models/weights/layers.48.feed_forward.w2.0.qweight b/triton_models/weights/layers.48.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..fd34eb4ed92b9787e4673ba3fb7bde4f0c988beb
--- /dev/null
+++ b/triton_models/weights/layers.48.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49436aee5f95f442d4ee49ddaf46bd2bbb63251bc9f38b0ebd902803e88e4b21
+size 35389440
diff --git a/triton_models/weights/layers.48.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.48.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3e02eee56eba09dd8bb4740e5f6a4fd525022b6e
--- /dev/null
+++ b/triton_models/weights/layers.48.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335aaa6b28b1bfb139a46284519c77b73af4180c705c7ec0bbb91f1bed11ec7b
+size 2211840
diff --git a/triton_models/weights/layers.48.ffn_norm.weight b/triton_models/weights/layers.48.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..5e3716cdcd26bbf26e722c9995cb52206cf3aebc
--- /dev/null
+++ b/triton_models/weights/layers.48.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba7017c48d8a5406dd5dfddcd03c264f4a78a12df7bae97f185ed57ef58e93e9
+size 10240
diff --git a/triton_models/weights/layers.49.attention.w_qkv.0.qweight b/triton_models/weights/layers.49.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..3c0fca0e118c3b6058e7e418de0d839f06fc1605
--- /dev/null
+++ b/triton_models/weights/layers.49.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f34b86fe529eb0b54c4c4e6c99b13b29283a03201aa3ca530b0c8a1a8fc691b0
+size 39321600
diff --git a/triton_models/weights/layers.49.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.49.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b91eb3ef6710fff815421608073080aa38408ead
--- /dev/null
+++ b/triton_models/weights/layers.49.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d53bb457b1a6a594467222d377341349ee90e6a381a3c34f33b8e2440ec5006
+size 2457600
diff --git a/triton_models/weights/layers.49.attention.wo.0.qweight b/triton_models/weights/layers.49.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2dfcfb7ca431d52a2a01d95613a89441aa6dc02f
--- /dev/null
+++ b/triton_models/weights/layers.49.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9dcead624021b3a229dcd44c273d6545763546ac5e889bc58f6011cfc9b5fc
+size 13107200
diff --git a/triton_models/weights/layers.49.attention.wo.0.scales_zeros b/triton_models/weights/layers.49.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ce69eb003d9c795858c2c19882ca9200dc6e869f
--- /dev/null
+++ b/triton_models/weights/layers.49.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e93b581e771f6fd9efefe32f9eb5bcec346ca7fab5907db67e6c14b31266e74f
+size 819200
diff --git a/triton_models/weights/layers.49.attention_norm.weight b/triton_models/weights/layers.49.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c57ccdf1e093b30a13269d6b8b39b4347b0a2990
--- /dev/null
+++ b/triton_models/weights/layers.49.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7a95df133b3c6187aa8a85154a4f7f35370d7157d8176ee02e830e9dc2b71f7
+size 10240
diff --git a/triton_models/weights/layers.49.feed_forward.w13.0.qweight b/triton_models/weights/layers.49.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2b4d4fc40bab66566318babc8bb2393065f27f55
--- /dev/null
+++ b/triton_models/weights/layers.49.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf78e3de1f4559e61c04ee1740c95a4249ea7d3759317a6c36c203f2a2c04980
+size 70778880
diff --git a/triton_models/weights/layers.49.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.49.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..80a14865cf83e59d95b11938705867fccfce925e
--- /dev/null
+++ b/triton_models/weights/layers.49.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c0f853845d9d5746f1d70b729880ac58ae24b10475211aed17edb39505c97b
+size 4423680
diff --git a/triton_models/weights/layers.49.feed_forward.w2.0.qweight b/triton_models/weights/layers.49.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..6e9a119b6d6254dadcc2697890eb11c64308b40f
--- /dev/null
+++ b/triton_models/weights/layers.49.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:793ce4e775702c9a55b86ec9fb14dddbb52a5bd8982afebe57997d97a50709bf
+size 35389440
diff --git a/triton_models/weights/layers.49.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.49.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..69c254011375f3740a2b3defcd8b6ffeee66050c
--- /dev/null
+++ b/triton_models/weights/layers.49.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27258319d9870e66c823d062c0880bc863483784f30ce5a9c4164b4f95fd86fa
+size 2211840
diff --git a/triton_models/weights/layers.49.ffn_norm.weight b/triton_models/weights/layers.49.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..ea37b12c05994bb8944c0c859c9b83e80d7c2baf
--- /dev/null
+++ b/triton_models/weights/layers.49.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3157b8c6f61cc097b10e57d67983ef672a7ee91f388a598d06485b79c094a78
+size 10240
diff --git a/triton_models/weights/layers.5.attention.w_qkv.0.qweight b/triton_models/weights/layers.5.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bf459a99dbb3a141e8701801dceeba9e36ffc60b
--- /dev/null
+++ b/triton_models/weights/layers.5.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12134f6adc221ccdd117632c7181a0c0c487ce0f84122ba1f8fd0ba5b01f2f8a
+size 39321600
diff --git a/triton_models/weights/layers.5.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.5.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..20d516e1af5e7bfb005547ba8196d9e64c63acf1
--- /dev/null
+++ b/triton_models/weights/layers.5.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f6174d60fcad6c73899b9810f5c9b184e7a3f251354ee478ac8cb1ef3005273
+size 2457600
diff --git a/triton_models/weights/layers.5.attention.wo.0.qweight b/triton_models/weights/layers.5.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5524dae98b9f4c5583756492674ebb67fe098e5e
--- /dev/null
+++ b/triton_models/weights/layers.5.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4a06c3bc3a94d7b0bb92ff8fb9cc3da6be044cc1ce01dd136c21e33412227f
+size 13107200
diff --git a/triton_models/weights/layers.5.attention.wo.0.scales_zeros b/triton_models/weights/layers.5.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..60bf71c7f494e9bdc79ef08c111dbf1bb86e44f9
--- /dev/null
+++ b/triton_models/weights/layers.5.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8c7ea6ae74aec2984da2cadaf61f33887d216cd92fad345dc15a6f897a9d7a
+size 819200
diff --git a/triton_models/weights/layers.5.attention_norm.weight b/triton_models/weights/layers.5.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1a2bd60b9d664c6a9150c9be84bc3560964ae16b
--- /dev/null
+++ b/triton_models/weights/layers.5.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c94ad8afb5952990d5933107c89d2a155c2a54cde53e8fb8f1aa4251799f4009
+size 10240
diff --git a/triton_models/weights/layers.5.feed_forward.w13.0.qweight b/triton_models/weights/layers.5.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2a317ce73a4b220b942817a794eb05d87f974076
--- /dev/null
+++ b/triton_models/weights/layers.5.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1677ed46a1cf6383443e9b46563122bb1694ad7b838730dcec8c2ff14b29177f
+size 70778880
diff --git a/triton_models/weights/layers.5.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.5.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6f2e5824cd9bf7a71131c0d2f5924bca68a76d11
--- /dev/null
+++ b/triton_models/weights/layers.5.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15a54d6521c991c7643a313d76f7cf4f56533b0e2d1b19a023d813c1ce630af9
+size 4423680
diff --git a/triton_models/weights/layers.5.feed_forward.w2.0.qweight b/triton_models/weights/layers.5.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8b521f52d5c7ceb38e443c23fb6ba3df68facda8
--- /dev/null
+++ b/triton_models/weights/layers.5.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66319881ebf37328ae34d8fa5fc4c78f7164e324620797c35e0f6d75a827491a
+size 35389440
diff --git a/triton_models/weights/layers.5.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.5.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..268bb3bf70cf66a6743212af204b2907b45bd1a7
--- /dev/null
+++ b/triton_models/weights/layers.5.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3951a5f3d4ffb200b1fd86cc0511043dae254aa2f1bfa9dc8e4442610e7ba2c
+size 2211840
diff --git a/triton_models/weights/layers.5.ffn_norm.weight b/triton_models/weights/layers.5.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..0bce5d9fbf2878956f119d3af7184e7b376a5488
--- /dev/null
+++ b/triton_models/weights/layers.5.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b56855e4a8b1e346802e5559b6f2e030313f90a2cd19d5921c6bebc0f07d361
+size 10240
diff --git a/triton_models/weights/layers.50.attention.w_qkv.0.qweight b/triton_models/weights/layers.50.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f3c8ebe243d42b34113d4fe41f3b58542bd57044
--- /dev/null
+++ b/triton_models/weights/layers.50.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff8fc9d6b9ce4dece43c6c1774a8afcdad8ba63eca69c82c58828dd8a0c65515
+size 39321600
diff --git a/triton_models/weights/layers.50.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.50.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ba38ae5eea9d064cafeb65d32818e0c2ac7340f1
--- /dev/null
+++ b/triton_models/weights/layers.50.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87379abbb4aa049e8b505a8f9a616a8b74741a9e8e540543aa5385d798f91693
+size 2457600
diff --git a/triton_models/weights/layers.50.attention.wo.0.qweight b/triton_models/weights/layers.50.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8b9656435db9c1f3da1ec412ce81a4444ca204a6
--- /dev/null
+++ b/triton_models/weights/layers.50.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48f3fead20908914b024dd939830b6ebd0546e612c42227dd8ff526e3e37ad51
+size 13107200
diff --git a/triton_models/weights/layers.50.attention.wo.0.scales_zeros b/triton_models/weights/layers.50.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..4811df704121d5a767eec4f24b5f98dc6822b557
--- /dev/null
+++ b/triton_models/weights/layers.50.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8fac7b509c22a5a90753db4417d882f4cc3e567c6f605fbe55af645f769025a
+size 819200
diff --git a/triton_models/weights/layers.50.attention_norm.weight b/triton_models/weights/layers.50.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..9fc3587dd5a59441ee568a6603acd807efcb5d4a
--- /dev/null
+++ b/triton_models/weights/layers.50.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01f2c1f8e779bbf597a6d666eeb5eaa8e4d8cb15f4eb8e1b0d3a8538afb4627e
+size 10240
diff --git a/triton_models/weights/layers.50.feed_forward.w13.0.qweight b/triton_models/weights/layers.50.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f7bb61ece53c2fc8fe62dd20fbe1c93ba72bdc69
--- /dev/null
+++ b/triton_models/weights/layers.50.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1df611aeb386a28d90211bdd0a80aeeff5bd3ab97c92189ddcd34b7f45aafb37
+size 70778880
diff --git a/triton_models/weights/layers.50.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.50.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e3e74fb458229aa1d5aa58f21d7998458d4c801d
--- /dev/null
+++ b/triton_models/weights/layers.50.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:235aa403865c7aff85f2e678a6c1b89258bfebf2d89e0c5a976e81a56b0303a2
+size 4423680
diff --git a/triton_models/weights/layers.50.feed_forward.w2.0.qweight b/triton_models/weights/layers.50.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5eda641ab492274e587f3ce085850e6e656fd5ed
--- /dev/null
+++ b/triton_models/weights/layers.50.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb9eed5c86d5615127b940c304c605848e19db0ac080a475cb30eb45966457f
+size 35389440
diff --git a/triton_models/weights/layers.50.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.50.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..7365145fc711b2b4ea476971a9b05bcb88d7448a
--- /dev/null
+++ b/triton_models/weights/layers.50.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048f5551e4083921c8a7e101d5fca03b8888d58f8417835c0384e9335a4e53c7
+size 2211840
diff --git a/triton_models/weights/layers.50.ffn_norm.weight b/triton_models/weights/layers.50.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..eeae6352b8f0f4cb67850e2d4479c1432f5dab99
--- /dev/null
+++ b/triton_models/weights/layers.50.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a9e4b45bca501bf5bce4e4ebd1347fb4ed3fabb7127a7e1a12ab82f52351b18
+size 10240
diff --git a/triton_models/weights/layers.51.attention.w_qkv.0.qweight b/triton_models/weights/layers.51.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8b7ff57c18846fc4b3a4e85563747b09d105592e
--- /dev/null
+++ b/triton_models/weights/layers.51.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b5e8365e2d972885079da92eab676e87559e91bd389b9be3dc09fa23ee52064
+size 39321600
diff --git a/triton_models/weights/layers.51.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.51.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..a14cb344e84b45a5ef870ba4e9b3d8002be89960
--- /dev/null
+++ b/triton_models/weights/layers.51.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aaf8f69058d81e8e9cb1aaf66afd46654bae0882e6e61273f0521ee232f9712
+size 2457600
diff --git a/triton_models/weights/layers.51.attention.wo.0.qweight b/triton_models/weights/layers.51.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0df19788ad9ecdbe96f42436651658222dd1e009
--- /dev/null
+++ b/triton_models/weights/layers.51.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6e7a21627f063656afeb9a02831044949fe0adefa9827adef48f0df24d0117
+size 13107200
diff --git a/triton_models/weights/layers.51.attention.wo.0.scales_zeros b/triton_models/weights/layers.51.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..54947e4ac37cf0e7f30efe41e933e4f105368c58
--- /dev/null
+++ b/triton_models/weights/layers.51.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36a317cc4ca0201e534b7da3452d7412bafdc78e140aaa58821854748946c17f
+size 819200
diff --git a/triton_models/weights/layers.51.attention_norm.weight b/triton_models/weights/layers.51.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..e8b6e4a3b07073cd8ec69e41e5d95f3743f98bf2
--- /dev/null
+++ b/triton_models/weights/layers.51.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9c57839075990fd1c1369934d1f72ba36255fbd00a9eb2e27273d295692c761
+size 10240
diff --git a/triton_models/weights/layers.51.feed_forward.w13.0.qweight b/triton_models/weights/layers.51.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..7ecad10b54261f5dff6001bbbb1deca21276f8c0
--- /dev/null
+++ b/triton_models/weights/layers.51.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3f81d2592ba3916b85d3e33389826c47dfa10571a7f8e9cbb90cedf40eca1cf
+size 70778880
diff --git a/triton_models/weights/layers.51.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.51.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..04a72647fabfba0f6be4be772b20d4767eb59b36
--- /dev/null
+++ b/triton_models/weights/layers.51.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbb85fc05b4946ec354f0656fe365f796b12f0fd38c3eb366670d83fa33b8c36
+size 4423680
diff --git a/triton_models/weights/layers.51.feed_forward.w2.0.qweight b/triton_models/weights/layers.51.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b728258c33a8f5d3f54b55df7c080bd2ebb3eca6
--- /dev/null
+++ b/triton_models/weights/layers.51.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f87d16276de9bc55fa835e0e5ba67224d3791e0df5edfcffcabdf54e3d6ffbc0
+size 35389440
diff --git a/triton_models/weights/layers.51.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.51.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fe6311c4bb4b8470d774b15b559ba80b878dcefa
--- /dev/null
+++ b/triton_models/weights/layers.51.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27c5f9d4a592b80286a49bd580b0f77e309e5cfcd167f40399687420400269d6
+size 2211840
diff --git a/triton_models/weights/layers.51.ffn_norm.weight b/triton_models/weights/layers.51.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..cdffa914d7118eef40d31423b647b522d4abe662
--- /dev/null
+++ b/triton_models/weights/layers.51.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7352185fdccd5e645ff7b41985bbbdd37f2570f515d6e333a7ee9c744af1082
+size 10240
diff --git a/triton_models/weights/layers.52.attention.w_qkv.0.qweight b/triton_models/weights/layers.52.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0d4a8bdea047504a65ca68fd51403ff7f0b89a75
--- /dev/null
+++ b/triton_models/weights/layers.52.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c50781a9cb0c5970428253db7c199d59babc547e50c89f4c5166ca7ce3d0608c
+size 39321600
diff --git a/triton_models/weights/layers.52.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.52.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..45456272120b02d74a4b45103122fa868ca0331d
--- /dev/null
+++ b/triton_models/weights/layers.52.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9730172d15b3edd8f229d09d8e2cdd937a02c0726af08f38736fdacd72023531
+size 2457600
diff --git a/triton_models/weights/layers.52.attention.wo.0.qweight b/triton_models/weights/layers.52.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..66013208833d6e811b90dbf7fd4df475920b6f1e
--- /dev/null
+++ b/triton_models/weights/layers.52.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9172473c6b837b545d706bd617e772e2f9a494886ef50af4e0067941622539ac
+size 13107200
diff --git a/triton_models/weights/layers.52.attention.wo.0.scales_zeros b/triton_models/weights/layers.52.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..031054e240e674791b55aa59ac66ba3aea69b6b6
--- /dev/null
+++ b/triton_models/weights/layers.52.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f46a28b7bce36a8795d6b41872ac6c28392cb98757e84f23ace881c0602a3ac
+size 819200
diff --git a/triton_models/weights/layers.52.attention_norm.weight b/triton_models/weights/layers.52.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..ec7e53e4c9b4d1062976716724f5d223b31e90c7
--- /dev/null
+++ b/triton_models/weights/layers.52.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a00076cff40b9515638312a387d12f6e074098ccfc7a46e25f1d78cb7e59cf8e
+size 10240
diff --git a/triton_models/weights/layers.52.feed_forward.w13.0.qweight b/triton_models/weights/layers.52.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bbd75723a838dba416d9f777f5d70d28d8eb3d37
--- /dev/null
+++ b/triton_models/weights/layers.52.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5190121e64d16e05e232c6b9d0c91a8bdbce7e59b89e3124708054ecc171918b
+size 70778880
diff --git a/triton_models/weights/layers.52.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.52.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c57b547e864b12736526bc2c179435c5d14d8ce1
--- /dev/null
+++ b/triton_models/weights/layers.52.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f8014e42e5423d1066d0b6c416e4d03d43a305dd9635a32b71df628b092471f
+size 4423680
diff --git a/triton_models/weights/layers.52.feed_forward.w2.0.qweight b/triton_models/weights/layers.52.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b86a55fcfb82d3e59a7e12d94ee9a01b95e2ef82
--- /dev/null
+++ b/triton_models/weights/layers.52.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:188307e8888bdefff35c0c5104210a5d74b6f143f5916b97894420c99cf1deeb
+size 35389440
diff --git a/triton_models/weights/layers.52.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.52.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9f3c4e14222b807904cee0f22554bc87602f6a8e
--- /dev/null
+++ b/triton_models/weights/layers.52.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d5103f82193232105d65be341a0e93ccf93907adbae8898b33234e935ac3a6
+size 2211840
diff --git a/triton_models/weights/layers.52.ffn_norm.weight b/triton_models/weights/layers.52.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..2a2dad1007514f2dbe5b9282c226be00d1d7dd8c
--- /dev/null
+++ b/triton_models/weights/layers.52.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32d5b790d50a3e4cb9d87f618fe7a3d0016ae2019b0552e4f40c2e10ae43d19e
+size 10240
diff --git a/triton_models/weights/layers.53.attention.w_qkv.0.qweight b/triton_models/weights/layers.53.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2ffd65182c3eed34762aac72d44fac1320889d35
--- /dev/null
+++ b/triton_models/weights/layers.53.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37ad4b00ad091ebc77d9e81f607146b806b76fcba7bfc972db3c8f0e4c84974e
+size 39321600
diff --git a/triton_models/weights/layers.53.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.53.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2827cd2b99378598bb5fc6b5da4b9509fb6d39cd
--- /dev/null
+++ b/triton_models/weights/layers.53.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67d88c0de02f1f2f079caf7a1b7942db7bdf171db3fd97dd22c2247d43dfeb47
+size 2457600
diff --git a/triton_models/weights/layers.53.attention.wo.0.qweight b/triton_models/weights/layers.53.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f0b25e5c4319622513e93af9299b0270698a47f3
--- /dev/null
+++ b/triton_models/weights/layers.53.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0cdf00d195aafeb058c65aff745467a0c60d1a03be4a50c5252ec41d0518b14
+size 13107200
diff --git a/triton_models/weights/layers.53.attention.wo.0.scales_zeros b/triton_models/weights/layers.53.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d92a24203f37de69a5f376dd16ee12daf8d2efa9
--- /dev/null
+++ b/triton_models/weights/layers.53.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:950b09957286fb871811d54984656ad2eb68c8ea4b639da5af14791150c2f557
+size 819200
diff --git a/triton_models/weights/layers.53.attention_norm.weight b/triton_models/weights/layers.53.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..763af8c18a06af3e0fec01583d0f44fca8d62f05
--- /dev/null
+++ b/triton_models/weights/layers.53.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8403b61b7f2c2c210a9411498c42d2d240a0b0b006367e6aec30987778f466e5
+size 10240
diff --git a/triton_models/weights/layers.53.feed_forward.w13.0.qweight b/triton_models/weights/layers.53.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d845c5dd88d947c6431a30933f657cdf52a92039
--- /dev/null
+++ b/triton_models/weights/layers.53.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68dbc1a6eb63961bb099eed4c60e1acb1fff214ad052c0bbb13151030c43f9d6
+size 70778880
diff --git a/triton_models/weights/layers.53.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.53.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..38205bbf766e5afe57f0f3ebab1df09fa6b6591d
--- /dev/null
+++ b/triton_models/weights/layers.53.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:175dd1ea8fb9c1a2e62e36ded41c47bc0ed307ea26e6613386a9ef4e62f31ab7
+size 4423680
diff --git a/triton_models/weights/layers.53.feed_forward.w2.0.qweight b/triton_models/weights/layers.53.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..559e7ff2fef783273196bc54beba262684a0cb93
--- /dev/null
+++ b/triton_models/weights/layers.53.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:401ab812bd7185bf671404d03deebbaa41f0eca279789f50104c7ea9fe62fee9
+size 35389440
diff --git a/triton_models/weights/layers.53.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.53.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..ebd3ce03f2bb2f5bdec30fcf6780db1502a4a504
--- /dev/null
+++ b/triton_models/weights/layers.53.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ee3f7a1c7fef0d8a6786477cc34140836c0359f61f726bf376bb029cdd1fe73
+size 2211840
diff --git a/triton_models/weights/layers.53.ffn_norm.weight b/triton_models/weights/layers.53.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1cdc3c195184e8d2ef09329d3bad682a92b91a3e
--- /dev/null
+++ b/triton_models/weights/layers.53.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6adb6b4715fa0d4ae55e41f4c3fca2f62eb23d99627a223fe6c3e240ab184a90
+size 10240
diff --git a/triton_models/weights/layers.54.attention.w_qkv.0.qweight b/triton_models/weights/layers.54.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..50763449de8e06b6a46e77e937f895129c1635f9
--- /dev/null
+++ b/triton_models/weights/layers.54.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5827597c41ba67e42fb5f6b39ccba5e44949f258e8b5a7bb133b062ab90e7e80
+size 39321600
diff --git a/triton_models/weights/layers.54.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.54.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..174cd2e4048e56f988b1afb12dc7bdd43ce18be5
--- /dev/null
+++ b/triton_models/weights/layers.54.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd8799bd6c2e022dd78d466fb89e83f6d958baf0e1be72f9380df2136414f567
+size 2457600
diff --git a/triton_models/weights/layers.54.attention.wo.0.qweight b/triton_models/weights/layers.54.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0b23518adf87efc6f193ea50b64eeb1403a13156
--- /dev/null
+++ b/triton_models/weights/layers.54.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f195ca8cfce5aa54aa8c539f396276213b6db32082d5c689007948bf5e67bf08
+size 13107200
diff --git a/triton_models/weights/layers.54.attention.wo.0.scales_zeros b/triton_models/weights/layers.54.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..29aefb15e0183504f7749943e16186a0f3ed9750
--- /dev/null
+++ b/triton_models/weights/layers.54.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12a7220d4e44f5d06269798456b0eeba855042904f8acc64338725df34a5ddf4
+size 819200
diff --git a/triton_models/weights/layers.54.attention_norm.weight b/triton_models/weights/layers.54.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..341e3a24778ec2b6019e2761ed2bbe4614dcbacc
--- /dev/null
+++ b/triton_models/weights/layers.54.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:279fb507190713d3076837c6d51a104a2b2468729d21f13be58393de0d8d3540
+size 10240
diff --git a/triton_models/weights/layers.54.feed_forward.w13.0.qweight b/triton_models/weights/layers.54.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..27626de47b31a664c2e3ea9760bdb2cb332a7f1f
--- /dev/null
+++ b/triton_models/weights/layers.54.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24140e99964e32037e6dff5cc244f61c52370780d91081963fd1533583412e79
+size 70778880
diff --git a/triton_models/weights/layers.54.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.54.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..76c24ab17106f1fa0c1124f91074702fe9497867
--- /dev/null
+++ b/triton_models/weights/layers.54.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c14571882c5ea6fe6f62586ff45bbd81aa1b2979205dcd419160625b95749fe3
+size 4423680
diff --git a/triton_models/weights/layers.54.feed_forward.w2.0.qweight b/triton_models/weights/layers.54.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..69a2580a88ea0bc3fcdc0abc27534d84c4a8ba12
--- /dev/null
+++ b/triton_models/weights/layers.54.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19b63aabe7453a0f667f85d5a3212540c3911dc3b0d161709c48676f961e25a6
+size 35389440
diff --git a/triton_models/weights/layers.54.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.54.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..c812cbb0c2eb6b2c2572e2060d0bd750390a3261
--- /dev/null
+++ b/triton_models/weights/layers.54.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b27b1247c0b376e6c25fc822b488f8d17006656304c7ab161a2e07d3db959427
+size 2211840
diff --git a/triton_models/weights/layers.54.ffn_norm.weight b/triton_models/weights/layers.54.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..c5116fdcac827898bde6273f38c267ed33215f27
--- /dev/null
+++ b/triton_models/weights/layers.54.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9285304bdcf4e89fbb13322b41d900390175a83c31760a13b36b4a58d3bc5322
+size 10240
diff --git a/triton_models/weights/layers.55.attention.w_qkv.0.qweight b/triton_models/weights/layers.55.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..24368c76192adaa92bb3405b5fda090c9139b1c5
--- /dev/null
+++ b/triton_models/weights/layers.55.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2939f3bce4ff7e400051b1c7513ceea639c277e46b2e4d2d6371467e52fc400
+size 39321600
diff --git a/triton_models/weights/layers.55.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.55.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..baad4aea8f2e79dec89a14c1a54b2dddd65a6ec1
--- /dev/null
+++ b/triton_models/weights/layers.55.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c05c5347e190910b1d89b5157c103facb6ae72cc00a1004c44d30ed1315d49
+size 2457600
diff --git a/triton_models/weights/layers.55.attention.wo.0.qweight b/triton_models/weights/layers.55.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d814bd98723209ceee8108f1829fddc28a3a0b53
--- /dev/null
+++ b/triton_models/weights/layers.55.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:884212fc5ce01a0c45f5fc2505352119aa69973afa76c7ed8340ee2e55a5293b
+size 13107200
diff --git a/triton_models/weights/layers.55.attention.wo.0.scales_zeros b/triton_models/weights/layers.55.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..32bbb59721c52cda5c988c24ec126aba0d878c0d
--- /dev/null
+++ b/triton_models/weights/layers.55.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:065f0f4baf5025279c520d0970d3f48be9f2b0ba0984e22d09201edf20044a88
+size 819200
diff --git a/triton_models/weights/layers.55.attention_norm.weight b/triton_models/weights/layers.55.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..26ac5074f1eb0553d6087e5d6ba2963471a37276
--- /dev/null
+++ b/triton_models/weights/layers.55.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8228e37de7ce0810d0ac45f1de456b6679d891af5443d51f685ef601329b59dc
+size 10240
diff --git a/triton_models/weights/layers.55.feed_forward.w13.0.qweight b/triton_models/weights/layers.55.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b31a07bf3ef5b9ebb682f4373db6d0b2edd0df7d
--- /dev/null
+++ b/triton_models/weights/layers.55.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0cea400730e0d2732756ccad0f53ca817aab8d340424641593d981e1b1023cc
+size 70778880
diff --git a/triton_models/weights/layers.55.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.55.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..531cb5ed724736dbf4e835bebf4e61d59c60a03c
--- /dev/null
+++ b/triton_models/weights/layers.55.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cc2bfb5816b939130bab3b200646cc4f3b4538c9fc0c0f50bbe8ebbb3b5e1c8
+size 4423680
diff --git a/triton_models/weights/layers.55.feed_forward.w2.0.qweight b/triton_models/weights/layers.55.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f3fa6520a67c48c894de9ad0a3728ca1af2e281f
--- /dev/null
+++ b/triton_models/weights/layers.55.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f91256bde95de24b149e15f0d4020503eb6cfa10f54d892946ba5262c01d70d1
+size 35389440
diff --git a/triton_models/weights/layers.55.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.55.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2641b42b5a7834dca832c0d3fe7346353efe264e
--- /dev/null
+++ b/triton_models/weights/layers.55.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb57123a59fb8a843bb635df19753a480ad0a3c50d553647a08d196da2d3c3d1
+size 2211840
diff --git a/triton_models/weights/layers.55.ffn_norm.weight b/triton_models/weights/layers.55.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..cbf8ccde570759c0e6747b05cb6739adc7a25051
--- /dev/null
+++ b/triton_models/weights/layers.55.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4134b8bf42d2dd659845fc2314da392ad1768a1f87253378fc4ede959acb709
+size 10240
diff --git a/triton_models/weights/layers.56.attention.w_qkv.0.qweight b/triton_models/weights/layers.56.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..4a835dad0887a25255e5d5b18051be60f7374bbf
--- /dev/null
+++ b/triton_models/weights/layers.56.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40fa43ba7130c62e0b64592497aa6f8129daaf47774ce763105d62d65241f8eb
+size 39321600
diff --git a/triton_models/weights/layers.56.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.56.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..661c56e51bc4e31af9416e9e5404571b7c79d620
--- /dev/null
+++ b/triton_models/weights/layers.56.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d247bbe8c7a29f55dbe6f5167c37746f75d58a3f5209cad85e8b25466aa838f
+size 2457600
diff --git a/triton_models/weights/layers.56.attention.wo.0.qweight b/triton_models/weights/layers.56.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e93438b54dca71cb6708898db34bb444954616ec
--- /dev/null
+++ b/triton_models/weights/layers.56.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f63c460524a7f883e4dc634ccece9070b737244569527f2839415c39ecc83320
+size 13107200
diff --git a/triton_models/weights/layers.56.attention.wo.0.scales_zeros b/triton_models/weights/layers.56.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fe2627dd46e48ab9ab1b1be95e0df0a304efdf12
--- /dev/null
+++ b/triton_models/weights/layers.56.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa44215884165c39f1b9befdbfa038acca584d23737e477519c96c189f810ec9
+size 819200
diff --git a/triton_models/weights/layers.56.attention_norm.weight b/triton_models/weights/layers.56.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..458a900ea3c4128f4f19a3d3ad0f93901fa86cc6
--- /dev/null
+++ b/triton_models/weights/layers.56.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f0661937290caa067a75a81835de849ab802193deeda6c9575f06e2a45b4fc
+size 10240
diff --git a/triton_models/weights/layers.56.feed_forward.w13.0.qweight b/triton_models/weights/layers.56.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a8c85a22a7b726607abd2e58f6f354b9fa2db51d
--- /dev/null
+++ b/triton_models/weights/layers.56.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3673a6e423a0ed4461cdcd518c35748a047e1ac6f919b807d988a6b08fae0e36
+size 70778880
diff --git a/triton_models/weights/layers.56.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.56.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..9f2ddb49f96a881c5595fdb26fca2192e733719d
--- /dev/null
+++ b/triton_models/weights/layers.56.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e232979ea4c711c32efd6fcbd272bc97bc6971d13b1b54da16ee23f3b492594
+size 4423680
diff --git a/triton_models/weights/layers.56.feed_forward.w2.0.qweight b/triton_models/weights/layers.56.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..f21598bfb883041351734bd4f98a9b060c00ee53
--- /dev/null
+++ b/triton_models/weights/layers.56.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a507999dc9c17e3dc65be4efd670c7bfd160b5f299ba5c6080886074776bc94
+size 35389440
diff --git a/triton_models/weights/layers.56.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.56.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fbea7ee842df57f75f3892857fec1eb2a4bf6638
--- /dev/null
+++ b/triton_models/weights/layers.56.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44bdf771c26993d7f1b996d2a0b8d4258d9420fb40956d2884b47b372d257ef6
+size 2211840
diff --git a/triton_models/weights/layers.56.ffn_norm.weight b/triton_models/weights/layers.56.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..fa2763591fec04bdf70dcd482f0408d8b93e653c
--- /dev/null
+++ b/triton_models/weights/layers.56.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdee3d3a5718b55e4ca05cbe26e2204d62b6b638fd031850d5affa1845e9fe2e
+size 10240
diff --git a/triton_models/weights/layers.57.attention.w_qkv.0.qweight b/triton_models/weights/layers.57.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0f6449e2e2f6a871971b4bd8889676c10f88ffa5
--- /dev/null
+++ b/triton_models/weights/layers.57.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4febd14cb7c672f8ae2a427429a27852f606b20333f24d4d8be21f093b764ca
+size 39321600
diff --git a/triton_models/weights/layers.57.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.57.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..03201d7a94059d721a8e619c23712fdd94e51ed6
--- /dev/null
+++ b/triton_models/weights/layers.57.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f665479d6a13f927a7bc5ec320fa0d178f9c20e4aeaf120f56957bdc8623a527
+size 2457600
diff --git a/triton_models/weights/layers.57.attention.wo.0.qweight b/triton_models/weights/layers.57.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..50cd0ec261e8195ea26b10d4a246210e79b0c5c6
--- /dev/null
+++ b/triton_models/weights/layers.57.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e801d14930623458839754d74e6a41b17f498ca28a456063803c695a663d198d
+size 13107200
diff --git a/triton_models/weights/layers.57.attention.wo.0.scales_zeros b/triton_models/weights/layers.57.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1fef409fd97368d0bb910d9327a4f391337e45de
--- /dev/null
+++ b/triton_models/weights/layers.57.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57f92671e416f474061ab465197e2885a06ec192856d8f37fd7544ffced67721
+size 819200
diff --git a/triton_models/weights/layers.57.attention_norm.weight b/triton_models/weights/layers.57.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..0c4b0353af15629ed6f1cb609452d1cdaa8de4c7
--- /dev/null
+++ b/triton_models/weights/layers.57.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ed3f7776c04ce6767b8e40b1283f83ae5bba4d9420f64d28ad115ad82a4b942
+size 10240
diff --git a/triton_models/weights/layers.57.feed_forward.w13.0.qweight b/triton_models/weights/layers.57.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..a30ae499438750707e814d628d0e4f6a07bbd1e6
--- /dev/null
+++ b/triton_models/weights/layers.57.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3ab45b49194d9c5a343f7c192ebaddf079a645b2aa83d95a29bad11d627580
+size 70778880
diff --git a/triton_models/weights/layers.57.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.57.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b766f47f24b8bed45a3449e0c53b855a503f3358
--- /dev/null
+++ b/triton_models/weights/layers.57.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8aee83e4bbd1321fb1b742fb77b87d6b9af57f3842e024f92a46ce5ec3a4827
+size 4423680
diff --git a/triton_models/weights/layers.57.feed_forward.w2.0.qweight b/triton_models/weights/layers.57.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..e988275720482488bfe85c1051b7e85d378ae03a
--- /dev/null
+++ b/triton_models/weights/layers.57.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd6f6035fe944da65fbf0a93547789108eb62b6ead13f20e2ac92bb345c3d638
+size 35389440
diff --git a/triton_models/weights/layers.57.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.57.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..fc7c1d76c4ea7383e3450941a0150d187869e930
--- /dev/null
+++ b/triton_models/weights/layers.57.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e73ba8df297a3dabfa639adce326587117c55beeb4499fd12b8fbcf03e6f91c
+size 2211840
diff --git a/triton_models/weights/layers.57.ffn_norm.weight b/triton_models/weights/layers.57.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..2e6ab8e0f3a9f4073d2a77d421e41d4794680576
--- /dev/null
+++ b/triton_models/weights/layers.57.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe3addfbdd0b57d7f37b91a5f6dbff7b5f91f2f853ddef042c4e7feb261d193f
+size 10240
diff --git a/triton_models/weights/layers.58.attention.w_qkv.0.qweight b/triton_models/weights/layers.58.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..d9adada5adefada5ed49b7a271b90aee5d8f8d77
--- /dev/null
+++ b/triton_models/weights/layers.58.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b08f6bb2653ee747b4aa90fdd82787ab92d1bddd7958e448c9cb262174f2dc58
+size 39321600
diff --git a/triton_models/weights/layers.58.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.58.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..1b27e9378d4edbf2db9214337803f7cf57dd1a24
--- /dev/null
+++ b/triton_models/weights/layers.58.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be5cb7cff65b2bd7e9180e89dfc2cc66a2a8fd983e33f1910010a173e2b6a02b
+size 2457600
diff --git a/triton_models/weights/layers.58.attention.wo.0.qweight b/triton_models/weights/layers.58.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..497d4f116a36212e4c03a015053c1cad5b126a0b
--- /dev/null
+++ b/triton_models/weights/layers.58.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e254fcd8817af4724798baddf13b4128ee8990ad9dd71ebdbe5fafa819a11a4
+size 13107200
diff --git a/triton_models/weights/layers.58.attention.wo.0.scales_zeros b/triton_models/weights/layers.58.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..135aa7890de40cc03716fcb3c6038ae8f790f8ab
--- /dev/null
+++ b/triton_models/weights/layers.58.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d18df912be169debafc10aa244fb59b8d697963a5ddb14a08b3edcc3f87e17e
+size 819200
diff --git a/triton_models/weights/layers.58.attention_norm.weight b/triton_models/weights/layers.58.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..27296039ae018d50bdd1fb6e2c659945c0cdf7a4
--- /dev/null
+++ b/triton_models/weights/layers.58.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd224c78049350d2623ee87b534b699e664c625566ba137caa8a5fbd0a3023d1
+size 10240
diff --git a/triton_models/weights/layers.58.feed_forward.w13.0.qweight b/triton_models/weights/layers.58.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..78759e21fc92cabc40244da352b062ba47430ea5
--- /dev/null
+++ b/triton_models/weights/layers.58.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb464a247946825b5ac5f0e933af23d500972b5e62fdaa349bdb9637ac2c19ce
+size 70778880
diff --git a/triton_models/weights/layers.58.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.58.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..bf8b382ec150ebf5c1aabc965b986bf2b2477d26
--- /dev/null
+++ b/triton_models/weights/layers.58.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2392646ec83096e15422d2af3d8da59394ae9621142ec69e999d206bde63be5d
+size 4423680
diff --git a/triton_models/weights/layers.58.feed_forward.w2.0.qweight b/triton_models/weights/layers.58.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..c424fdaac3020afd457b7f4a005296efd633ca19
--- /dev/null
+++ b/triton_models/weights/layers.58.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb685aba05fe849a28feabea0e5b4b28855f1366dac362c2c210214dcc71aae7
+size 35389440
diff --git a/triton_models/weights/layers.58.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.58.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6c8015cf06a0d9559ea7ca26f749cfae088dc86a
--- /dev/null
+++ b/triton_models/weights/layers.58.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4401a353d6826824bc93626da01eab990303f8c756d61387fae7d7043c94cd4c
+size 2211840
diff --git a/triton_models/weights/layers.58.ffn_norm.weight b/triton_models/weights/layers.58.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..47fa8e0a5e0a3a87bc43a3b8f2d127c983ded087
--- /dev/null
+++ b/triton_models/weights/layers.58.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30284ce35b22ce66ef68bdd4f79bf969e233ad230089deec9b4681715133a945
+size 10240
diff --git a/triton_models/weights/layers.59.attention.w_qkv.0.qweight b/triton_models/weights/layers.59.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..5ab009505be55ed7e3bed2a8de8e292a86f6501a
--- /dev/null
+++ b/triton_models/weights/layers.59.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98425e53487c5c8584cb508facaf810230b428a1f39756e95f545657554d6320
+size 39321600
diff --git a/triton_models/weights/layers.59.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.59.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..2e7bbcde1bdd035bd3b8f1c8a761fcadc8cedc8d
--- /dev/null
+++ b/triton_models/weights/layers.59.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e65d5f8b3da1ddbe59c3a31c776e7e655c1893b7d55e2c4fe95fbf06569a45ad
+size 2457600
diff --git a/triton_models/weights/layers.59.attention.wo.0.qweight b/triton_models/weights/layers.59.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..b3959f361c66ba5de32a91a37b97f7966b9abf31
--- /dev/null
+++ b/triton_models/weights/layers.59.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a16a4daabe4ff2c4f968593da1b6cf690ce1fe37f15811136ab1a30cc2fff02d
+size 13107200
diff --git a/triton_models/weights/layers.59.attention.wo.0.scales_zeros b/triton_models/weights/layers.59.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b713a06937fb091ea0de4fad13604c5a88a492e5
--- /dev/null
+++ b/triton_models/weights/layers.59.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d402c99f4b70b8701ab3e85716e577be8a1e6e2ea5aa501dee60eb608384bc9c
+size 819200
diff --git a/triton_models/weights/layers.59.attention_norm.weight b/triton_models/weights/layers.59.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..f1cb115c1673e48e27e546eaddcd1b1812f9f2cd
--- /dev/null
+++ b/triton_models/weights/layers.59.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:324435253ecd6899980b81a45cc019d186db7b649da328156fe08899989194d5
+size 10240
diff --git a/triton_models/weights/layers.59.feed_forward.w13.0.qweight b/triton_models/weights/layers.59.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..0aa0a04eaf5136fbcfa3971b28ee20de39663a23
--- /dev/null
+++ b/triton_models/weights/layers.59.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:629a7930ac0cac59cb9854de03cbf22a29cbb714f21adc3f9237a11a6bce010a
+size 70778880
diff --git a/triton_models/weights/layers.59.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.59.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..332a5710dd7ba08fe14b669e172d99804751af5e
--- /dev/null
+++ b/triton_models/weights/layers.59.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9510fc99231233c119c4a759ffcaa3022798e0fbeea1aa57f9157974c58ce441
+size 4423680
diff --git a/triton_models/weights/layers.59.feed_forward.w2.0.qweight b/triton_models/weights/layers.59.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8eded9c1e0b64c50a5810cc672b0d774ece1834d
--- /dev/null
+++ b/triton_models/weights/layers.59.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b7405d2c9da527d5c103c5d3969863e8c689d92be6ab0ab700b561434a1367e
+size 35389440
diff --git a/triton_models/weights/layers.59.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.59.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..04a87937fd33fc83b7bb34250cf1e8d77077687b
--- /dev/null
+++ b/triton_models/weights/layers.59.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4edf11739f052ac283dad46d5f4efc94c6a3ced07e809c9dc4fb42d4990ced7
+size 2211840
diff --git a/triton_models/weights/layers.59.ffn_norm.weight b/triton_models/weights/layers.59.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..8f3c97cb11d618dbb1e7508b79404a07ced7ea51
--- /dev/null
+++ b/triton_models/weights/layers.59.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4736e4f94db7b418adcc93cbce26b8b5b2c029a5963d034827442c89cfd18c07
+size 10240
diff --git a/triton_models/weights/layers.6.attention.w_qkv.0.qweight b/triton_models/weights/layers.6.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..dc25112b680fd4f0806343f4d5775757bddc0dd2
--- /dev/null
+++ b/triton_models/weights/layers.6.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1b9d8ea8d8808ec60dba2744e5a1f29eb3a214f2736d99fdc1754bdfa06fcb5
+size 39321600
diff --git a/triton_models/weights/layers.6.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.6.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..aba03d4d7fc48f26a897212c9c1b6effd514cbf3
--- /dev/null
+++ b/triton_models/weights/layers.6.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1713682d656c8f43a474898fb38f1932f14e92350354a4fd9c90ffaac10ce823
+size 2457600
diff --git a/triton_models/weights/layers.6.attention.wo.0.qweight b/triton_models/weights/layers.6.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2367abe76a368ca41ca4d47f36cfeb648143e6bc
--- /dev/null
+++ b/triton_models/weights/layers.6.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b8a9ff4cf6e8c269b2f3d0ce03298777089e3fa398f994a3b827ed97f449fb2
+size 13107200
diff --git a/triton_models/weights/layers.6.attention.wo.0.scales_zeros b/triton_models/weights/layers.6.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e21350b397b3cf36b52ac1912632cbd3b0fb8746
--- /dev/null
+++ b/triton_models/weights/layers.6.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58d87ad95da188bc2dec7a7fc67974b057129f13ff37305eb59cb47da891985
+size 819200
diff --git a/triton_models/weights/layers.6.attention_norm.weight b/triton_models/weights/layers.6.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..44907afbe0e2cefcb80e615ff535d9f98ccc0d17
--- /dev/null
+++ b/triton_models/weights/layers.6.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3d188bbc4a4f493281f5cc3bf81e2eeecdc138c1a331c2ebcea8fc31b82cf2f
+size 10240
diff --git a/triton_models/weights/layers.6.feed_forward.w13.0.qweight b/triton_models/weights/layers.6.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..91b5ef0fedc2e8140bff59bbbe9f0f6c82efa626
--- /dev/null
+++ b/triton_models/weights/layers.6.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4352fd2ec5e02619dbf53bdf234d8006732a43658864b82f66198f7d1b5530fe
+size 70778880
diff --git a/triton_models/weights/layers.6.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.6.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..032c0e707367419db1dd31b2ae4076ee5afd8344
--- /dev/null
+++ b/triton_models/weights/layers.6.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d962c7b45f36f15f33db2d5b22b8338c93351cfdb185fd45a81208e41f9f7277
+size 4423680
diff --git a/triton_models/weights/layers.6.feed_forward.w2.0.qweight b/triton_models/weights/layers.6.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..538a520c752ed7e79e4cdf4ef2772126fb733880
--- /dev/null
+++ b/triton_models/weights/layers.6.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0729160cb8481d553ce952f0e5c40d15628cf17ec57efca67e501787e32bda2
+size 35389440
diff --git a/triton_models/weights/layers.6.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.6.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..dc0c683d01b375a6befeefed353f43a04a626738
--- /dev/null
+++ b/triton_models/weights/layers.6.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1218de136d5c780a6f156744980e29c53eac60e1e5e009c2c5599dc7ae4a6e50
+size 2211840
diff --git a/triton_models/weights/layers.6.ffn_norm.weight b/triton_models/weights/layers.6.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..1a92b95e652309f3c0a609919fb0ffd937aaa05a
--- /dev/null
+++ b/triton_models/weights/layers.6.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a71372a816b22410ac823e6e7febbc2a2b487725481b788bae6117eea7be3d8c
+size 10240
diff --git a/triton_models/weights/layers.7.attention.w_qkv.0.qweight b/triton_models/weights/layers.7.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..9b79f07dc50bebfbf89991b268b929d9fb55b1d9
--- /dev/null
+++ b/triton_models/weights/layers.7.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6667230624fda1278575b471b7ac669a96e768639eb5f07890e7aa118d1ef5a
+size 39321600
diff --git a/triton_models/weights/layers.7.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.7.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..e1e23a6612ee46c38483b98b38fbf70bc61180a1
--- /dev/null
+++ b/triton_models/weights/layers.7.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca46630c9c3487954b18053af719a284f66d05eaead3386ec61fad715691f57e
+size 2457600
diff --git a/triton_models/weights/layers.7.attention.wo.0.qweight b/triton_models/weights/layers.7.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..9021132e7ca65afaf2b293a6dc6e36c5d3a8ab03
--- /dev/null
+++ b/triton_models/weights/layers.7.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56b914f9548fee77b41eddd4b6428bb6b571150bfab153a27550a256bc04908d
+size 13107200
diff --git a/triton_models/weights/layers.7.attention.wo.0.scales_zeros b/triton_models/weights/layers.7.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..3b3138df5062653d5a232c57d158371ebb907773
--- /dev/null
+++ b/triton_models/weights/layers.7.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f89456f8687d52921d5132185d247efca3cda41bba833d0b9a387538a1510b1
+size 819200
diff --git a/triton_models/weights/layers.7.attention_norm.weight b/triton_models/weights/layers.7.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..a4569205cb06e51e68af0b39a579ccc554d40e62
--- /dev/null
+++ b/triton_models/weights/layers.7.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21555eb420e4a29a69e41c76e038f6703ab80524116c08c115e673c8ec1f6a65
+size 10240
diff --git a/triton_models/weights/layers.7.feed_forward.w13.0.qweight b/triton_models/weights/layers.7.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..7e453e81ed8b9ce45a990abe6138d947ba0328a5
--- /dev/null
+++ b/triton_models/weights/layers.7.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40acadfc32f6aee0dc93ff9babb6c5b77fe555ac8539f9b0c5aa876422bbd981
+size 70778880
diff --git a/triton_models/weights/layers.7.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.7.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..adb8d14a1640fcd9daf0dc3f7c0ff1b1366f3618
--- /dev/null
+++ b/triton_models/weights/layers.7.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40ef51edeca8efcb66c9f1826e761e94c742ba20032a5631921eef20eb56ac54
+size 4423680
diff --git a/triton_models/weights/layers.7.feed_forward.w2.0.qweight b/triton_models/weights/layers.7.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..eebdcd95eeb41e1440575cddbbea19dad93b89e5
--- /dev/null
+++ b/triton_models/weights/layers.7.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:144ca6f29662e26619801e92bbb4d89120870fb8c43a750e63e37e903e07a43d
+size 35389440
diff --git a/triton_models/weights/layers.7.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.7.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..b51e9484600a80bd680f746e770027e044992000
--- /dev/null
+++ b/triton_models/weights/layers.7.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b73fb4988ca151de6ba83d3c70441e6738224dfaa157dd138713a117b6f450d9
+size 2211840
diff --git a/triton_models/weights/layers.7.ffn_norm.weight b/triton_models/weights/layers.7.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..49b68b7495b78a22e1c30a8c8ece4423cf744929
--- /dev/null
+++ b/triton_models/weights/layers.7.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13f488f7c455ebab9038d12fba5c0e2d111b5a5df49f206b917f2fa1f23a7fae
+size 10240
diff --git a/triton_models/weights/layers.8.attention.w_qkv.0.qweight b/triton_models/weights/layers.8.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..9985abd65e516de5e30008f8a077115665d622b7
--- /dev/null
+++ b/triton_models/weights/layers.8.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f9364c7cafe3111772cc3873c7ce32d6e38fe52b23b88c5ad99eff1efd6606
+size 39321600
diff --git a/triton_models/weights/layers.8.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.8.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..31cb87bebeeba94627e61ba2f3ddb108428b6a45
--- /dev/null
+++ b/triton_models/weights/layers.8.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72b14c0e08b55f010520875a07e040302f3e48f6a731446ac394fd5c14716f9c
+size 2457600
diff --git a/triton_models/weights/layers.8.attention.wo.0.qweight b/triton_models/weights/layers.8.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..8436bbf31f5f7050cf96f12e2d688235204b1494
--- /dev/null
+++ b/triton_models/weights/layers.8.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae2fe50cbb49d7b314b61437e46caf5ef1c5beb1492e332123ac0c521a342795
+size 13107200
diff --git a/triton_models/weights/layers.8.attention.wo.0.scales_zeros b/triton_models/weights/layers.8.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6a4ee08c2adeea92b88dc88d7f6064131fb9802d
--- /dev/null
+++ b/triton_models/weights/layers.8.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97a58c292fc6886e99df6b6b72c179bd0d1274bee7f3f19a455c67480b896e4a
+size 819200
diff --git a/triton_models/weights/layers.8.attention_norm.weight b/triton_models/weights/layers.8.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..38e51c9abfad45e372ecac51174bd7df98b463fe
--- /dev/null
+++ b/triton_models/weights/layers.8.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbe1585c004b7cec7b0531576158d310eacec403d4319e0aa89705f2391b52b
+size 10240
diff --git a/triton_models/weights/layers.8.feed_forward.w13.0.qweight b/triton_models/weights/layers.8.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ec12d7604dcb12da706f7e1b6e4c55e099de14b2
--- /dev/null
+++ b/triton_models/weights/layers.8.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:479c90f83e7ffe9fc7fb2d8efabab917cfb362d17dfc6abe49d820fd97f52c46
+size 70778880
diff --git a/triton_models/weights/layers.8.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.8.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..18381908c83a3453ef494db2e8bf6affc1619f45
--- /dev/null
+++ b/triton_models/weights/layers.8.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62b6cd423063d6370a79c461adbd099aeab7813fd3ec6861e9029bf92014cf81
+size 4423680
diff --git a/triton_models/weights/layers.8.feed_forward.w2.0.qweight b/triton_models/weights/layers.8.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..917ec6853a7bc840f6309601218b535aa2ea5803
--- /dev/null
+++ b/triton_models/weights/layers.8.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe7fa456ae78c557b0d767fdcb5f75f9543a0846241aa6a67f0aefba04dafc72
+size 35389440
diff --git a/triton_models/weights/layers.8.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.8.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..5bca8023e2d9af47bd9a2e335610d1288be66b1b
--- /dev/null
+++ b/triton_models/weights/layers.8.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3700e0a9e5ca601446124b2091769435150abbe4ce7ffdbb4b8b890039691f59
+size 2211840
diff --git a/triton_models/weights/layers.8.ffn_norm.weight b/triton_models/weights/layers.8.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..074c188d1b7eae2ebd49b79cce1709c03bdd7373
--- /dev/null
+++ b/triton_models/weights/layers.8.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ca4bcec4043ea3bded6fe33e64563f63500676640adcc76ec451385ae4a88e1
+size 10240
diff --git a/triton_models/weights/layers.9.attention.w_qkv.0.qweight b/triton_models/weights/layers.9.attention.w_qkv.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..bb259e45b76e594dd7a43c80f9434afd145b821e
--- /dev/null
+++ b/triton_models/weights/layers.9.attention.w_qkv.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92ce402acea2d54e5f4b36f37803ced8ca4518984483664d3ed89fc77072f861
+size 39321600
diff --git a/triton_models/weights/layers.9.attention.w_qkv.0.scales_zeros b/triton_models/weights/layers.9.attention.w_qkv.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..54ba6cafc926d4582346f90db1ee07a4a85a081f
--- /dev/null
+++ b/triton_models/weights/layers.9.attention.w_qkv.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:218e646428b125dfd54828d03da35762c3aad7f4e04e903993a5c36f3f77b66d
+size 2457600
diff --git a/triton_models/weights/layers.9.attention.wo.0.qweight b/triton_models/weights/layers.9.attention.wo.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..2e3cf5a3e144554107a10f4fd99f2f47adac0c1d
--- /dev/null
+++ b/triton_models/weights/layers.9.attention.wo.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00fc69e8ef14153e3293752ab776a677aee31b94277251403c1f45c16e2d2c8c
+size 13107200
diff --git a/triton_models/weights/layers.9.attention.wo.0.scales_zeros b/triton_models/weights/layers.9.attention.wo.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..d5a34ef0179f9742568f8c4f80a272e4c80d81be
--- /dev/null
+++ b/triton_models/weights/layers.9.attention.wo.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:295bd2a218435ae1e3f67f51fae82e6c430fa19dbbe480cce2374fb027193af8
+size 819200
diff --git a/triton_models/weights/layers.9.attention_norm.weight b/triton_models/weights/layers.9.attention_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..a0056c804e8a0567456a936f29c597223dc4df1a
--- /dev/null
+++ b/triton_models/weights/layers.9.attention_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:198fdc5169e2d1d6cce1b736dc7e6df901aa965518db3c5c8c6dd0320d1b2351
+size 10240
diff --git a/triton_models/weights/layers.9.feed_forward.w13.0.qweight b/triton_models/weights/layers.9.feed_forward.w13.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..876a415e337d7d6f88f69cb49c58b6372933354c
--- /dev/null
+++ b/triton_models/weights/layers.9.feed_forward.w13.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be569dbb173e122b8c91ef9100b0f47b0c7d8a0b1f4506decf45563ee7182841
+size 70778880
diff --git a/triton_models/weights/layers.9.feed_forward.w13.0.scales_zeros b/triton_models/weights/layers.9.feed_forward.w13.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..7e8b42fd05407102926bb2f8ac6599ebf9c139c6
--- /dev/null
+++ b/triton_models/weights/layers.9.feed_forward.w13.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb39924f46337aa29e1f80f171f45b887ca479e504b0c844fafeaee872380f92
+size 4423680
diff --git a/triton_models/weights/layers.9.feed_forward.w2.0.qweight b/triton_models/weights/layers.9.feed_forward.w2.0.qweight
new file mode 100644
index 0000000000000000000000000000000000000000..ec110a2ac8cb06fcc7e4a585a1006f553dba8f3a
--- /dev/null
+++ b/triton_models/weights/layers.9.feed_forward.w2.0.qweight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2fbaa03ffb7a71db6bfb38f8405a58a5162c6aa68351c63a61195e8ba9eb7f
+size 35389440
diff --git a/triton_models/weights/layers.9.feed_forward.w2.0.scales_zeros b/triton_models/weights/layers.9.feed_forward.w2.0.scales_zeros
new file mode 100644
index 0000000000000000000000000000000000000000..6bdd95e94811395e7761f730265875f27e83e412
--- /dev/null
+++ b/triton_models/weights/layers.9.feed_forward.w2.0.scales_zeros
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d538ba6b25429da8969eabf626df93a8370f40170c0aacda2f27f503996e235
+size 2211840
diff --git a/triton_models/weights/layers.9.ffn_norm.weight b/triton_models/weights/layers.9.ffn_norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..96e00b1796ad24faef38a20edbab7772220e7d6d
--- /dev/null
+++ b/triton_models/weights/layers.9.ffn_norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37c5fe95bf78adae2f34ce908a94cba088b2c67a9c9adff11821a89be9e06cd9
+size 10240
diff --git a/triton_models/weights/norm.weight b/triton_models/weights/norm.weight
new file mode 100644
index 0000000000000000000000000000000000000000..fecd32beecabd3afdc39f8e1ba936e349382a166
--- /dev/null
+++ b/triton_models/weights/norm.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f26a6fffb58439a64b85d68e37888eed631835ce9c55caacfac7dda99651f3ea
+size 10240
diff --git a/triton_models/weights/output.weight b/triton_models/weights/output.weight
new file mode 100644
index 0000000000000000000000000000000000000000..a47ae71b91024fdafeb5e4f94f96d90aa4a45f22
--- /dev/null
+++ b/triton_models/weights/output.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d65ca122450dfd05630bfa18b96a2f8301cda4d0a7b59ff1d8ecef6507449365
+size 1056440320
diff --git a/triton_models/weights/tok_embeddings.weight b/triton_models/weights/tok_embeddings.weight
new file mode 100644
index 0000000000000000000000000000000000000000..a7100a5917191f72b9bcc191706fb1767ec78d8e
--- /dev/null
+++ b/triton_models/weights/tok_embeddings.weight
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c83f7c010c3004df006fbaa1a7b84c26cc0a19180de8dac0762134419483e54c
+size 1056440320