aws-neuron
/

optimum-neuron-cache

dacorvo HF Staff committed on Jan 29

Commit

509e6bf

verified ·

1 Parent(s): 60d1f86

Add DeepSeek distilled versions of LLama 8B

Files changed (1) hide show

inference-cache-config/llama-variants.json CHANGED Viewed

@@ -1,4 +1,48 @@
 {
   "princeton-nlp/Sheared-LLaMA-1.3B": [
     {
       "batch_size": 1,

 {
+   "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": [
+    {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 16,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    }
+  ],
   "princeton-nlp/Sheared-LLaMA-1.3B": [
     {
       "batch_size": 1,