aws-neuron
/

optimum-neuron-cache

dacorvo HF staff committed on Dec 24, 2024

Commit

687da09

verified ·

1 Parent(s): a416547

Add configuration for granite models

Files changed (1) hide show

inference-cache-config/granite.json ADDED Viewed

+{
+   "ibm-granite/granite-3.1-2b-instruct": [
+    {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    }
+  ],
+  "ibm-granite/granite-3.1-8b-instruct": [
+   {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 8,
+      "auto_cast_type": "bf16"
+    }
+  ]
+}