infly
/

OpenCoder-8B-Instruct

+{
+    "bomFormat": "CycloneDX",
+    "specVersion": "1.6",
+    "serialNumber": "urn:uuid:674452ba-d230-4d3b-90b5-74ccd0c20c63",
+    "version": 1,
+    "metadata": {
+        "timestamp": "2025-06-05T09:40:49.133901+00:00",
+        "component": {
+            "type": "machine-learning-model",
+            "bom-ref": "infly/OpenCoder-8B-Instruct-d195bd27-df6f-5de8-8ab3-9f475c6cc49c",
+            "name": "infly/OpenCoder-8B-Instruct",
+            "externalReferences": [
+                {
+                    "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
+                    "type": "documentation"
+                }
+            ],
+            "modelCard": {
+                "modelParameters": {
+                    "task": "text-generation",
+                    "architectureFamily": "llama",
+                    "modelArchitecture": "LlamaForCausalLM",
+                    "datasets": [
+                        {
+                            "ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03"
+                        },
+                        {
+                            "ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49"
+                        }
+                    ]
+                },
+                "properties": [
+                    {
+                        "name": "library_name",
+                        "value": "transformers"
+                    },
+                    {
+                        "name": "base_model",
+                        "value": "infly/OpenCoder-8B-Base"
+                    }
+                ]
+            },
+            "authors": [
+                {
+                    "name": "infly"
+                }
+            ],
+            "licenses": [
+                {
+                    "license": {
+                        "name": "inf",
+                        "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE"
+                    }
+                }
+            ],
+            "tags": [
+                "transformers",
+                "safetensors",
+                "llama",
+                "text-generation",
+                "conversational",
+                "en",
+                "zh",
+                "dataset:OpenCoder-LLM/opencoder-sft-stage1",
+                "dataset:OpenCoder-LLM/opencoder-sft-stage2",
+                "arxiv:2411.04905",
+                "base_model:infly/OpenCoder-8B-Base",
+                "base_model:finetune:infly/OpenCoder-8B-Base",
+                "license:other",
+                "autotrain_compatible",
+                "text-generation-inference",
+                "endpoints_compatible",
+                "region:us"
+            ]
+        }
+    },
+    "components": [
+        {
+            "type": "data",
+            "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03",
+            "name": "OpenCoder-LLM/opencoder-sft-stage1",
+            "data": [
+                {
+                    "type": "dataset",
+                    "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03",
+                    "name": "OpenCoder-LLM/opencoder-sft-stage1",
+                    "contents": {
+                        "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage1",
+                        "properties": [
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: filtered_infinity_instruct {\"split\": \"train\", \"path\": \"data/filtered_infinity_instruct-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: largescale_diverse_instruct {\"split\": \"train\", \"path\": \"data/largescale_diverse_instruct-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: realuser_instruct {\"split\": \"train\", \"path\": \"data/realuser_instruct-*\"}"
+                            },
+                            {
+                                "name": "license",
+                                "value": "mit"
+                            }
+                        ]
+                    },
+                    "governance": {
+                        "owners": [
+                            {
+                                "organization": {
+                                    "name": "OpenCoder-LLM",
+                                    "url": "https://huggingface.co/OpenCoder-LLM"
+                                }
+                            }
+                        ]
+                    },
+                    "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1 <-- you are here\nopc-sft-stage2: the sft data used for opencoder sft-stage2\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1."
+                }
+            ]
+        },
+        {
+            "type": "data",
+            "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49",
+            "name": "OpenCoder-LLM/opencoder-sft-stage2",
+            "data": [
+                {
+                    "type": "dataset",
+                    "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49",
+                    "name": "OpenCoder-LLM/opencoder-sft-stage2",
+                    "contents": {
+                        "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage2",
+                        "properties": [
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: educational_instruct {\"split\": \"train\", \"path\": \"educational_instruct/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: evol_instruct {\"split\": \"train\", \"path\": \"evol_instruct/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: mceval_instruct {\"split\": \"train\", \"path\": \"mceval_instruct/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: package_instruct {\"split\": \"train\", \"path\": \"package_instruct/train-*\"}"
+                            },
+                            {
+                                "name": "license",
+                                "value": "mit"
+                            }
+                        ]
+                    },
+                    "governance": {
+                        "owners": [
+                            {
+                                "organization": {
+                                    "name": "OpenCoder-LLM",
+                                    "url": "https://huggingface.co/OpenCoder-LLM"
+                                }
+                            }
+                        ]
+                    },
+                    "description": "\n\n\t\n\t\t\n\t\tOpenCoder Dataset\n\t\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1\nopc-sft-stage2: the sft data used for opencoder sft-stage2 <-- you are here\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2."
+                }
+            ]
+        }
+    ]
+}