Text Generation
Transformers
Safetensors
English
Chinese
llama
conversational
text-generation-inference
Files changed (1) hide show
  1. infly_OpenCoder-8B-Instruct.json +171 -0
infly_OpenCoder-8B-Instruct.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bomFormat": "CycloneDX",
3
+ "specVersion": "1.6",
4
+ "serialNumber": "urn:uuid:674452ba-d230-4d3b-90b5-74ccd0c20c63",
5
+ "version": 1,
6
+ "metadata": {
7
+ "timestamp": "2025-06-05T09:40:49.133901+00:00",
8
+ "component": {
9
+ "type": "machine-learning-model",
10
+ "bom-ref": "infly/OpenCoder-8B-Instruct-d195bd27-df6f-5de8-8ab3-9f475c6cc49c",
11
+ "name": "infly/OpenCoder-8B-Instruct",
12
+ "externalReferences": [
13
+ {
14
+ "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
15
+ "type": "documentation"
16
+ }
17
+ ],
18
+ "modelCard": {
19
+ "modelParameters": {
20
+ "task": "text-generation",
21
+ "architectureFamily": "llama",
22
+ "modelArchitecture": "LlamaForCausalLM",
23
+ "datasets": [
24
+ {
25
+ "ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03"
26
+ },
27
+ {
28
+ "ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49"
29
+ }
30
+ ]
31
+ },
32
+ "properties": [
33
+ {
34
+ "name": "library_name",
35
+ "value": "transformers"
36
+ },
37
+ {
38
+ "name": "base_model",
39
+ "value": "infly/OpenCoder-8B-Base"
40
+ }
41
+ ]
42
+ },
43
+ "authors": [
44
+ {
45
+ "name": "infly"
46
+ }
47
+ ],
48
+ "licenses": [
49
+ {
50
+ "license": {
51
+ "name": "inf",
52
+ "url": "https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE"
53
+ }
54
+ }
55
+ ],
56
+ "tags": [
57
+ "transformers",
58
+ "safetensors",
59
+ "llama",
60
+ "text-generation",
61
+ "conversational",
62
+ "en",
63
+ "zh",
64
+ "dataset:OpenCoder-LLM/opencoder-sft-stage1",
65
+ "dataset:OpenCoder-LLM/opencoder-sft-stage2",
66
+ "arxiv:2411.04905",
67
+ "base_model:infly/OpenCoder-8B-Base",
68
+ "base_model:finetune:infly/OpenCoder-8B-Base",
69
+ "license:other",
70
+ "autotrain_compatible",
71
+ "text-generation-inference",
72
+ "endpoints_compatible",
73
+ "region:us"
74
+ ]
75
+ }
76
+ },
77
+ "components": [
78
+ {
79
+ "type": "data",
80
+ "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03",
81
+ "name": "OpenCoder-LLM/opencoder-sft-stage1",
82
+ "data": [
83
+ {
84
+ "type": "dataset",
85
+ "bom-ref": "OpenCoder-LLM/opencoder-sft-stage1-372d7af3-6153-5091-b5cd-39c612c74e03",
86
+ "name": "OpenCoder-LLM/opencoder-sft-stage1",
87
+ "contents": {
88
+ "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage1",
89
+ "properties": [
90
+ {
91
+ "name": "configs",
92
+ "value": "Name of the dataset subset: filtered_infinity_instruct {\"split\": \"train\", \"path\": \"data/filtered_infinity_instruct-*\"}"
93
+ },
94
+ {
95
+ "name": "configs",
96
+ "value": "Name of the dataset subset: largescale_diverse_instruct {\"split\": \"train\", \"path\": \"data/largescale_diverse_instruct-*\"}"
97
+ },
98
+ {
99
+ "name": "configs",
100
+ "value": "Name of the dataset subset: realuser_instruct {\"split\": \"train\", \"path\": \"data/realuser_instruct-*\"}"
101
+ },
102
+ {
103
+ "name": "license",
104
+ "value": "mit"
105
+ }
106
+ ]
107
+ },
108
+ "governance": {
109
+ "owners": [
110
+ {
111
+ "organization": {
112
+ "name": "OpenCoder-LLM",
113
+ "url": "https://huggingface.co/OpenCoder-LLM"
114
+ }
115
+ }
116
+ ]
117
+ },
118
+ "description": "OpenCoder Dataset\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1 <-- you are here\nopc-sft-stage2: the sft data used for opencoder sft-stage2\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage1."
119
+ }
120
+ ]
121
+ },
122
+ {
123
+ "type": "data",
124
+ "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49",
125
+ "name": "OpenCoder-LLM/opencoder-sft-stage2",
126
+ "data": [
127
+ {
128
+ "type": "dataset",
129
+ "bom-ref": "OpenCoder-LLM/opencoder-sft-stage2-ea53b7ac-b98d-5eef-a195-834ce3ec5a49",
130
+ "name": "OpenCoder-LLM/opencoder-sft-stage2",
131
+ "contents": {
132
+ "url": "https://huggingface.co/datasets/OpenCoder-LLM/opencoder-sft-stage2",
133
+ "properties": [
134
+ {
135
+ "name": "configs",
136
+ "value": "Name of the dataset subset: educational_instruct {\"split\": \"train\", \"path\": \"educational_instruct/train-*\"}"
137
+ },
138
+ {
139
+ "name": "configs",
140
+ "value": "Name of the dataset subset: evol_instruct {\"split\": \"train\", \"path\": \"evol_instruct/train-*\"}"
141
+ },
142
+ {
143
+ "name": "configs",
144
+ "value": "Name of the dataset subset: mceval_instruct {\"split\": \"train\", \"path\": \"mceval_instruct/train-*\"}"
145
+ },
146
+ {
147
+ "name": "configs",
148
+ "value": "Name of the dataset subset: package_instruct {\"split\": \"train\", \"path\": \"package_instruct/train-*\"}"
149
+ },
150
+ {
151
+ "name": "license",
152
+ "value": "mit"
153
+ }
154
+ ]
155
+ },
156
+ "governance": {
157
+ "owners": [
158
+ {
159
+ "organization": {
160
+ "name": "OpenCoder-LLM",
161
+ "url": "https://huggingface.co/OpenCoder-LLM"
162
+ }
163
+ }
164
+ ]
165
+ },
166
+ "description": "OpenCoder Dataset\n\nThe OpenCoder dataset is composed of the following datasets:\n\nopc-sft-stage1: the sft data used for opencoder sft-stage1\nopc-sft-stage2: the sft data used for opencoder sft-stage2 <-- you are here\nopc-annealing-corpus: the synthetic data & algorithmic corpus used for opencoder annealing\nopc-fineweb-code-corpus: the code-related page recalled from fineweb\nopc-fineweb-math-corpus: the math-related page recalled from fineweb\nrefineCode-code-corpus-meta: the meta-data\u2026 See the full description on the dataset page: https://huggingface.co/datasets/OpenCoder-LLM/opc-sft-stage2."
167
+ }
168
+ ]
169
+ }
170
+ ]
171
+ }